Skip to content

Commit 8ce8a54

Browse files
authored
[ascend]zq/update LayerNorm by aclnn (#1204)
1 parent e70a2c3 commit 8ce8a54

File tree

2 files changed

+46
-24
lines changed

2 files changed

+46
-24
lines changed

impl/ascend/aclnn/adaptor.hpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <dlfcn.h>
1313

1414
#include <array>
15+
#include <cassert>
1516
#include <cstddef>
1617
#include <cstdint>
1718
#include <functional>
@@ -108,6 +109,17 @@ inline aclScalar* createAclScalarFromDiopiScalar(const diopiScalar_t* scalar) {
108109

109110
/// Builds an ACL integer array from a DIOPI size descriptor.
///
/// Forwards the descriptor's `data` pointer and `len` element count to
/// ::aclCreateIntArray and returns whatever that call produces.
/// NOTE(review): ownership/release of the returned handle is not visible here — confirm with callers.
inline aclIntArray* createAclIntArrayFromDiopiSize(const diopiSize_t size) {
    aclIntArray* const created = ::aclCreateIntArray(size.data, size.len);
    return created;
}
110111

112+
template <size_t N>
113+
inline aclBoolArray* createAclBoolArrayFromVector(const std::array<bool, N>& vec) {
114+
return ::aclCreateBoolArray(vec.data(), vec.size());
115+
}
116+
117+
/// Type trait: `value` is true only when `T` is exactly `std::array<bool, N>` for some `N`.
///
/// Primary template — every type that does not match the specialization below
/// yields `std::false_type`. cv-qualified or reference types are NOT stripped here,
/// so pass the already-decayed type.
template <typename T>
struct IsBoolStdArray : std::false_type {};

/// Specialization selected for `std::array<bool, N>` of any extent `N` (including 0).
template <std::size_t N>
struct IsBoolStdArray<std::array<bool, N>> : std::true_type {};
122+
111123
/// Builds an ACL integer array from a `std::vector<int64_t>`.
///
/// Forwards the vector's contiguous storage and element count to
/// ::aclCreateIntArray and returns whatever that call produces.
/// NOTE(review): ownership/release of the returned handle is not visible here — confirm with callers.
inline aclIntArray* createAclIntArrayFromIntVector(const std::vector<int64_t>& vec) {
    aclIntArray* const created = ::aclCreateIntArray(vec.data(), vec.size());
    return created;
}
112124

113125
inline aclTensorList* createAclTensorListFromDiopiTensorVector(const std::vector<diopiTensorHandle_t>& tensorsVec) {
@@ -144,6 +156,8 @@ decltype(auto) convertType(T&& param) {
144156
return createAclIntArrayFromIntVector(std::forward<T>(param));
145157
} else if constexpr (std::is_same_v<U, diopiDtype_t>) {
146158
return diopiDtypeToAclDataType(std::forward<T>(param));
159+
} else if constexpr (IsBoolStdArray<U>::value) {
160+
return createAclBoolArrayFromVector<std::tuple_size_v<U>>(std::forward<T>(param));
147161
} else {
148162
static_assert(!std::is_class_v<U> && !std::is_pointer_v<U>);
149163
return std::forward<T>(param);

impl/ascend/functions/layer_norm.cpp

Lines changed: 32 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
* @copyright (c) 2023, DeepLink.
55
*/
66

7-
#include "../common/acloprunner.hpp"
7+
#include "../aclnn/acl_scalar.hpp"
8+
#include "../aclnn/adaptor.hpp"
89

910
namespace impl {
1011
namespace ascend {
@@ -13,8 +14,10 @@ diopiError_t diopiLayerNorm(diopiContextHandle_t ctx, diopiTensorHandle_t out, d
1314
diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight, diopiConstTensorHandle_t bias, diopiSize_t normalizedShape,
1415
double eps) {
1516
AscendTensor inputAt(input);
17+
AscendTensor outAt(out);
1618
if (0 == inputAt.numel()) {
17-
AclOpRunner<1, 1>("Fills", ctx).addInput(out).setAttr<float>("value", 0).addOutput(out).run();
19+
diopiScalar_t zeroScalar = constructDiopiScalarT(outAt.dtype(), 0.0);
20+
DIOPI_ASCEND_CALL_ACLNN(aclnnInplaceFillScalar, ctx, out, &zeroScalar);
1821
return diopiSuccess;
1922
}
2023

@@ -27,18 +30,8 @@ diopiError_t diopiLayerNorm(diopiContextHandle_t ctx, diopiTensorHandle_t out, d
2730
const int axis = inShape.len - normalizedShape.len;
2831
int64_t beginDim = axis;
2932

30-
// call acl op
31-
AclOpRunner<3, 3>("LayerNorm", ctx)
32-
.addInput(input)
33-
.addInput(weightTemp)
34-
.addInput(biasTemp)
35-
.addOutput(out)
36-
.addOutput(saveMean)
37-
.addOutput(saveInvstd)
38-
.setAttr("begin_norm_axis", beginDim)
39-
.setAttr("begin_params_axis", beginDim)
40-
.setAttr<float>("epsilon", eps)
41-
.run();
33+
// call aclnnLayerNorm
34+
DIOPI_ASCEND_CALL_ACLNN(aclnnLayerNorm, ctx, input, normalizedShape, weightTemp, biasTemp, eps, out, saveMean, saveInvstd);
4235
return diopiSuccess;
4336
}
4437

@@ -47,6 +40,7 @@ diopiError_t diopiLayerNormBackward(diopiContextHandle_t ctx, diopiTensorHandle_
4740
diopiConstTensorHandle_t bias, diopiConstTensorHandle_t mean, diopiConstTensorHandle_t rstd, diopiSize_t normalizedShape) {
4841
AscendTensor inputAt(input);
4942
diopiTensorHandle_t weightTemp = createTensorIfNullptrOrConstCast(ctx, weight, normalizedShape, inputAt.dtype(), true, 1);
43+
diopiTensorHandle_t biasTemp = createTensorIfNullptrOrConstCast(ctx, bias, normalizedShape, inputAt.dtype(), true, 0);
5044
diopiTensorHandle_t gradWeightTemp = createTensorIfNullptrOrConstCast(ctx, gradWeight, normalizedShape, inputAt.dtype(), false, 0);
5145
diopiTensorHandle_t gradBiasTemp = createTensorIfNullptrOrConstCast(ctx, gradBias, normalizedShape, inputAt.dtype(), false, 0);
5246

@@ -57,16 +51,30 @@ diopiError_t diopiLayerNormBackward(diopiContextHandle_t ctx, diopiTensorHandle_
5751
rstdAt.unsqueeze(rstdAt.dim());
5852
}
5953

60-
AclOpRunner<5, 3>("LayerNormGrad", ctx)
61-
.addInput(gradOutput)
62-
.addInput(input)
63-
.addInput(rstdAt)
64-
.addInput(meanAt)
65-
.addInput(weightTemp)
66-
.addOutput(gradInput)
67-
.addOutput(gradWeightTemp)
68-
.addOutput(gradBiasTemp)
69-
.run();
54+
std::array<bool, 3> gradMask = {true, true, true};
55+
if (nullptr == gradInput) {
56+
gradMask[0] = false;
57+
}
58+
if (nullptr == gradWeight) {
59+
gradMask[1] = false;
60+
}
61+
if (nullptr == gradBias) {
62+
gradMask[2] = false;
63+
}
64+
65+
DIOPI_ASCEND_CALL_ACLNN(aclnnLayerNormBackward,
66+
ctx,
67+
gradOutput,
68+
inputAt,
69+
normalizedShape,
70+
meanAt,
71+
rstdAt,
72+
weightTemp,
73+
biasTemp,
74+
gradMask,
75+
gradInput,
76+
gradWeightTemp,
77+
gradBiasTemp);
7078
return diopiSuccess;
7179
}
7280

0 commit comments

Comments
 (0)