From 55dd56f27f30c3629eb5e83b26218561348950be Mon Sep 17 00:00:00 2001 From: hellozmz <40719054@qq.com> Date: Wed, 10 Jul 2024 20:10:26 +0800 Subject: [PATCH 1/8] add bernoulli --- impl/ascend/functions/bernoulli.cpp | 24 ++++++++++++++++++++++++ impl/ascend/functions/erfinv.cpp | 24 ++++++++++++++++++++++++ impl/ascend_npu/ascend_config.yaml | 5 +++-- 3 files changed, 51 insertions(+), 2 deletions(-) create mode 100644 impl/ascend/functions/bernoulli.cpp create mode 100644 impl/ascend/functions/erfinv.cpp diff --git a/impl/ascend/functions/bernoulli.cpp b/impl/ascend/functions/bernoulli.cpp new file mode 100644 index 000000000..5d056f2e2 --- /dev/null +++ b/impl/ascend/functions/bernoulli.cpp @@ -0,0 +1,24 @@ +/** + * @file + * @author DeepLink + * @copyright (c) 2023, DeepLink. + */ + +#include "../aclnn/acl_scalar.hpp" +#include "../aclnn/adaptor.hpp" + +namespace impl { +namespace ascend { +static const uint64_t PHILOX_DEFAULT_NUM = 10; + +diopiError_t diopiBernoulliScalar(diopiContextHandle_t ctx, diopiTensorHandle_t out, double p, diopiGeneratorHandle_t generator) { + const std::pair gen = getSeedAndOffset(ctx, generator, 10); + const uint64_t seed = gen.first; + const uint64_t offset = gen.second; + auto pScalar = constructDiopiScalarT(diopi_dtype_float64, p); + DIOPI_ASCEND_CALL_ACLNN(aclnnInplaceBernoulli, ctx, out, p, seed, offset); + return diopiSuccess; +} + +} // namespace ascend +} // namespace impl diff --git a/impl/ascend/functions/erfinv.cpp b/impl/ascend/functions/erfinv.cpp new file mode 100644 index 000000000..10d13f511 --- /dev/null +++ b/impl/ascend/functions/erfinv.cpp @@ -0,0 +1,24 @@ +/** + * @file + * @author DeepLink + * @copyright (c) 2023, DeepLink. + */ + +#include "../aclnn/acl_scalar.hpp" +#include "../aclnn/adaptor.hpp" + +namespace impl { +namespace ascend { + +diopiError_t diopiErfinv(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { + DIOPI_ASCEND_CALL_ACLNN(aclnnErfinv, ctx, input, out); + return diopiSuccess; +} + +diopiError_t diopiErfinvInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { + DIOPI_ASCEND_CALL_ACLNN(aclnnErfinv, ctx, input); + return diopiSuccess; +} + +} // namespace ascend +} // namespace impl diff --git a/impl/ascend_npu/ascend_config.yaml b/impl/ascend_npu/ascend_config.yaml index 7765d7989..9e0913b75 100755 --- a/impl/ascend_npu/ascend_config.yaml +++ b/impl/ascend_npu/ascend_config.yaml @@ -34,6 +34,7 @@ ascend: - diopiBitwiseOrInpScalar - diopiBitwiseOrScalar - diopiBmm +- diopiBernoulliScalar - diopiCastDtype - diopiCat - diopiCeil @@ -75,6 +76,8 @@ ascend: - diopiEqual - diopiErf - diopiErfInp +- diopiErfinv +- diopiErfinvInp - diopiExp - diopiExpInp - diopiExpand @@ -255,8 +258,6 @@ ascend_npu: - diopiCustomizedFlashAttentionVarLen - diopiCustomizedFlashAttentionVarLenBackward - diopiDestIndexCopyKV -- diopiErfinv -- diopiErfinvInp - diopiGetNativeMemoryFormat - diopiIndex - diopiIndexBackward From eef00786b3df77cba62d99734a4c7101cb919214 Mon Sep 17 00:00:00 2001 From: hellozmz <40719054@qq.com> Date: Wed, 10 Jul 2024 20:23:17 +0800 Subject: [PATCH 2/8] rm useless code --- impl/ascend/functions/bernoulli.cpp | 4 ++-- impl/ascend/functions/erfinv.cpp | 3 +-- impl/ascend_npu/diopi_impl/erfinv.cpp | 24 ------------------------ 3 files changed, 3 insertions(+), 28 deletions(-) delete mode 100644 impl/ascend_npu/diopi_impl/erfinv.cpp diff --git a/impl/ascend/functions/bernoulli.cpp b/impl/ascend/functions/bernoulli.cpp index 5d056f2e2..1d82c2619 100644 --- 
a/impl/ascend/functions/bernoulli.cpp +++ b/impl/ascend/functions/bernoulli.cpp @@ -1,7 +1,7 @@ /** * @file * @author DeepLink - * @copyright (c) 2023, DeepLink. + * @copyright (c) 2024, DeepLink. */ #include "../aclnn/acl_scalar.hpp" @@ -16,7 +16,7 @@ diopiError_t diopiBernoulliScalar(diopiContextHandle_t ctx, diopiTensorHandle_t const uint64_t seed = gen.first; const uint64_t offset = gen.second; auto pScalar = constructDiopiScalarT(diopi_dtype_float64, p); - DIOPI_ASCEND_CALL_ACLNN(aclnnInplaceBernoulli, ctx, out, p, seed, offset); + DIOPI_ASCEND_CALL_ACLNN(aclnnInplaceBernoulli, ctx, out, &pScalar, seed, offset); return diopiSuccess; } diff --git a/impl/ascend/functions/erfinv.cpp b/impl/ascend/functions/erfinv.cpp index 10d13f511..2f1949485 100644 --- a/impl/ascend/functions/erfinv.cpp +++ b/impl/ascend/functions/erfinv.cpp @@ -1,10 +1,9 @@ /** * @file * @author DeepLink - * @copyright (c) 2023, DeepLink. + * @copyright (c) 2024, DeepLink. */ -#include "../aclnn/acl_scalar.hpp" #include "../aclnn/adaptor.hpp" namespace impl { diff --git a/impl/ascend_npu/diopi_impl/erfinv.cpp b/impl/ascend_npu/diopi_impl/erfinv.cpp deleted file mode 100644 index 69303db79..000000000 --- a/impl/ascend_npu/diopi_impl/erfinv.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/** - * @file - * @author DeepLink - * @copyright (c) 2024, DeepLink. - */ - -#include "helper.hpp" -#include "op_plugin/OpApiInterface.h" -#include "op_plugin/utils/op_api_common.h" - -namespace OP_IMPL_NS { -diopiError_t diopiErfinv(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { - BEGIN_CALL_ACL_OP(input, out); - EXEC_NPU_CMD(aclnnErfinv, inputAt, outAt); - END_CALL_ACL_OP(); -} - -diopiError_t diopiErfinvInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { - BEGIN_CALL_ACL_OP(input); - EXEC_NPU_CMD(aclnnInplaceErfinv, inputAt); - END_CALL_ACL_OP(); -} - -} // namespace OP_IMPL_NS From 1c07a843e64fe0a34428bee6d3494526f97bed43 Mon Sep 17 00:00:00 2001 From: hellozmz <40719054@qq.com> Date: Wed, 10 Jul 2024 20:59:40 +0800 Subject: [PATCH 3/8] rm useless code --- impl/ascend/functions/bernoulli.cpp | 1 - impl/ascend/functions/erfinv.cpp | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/impl/ascend/functions/bernoulli.cpp b/impl/ascend/functions/bernoulli.cpp index 1d82c2619..34eef66ef 100644 --- a/impl/ascend/functions/bernoulli.cpp +++ b/impl/ascend/functions/bernoulli.cpp @@ -9,7 +9,6 @@ namespace impl { namespace ascend { -static const uint64_t PHILOX_DEFAULT_NUM = 10; diopiError_t diopiBernoulliScalar(diopiContextHandle_t ctx, diopiTensorHandle_t out, double p, diopiGeneratorHandle_t generator) { const std::pair gen = getSeedAndOffset(ctx, generator, 10); diff --git a/impl/ascend/functions/erfinv.cpp b/impl/ascend/functions/erfinv.cpp index 2f1949485..1ddc2d751 100644 --- a/impl/ascend/functions/erfinv.cpp +++ b/impl/ascend/functions/erfinv.cpp @@ -15,7 +15,7 @@ diopiError_t diopiErfinv(diopiContextHandle_t ctx, diopiTensorHandle_t out, diop } diopiError_t diopiErfinvInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { - DIOPI_ASCEND_CALL_ACLNN(aclnnErfinv, ctx, input); + DIOPI_ASCEND_CALL_ACLNN(aclnnInplaceErfinv, ctx, input); return diopiSuccess; } From 32dcf2abe2fbae01dd81cc01fe6304e43f577779 Mon Sep 17 00:00:00 2001 From: hellozmz <40719054@qq.com> Date: Thu, 11 Jul 2024 11:49:32 +0800 Subject: [PATCH 4/8] add diopiBernoulli/diopiBernoulliInp --- impl/ascend/functions/bernoulli.cpp | 19 ++++++++++++++++++- impl/ascend_npu/CMakeLists.txt | 2 ++ 
impl/ascend_npu/ascend_config.yaml | 2 ++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/impl/ascend/functions/bernoulli.cpp b/impl/ascend/functions/bernoulli.cpp index 34eef66ef..a2dc98c62 100644 --- a/impl/ascend/functions/bernoulli.cpp +++ b/impl/ascend/functions/bernoulli.cpp @@ -9,9 +9,26 @@ namespace impl { namespace ascend { +static const uint64_t philoxDefaultNum = 10; + +diopiError_t diopiBernoulli(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiGeneratorHandle_t generator) { + const std::pair gen = getSeedAndOffset(ctx, generator, philoxDefaultNum); + const uint64_t seed = gen.first; + const uint64_t offset = gen.second; + DIOPI_ASCEND_CALL_ACLNN(aclnnInplaceBernoulliTensor, ctx, out, input, seed, offset); + return diopiSuccess; +} + +diopiError_t diopiBernoulliInp(diopiContextHandle_t ctx, diopiTensorHandle_t inout, diopiGeneratorHandle_t generator) { + const std::pair gen = getSeedAndOffset(ctx, generator, philoxDefaultNum); + const uint64_t seed = gen.first; + const uint64_t offset = gen.second; + DIOPI_ASCEND_CALL_ACLNN(aclnnInplaceBernoulliTensor, ctx, inout, inout, seed, offset); + return diopiSuccess; +} diopiError_t diopiBernoulliScalar(diopiContextHandle_t ctx, diopiTensorHandle_t out, double p, diopiGeneratorHandle_t generator) { - const std::pair gen = getSeedAndOffset(ctx, generator, 10); + const std::pair gen = getSeedAndOffset(ctx, generator, philoxDefaultNum); const uint64_t seed = gen.first; const uint64_t offset = gen.second; auto pScalar = constructDiopiScalarT(diopi_dtype_float64, p); diff --git a/impl/ascend_npu/CMakeLists.txt b/impl/ascend_npu/CMakeLists.txt index f2f03de69..81c7ba248 100755 --- a/impl/ascend_npu/CMakeLists.txt +++ b/impl/ascend_npu/CMakeLists.txt @@ -191,6 +191,8 @@ set(OLD_IMPL_SRC ${OLD_IMPL_DIR}/functions/max_pool2d.cpp ${OLD_IMPL_DIR}/functions/equal.cpp ${OLD_IMPL_DIR}/functions/masked_select.cpp + ${OLD_IMPL_DIR}/functions/bernoulli.cpp + ${OLD_IMPL_DIR}/functions/erfinv.cpp ${OLD_IMPL_DIR}/functions_mmcv/roi_align_npu.cpp ${OLD_IMPL_DIR}/functions_ext/rms_norm.cpp ${OLD_IMPL_DIR}/functions_ext/rotary_embedding.cpp diff --git a/impl/ascend_npu/ascend_config.yaml b/impl/ascend_npu/ascend_config.yaml index 9e0913b75..badf6525f 100755 --- a/impl/ascend_npu/ascend_config.yaml +++ b/impl/ascend_npu/ascend_config.yaml @@ -34,6 +34,8 @@ ascend: - diopiBitwiseOrInpScalar - diopiBitwiseOrScalar - diopiBmm +- diopiBernoulli +- diopiBernoulliInp - diopiBernoulliScalar - diopiCastDtype - diopiCat From 485f77fa38e3a21cf3603a94367f0a7c8a4ac9a8 Mon Sep 17 00:00:00 2001 From: hellozmz <40719054@qq.com> Date: Fri, 12 Jul 2024 13:43:13 +0800 Subject: [PATCH 5/8] rm skip ut --- impl/ascend/device_configs.py | 26 +------------------------- 1 file changed, 1 insertion(+), 25 deletions(-) diff --git a/impl/ascend/device_configs.py b/impl/ascend/device_configs.py index 6df8b849c..2780571c4 100755 --- a/impl/ascend/device_configs.py +++ b/impl/ascend/device_configs.py @@ -212,7 +212,7 @@ ), 'pointwise_op': dict( - name=['erfinv', 'asin'], + name=['asin'], tensor_para=dict( args=[ { @@ -982,30 +982,6 @@ ), ), - 'bernoulli': dict( - name=['bernoulli'], - tensor_para=dict( - args=[ - { - "ins": ['input'], - "dtype": [Skip(np.float32),Skip(np.float64),Skip(np.float16),], - }, - ] - ), - ), - - 'bernoulli_int': dict( - name=['bernoulli'], - tensor_para=dict( - args=[ - { - "ins": ['input'], - "dtype": [Skip(np.int64),Skip(np.int32),Skip(np.int16),Skip(np.int8),Skip(np.uint8),Skip(np.bool_),], - }, - ] - ), - 
), - 'layer_norm': dict( name=['layer_norm'], atol=1e-2, From 33293d01b01eae90af1d2b8933485cca2fd6b219 Mon Sep 17 00:00:00 2001 From: hellozmz <40719054@qq.com> Date: Tue, 16 Jul 2024 15:19:39 +0800 Subject: [PATCH 6/8] run some test ok --- impl/ascend/functions/unique.cpp | 142 +++++++++++++++++++++++++++++ impl/ascend_npu/ascend_config.yaml | 1 + 2 files changed, 143 insertions(+) create mode 100644 impl/ascend/functions/unique.cpp diff --git a/impl/ascend/functions/unique.cpp b/impl/ascend/functions/unique.cpp new file mode 100644 index 000000000..edcdd7076 --- /dev/null +++ b/impl/ascend/functions/unique.cpp @@ -0,0 +1,142 @@ +/** + * @file + * @author DeepLink + * @copyright (c) 2023, DeepLink. + */ + +#include +#include +#include + +#include "../aclnn/adaptor.hpp" +#include "../common/debug.hpp" + +namespace impl { +namespace ascend { + +diopiError_t diopiUnique(diopiContextHandle_t ctx, diopiTensorHandle_t* out, diopiConstTensorHandle_t input, const int64_t* dim, bool sorted, + bool return_counts, diopiTensorHandle_t indices, diopiTensorHandle_t* counts) { + bool return_inverse = (indices != nullptr) ? true : false; + AscendTensor inputAt(input); + if (!inputAt.isContiguous()) { + // TODO: need contiguous + std::cout << "need contiguous" << std::endl; + } + constexpr int64_t NoneN = 1000; + int64_t dim_value = dim ? (*dim < 0 ? *dim + inputAt.dim() : *dim) : NoneN; + + const std::vector inSizeVec = inputAt.shape(); + std::cout << "inSizeVec=" << std::endl; + for (auto i : inSizeVec) { + std::cout << i << std::endl; + } + diopiSize_t inSize = {inSizeVec.data(), static_cast(inSizeVec.size())}; + + diopiTensorHandle_t outTmp = nullptr; + if (dim) { + diopiRequireTensor(ctx, &outTmp, &inSize, nullptr, inputAt.dtype(), diopi_device); + } else { + std::vector inSizeVec{inputAt.numel()}; + diopiSize_t inSize = {inSizeVec.data(), static_cast(inSizeVec.size())}; + diopiRequireTensor(ctx, &outTmp, &inSize, nullptr, inputAt.dtype(), diopi_device); + } + + diopiTensorHandle_t inverseTmp = nullptr; + if (dim) { + std::vector inSizeVec = std::vector{dim_value}; + diopiSize_t inSize = {inSizeVec.data(), static_cast(inSizeVec.size())}; + diopiRequireTensor(ctx, &inverseTmp, &inSize, nullptr, diopi_dtype_int64, diopi_device); + } else { + diopiRequireTensor(ctx, &inverseTmp, &inSize, nullptr, diopi_dtype_int64, diopi_device); + } + + diopiTensorHandle_t countsTmp = nullptr; + if (dim) { + std::vector inSizeVec = std::vector{dim_value}; + diopiSize_t inSize = {inSizeVec.data(), static_cast(inSizeVec.size())}; + diopiRequireTensor(ctx, &countsTmp, &inSize, nullptr, diopi_dtype_int64, diopi_device); + } else { + std::vector inSizeVec{inputAt.numel()}; + diopiSize_t inSize = {inSizeVec.data(), static_cast(inSizeVec.size())}; + diopiRequireTensor(ctx, &countsTmp, &inSize, nullptr, diopi_dtype_int64, diopi_device); + } + + auto params = ::impl::ascend::aclnn_adaptor::convertParams(input, return_inverse, return_counts, dim_value, outTmp, inverseTmp, countsTmp).params(); + + if (dim) { + std::cout << "dim=" << *dim << ", change to " << (*dim < 0 ? 
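// (debug print only: dim_value above already normalized a negative dim by adding
//  inputAt.dim(), so e.g. dim=-1 on a 3-D input maps to 2)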
dim_value : *dim) << std::endl; + } else { + std::cout << "all dim" << std::endl; + } + DIOPI_ASECND_CALL_ACLNN_TYPE_SYNC(aclnnUniqueConsecutive, ctx, params); + + // get true outShape by aclGetViewShape + int64_t* viewDims = nullptr; + uint64_t viewDimNum = 0; + using aclGetViewShapeFunc = int (*)(const aclTensor* tensor, int64_t** viewDims, uint64_t* viewDimsNum); + static aclGetViewShapeFunc aclGetViewShape = reinterpret_cast(impl::ascend::aclnn_adaptor::getOpApiFuncAddr("aclGetViewShape")); + int ret = aclGetViewShape(std::get<4>(params), &viewDims, &viewDimNum); + ASCEND_CHECK_ABORT(ret == 0, "aclGetViewShape failed"); + + std::cout << "viewDimNum=" << viewDimNum << std::endl; + diopiSize_t outShape{viewDims, static_cast(viewDimNum)}; + std::vector outShapeVec(viewDims, viewDims + viewDimNum); + std::cout << "outShapeVec=" << std::endl; + for (auto i : outShapeVec) { + std::cout << i << std::endl; + } + // require out tensor from true outShape + diopiRequireTensor(ctx, out, &outShape, nullptr, inputAt.dtype(), diopi_device); + // copy outTmp to out + AscendTensor outAt(*out); + AscendTensor outTmpAt(outTmp); + printContiguousTensor(ctx, outTmpAt, "outTmpAt"); + outTmpAt.view({outShape.data, outShape.data + outShape.len}); + DIOPI_ASCEND_CALL_ACLNN(aclnnInplaceCopy, ctx, outAt, outTmpAt); + + if (return_inverse) { + AscendTensor indicesAt(indices); + AscendTensor inverseTmpAt(inverseTmp); + DIOPI_ASCEND_CALL_ACLNN(aclnnInplaceCopy, ctx, indicesAt, inverseTmpAt); + } + + if (return_counts) { + int ret2 = aclGetViewShape(std::get<6>(params), &viewDims, &viewDimNum); + ASCEND_CHECK_ABORT(ret2 == 0, "aclGetViewShape failed"); + diopiSize_t outShape1{viewDims, static_cast(viewDimNum)}; + // require out tensor from true outShape + diopiRequireTensor(ctx, counts, &outShape1, nullptr, diopi_dtype_int64, diopi_device); + // copy outTmp to out + AscendTensor countsAt(*counts); + AscendTensor countsTmpAt(countsTmp); + printContiguousTensor(ctx, countsTmpAt, "countsTmpAt"); + countsTmpAt.view({outShape1.data, outShape1.data + outShape1.len}); + + std::vector outSizeVec1(outShape1.data, outShape1.data + outShape1.len); + std::cout << "outSizeVec1" << std::endl; + for (auto i : outSizeVec1) { + std::cout << i << " "; + } + std::cout << "outSizeVec1" << std::endl; + DIOPI_ASCEND_CALL_ACLNN(aclnnInplaceCopy, ctx, countsAt, countsTmpAt); + } + + // if (viewDims) { + // delete viewDims; + // viewDims = nullptr; + // } + + //>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> + std::cout << dumpTensor(*out, "out") << std::endl; + std::cout << dumpTensor(indices, "indices") << std::endl; + std::cout << dumpTensor(*counts, "counts") << std::endl; + printContiguousTensor(ctx, AscendTensor(*out), "out"); + printContiguousTensor(ctx, AscendTensor(indices), "indices"); + printContiguousTensor(ctx, AscendTensor(*counts), "counts"); + //>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> + + return diopiSuccess; +} + +} // namespace ascend +} // namespace impl diff --git a/impl/ascend_npu/ascend_config.yaml b/impl/ascend_npu/ascend_config.yaml index badf6525f..f77205bfe 100755 --- a/impl/ascend_npu/ascend_config.yaml +++ b/impl/ascend_npu/ascend_config.yaml @@ -244,6 +244,7 @@ ascend: - diopiTriu - diopiTriuInp - diopiUniformInp +- diopiUnique - diopiUpsampleLinear - diopiUpsampleLinearBackward - diopiUpsampleNearest From 0007155f6cad6133ca541cd14fc6239c409d4f3f Mon Sep 17 00:00:00 2001 From: hellozmz <40719054@qq.com> Date: Wed, 17 Jul 2024 14:23:07 +0800 Subject: [PATCH 7/8] add 
opapi impl(not ok) --- impl/ascend/functions/unique.cpp | 114 +++++++++++++++---------------- 1 file changed, 57 insertions(+), 57 deletions(-) diff --git a/impl/ascend/functions/unique.cpp b/impl/ascend/functions/unique.cpp index edcdd7076..b4123aa41 100644 --- a/impl/ascend/functions/unique.cpp +++ b/impl/ascend/functions/unique.cpp @@ -8,28 +8,29 @@ #include #include +// #include "helper.hpp" #include "../aclnn/adaptor.hpp" #include "../common/debug.hpp" +#include "op_plugin/OpApiInterface.h" +#include "op_plugin/utils/op_api_common.h" + namespace impl { namespace ascend { -diopiError_t diopiUnique(diopiContextHandle_t ctx, diopiTensorHandle_t* out, diopiConstTensorHandle_t input, const int64_t* dim, bool sorted, + +diopiError_t diopiUnique1(diopiContextHandle_t ctx, diopiTensorHandle_t* out, diopiConstTensorHandle_t input, const int64_t* dim, bool sorted, bool return_counts, diopiTensorHandle_t indices, diopiTensorHandle_t* counts) { bool return_inverse = (indices != nullptr) ? true : false; - AscendTensor inputAt(input); + AscendTensor inputTmpAt(input), inputAt; + contiguous(ctx, inputTmpAt, inputAt); if (!inputAt.isContiguous()) { - // TODO: need contiguous - std::cout << "need contiguous" << std::endl; + std::cout << "!!!!!!!!!!!!!!!!!!!!!need contiguous" << std::endl; } constexpr int64_t NoneN = 1000; int64_t dim_value = dim ? (*dim < 0 ? *dim + inputAt.dim() : *dim) : NoneN; const std::vector inSizeVec = inputAt.shape(); - std::cout << "inSizeVec=" << std::endl; - for (auto i : inSizeVec) { - std::cout << i << std::endl; - } diopiSize_t inSize = {inSizeVec.data(), static_cast(inSizeVec.size())}; diopiTensorHandle_t outTmp = nullptr; @@ -42,17 +43,21 @@ diopiError_t diopiUnique(diopiContextHandle_t ctx, diopiTensorHandle_t* out, dio } diopiTensorHandle_t inverseTmp = nullptr; - if (dim) { - std::vector inSizeVec = std::vector{dim_value}; - diopiSize_t inSize = {inSizeVec.data(), static_cast(inSizeVec.size())}; - diopiRequireTensor(ctx, &inverseTmp, &inSize, nullptr, diopi_dtype_int64, diopi_device); - } else { - diopiRequireTensor(ctx, &inverseTmp, &inSize, nullptr, diopi_dtype_int64, diopi_device); - } + // if (return_inverse) { + if (dim) { + // std::vector inSizeVec = std::vector{dim_value}; + std::vector inSizeVec = {inputAt.shape(dim_value)}; + diopiSize_t inSize = {inSizeVec.data(), static_cast(inSizeVec.size())}; + diopiRequireTensor(ctx, &inverseTmp, &inSize, nullptr, diopi_dtype_int64, diopi_device); + } else { + diopiRequireTensor(ctx, &inverseTmp, &inSize, nullptr, diopi_dtype_int64, diopi_device); + } + // } diopiTensorHandle_t countsTmp = nullptr; if (dim) { - std::vector inSizeVec = std::vector{dim_value}; + // std::vector inSizeVec = std::vector{dim_value}; + std::vector inSizeVec = {inputAt.shape(dim_value)}; diopiSize_t inSize = {inSizeVec.data(), static_cast(inSizeVec.size())}; diopiRequireTensor(ctx, &countsTmp, &inSize, nullptr, diopi_dtype_int64, diopi_device); } else { @@ -63,77 +68,72 @@ diopiError_t diopiUnique(diopiContextHandle_t ctx, diopiTensorHandle_t* out, dio auto params = ::impl::ascend::aclnn_adaptor::convertParams(input, return_inverse, return_counts, dim_value, outTmp, inverseTmp, countsTmp).params(); - if (dim) { - std::cout << "dim=" << *dim << ", change to " << (*dim < 0 ? dim_value : *dim) << std::endl; - } else { - std::cout << "all dim" << std::endl; - } DIOPI_ASECND_CALL_ACLNN_TYPE_SYNC(aclnnUniqueConsecutive, ctx, params); + std::cout << "finish aclnnUniqueConsecutive." 
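// aclnnUniqueConsecutive wrote into max-sized temporaries; the code below asks
// aclGetViewShape for the actual output extents before requiring and filling
// the real out tensor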
<< std::endl; + // get true outShape by aclGetViewShape int64_t* viewDims = nullptr; uint64_t viewDimNum = 0; using aclGetViewShapeFunc = int (*)(const aclTensor* tensor, int64_t** viewDims, uint64_t* viewDimsNum); static aclGetViewShapeFunc aclGetViewShape = reinterpret_cast(impl::ascend::aclnn_adaptor::getOpApiFuncAddr("aclGetViewShape")); int ret = aclGetViewShape(std::get<4>(params), &viewDims, &viewDimNum); - ASCEND_CHECK_ABORT(ret == 0, "aclGetViewShape failed"); + ASCEND_CHECK_ABORT(ret == 0, "get out aclGetViewShape failed"); - std::cout << "viewDimNum=" << viewDimNum << std::endl; diopiSize_t outShape{viewDims, static_cast(viewDimNum)}; - std::vector outShapeVec(viewDims, viewDims + viewDimNum); - std::cout << "outShapeVec=" << std::endl; - for (auto i : outShapeVec) { - std::cout << i << std::endl; - } // require out tensor from true outShape diopiRequireTensor(ctx, out, &outShape, nullptr, inputAt.dtype(), diopi_device); // copy outTmp to out AscendTensor outAt(*out); AscendTensor outTmpAt(outTmp); - printContiguousTensor(ctx, outTmpAt, "outTmpAt"); outTmpAt.view({outShape.data, outShape.data + outShape.len}); DIOPI_ASCEND_CALL_ACLNN(aclnnInplaceCopy, ctx, outAt, outTmpAt); + std::cout << "finish outAt aclnnInplaceCopy." << std::endl; if (return_inverse) { - AscendTensor indicesAt(indices); AscendTensor inverseTmpAt(inverseTmp); + + diopiSize_t inSize = {inverseTmpAt.shape().data(), static_cast(inverseTmpAt.shape().size())}; + AscendTensor indicesTmpAt(indices); + if (indicesTmpAt.shape() != inverseTmpAt.shape()) { + diopiRequireTensor(ctx, &indices, &inSize, nullptr, diopi_dtype_int64, diopi_device); + } + AscendTensor indicesAt(indices); + // inverseTmpAt.view(indicesAt.shape()); + std::cout << dumpTensor(inverseTmpAt, "inverseTmpAt") << std::endl; + std::cout << dumpTensor(indicesAt, "indicesAt") << std::endl; DIOPI_ASCEND_CALL_ACLNN(aclnnInplaceCopy, ctx, indicesAt, inverseTmpAt); + std::cout << "finish indicesAt aclnnInplaceCopy." 
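// counts goes through the same resize-then-copy pattern as out: query the
// temporary's view shape, require *counts at that size, then aclnnInplaceCopy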
<< std::endl;
     }
 
     if (return_counts) {
+        AscendTensor countsTmpAt(countsTmp);
         int ret2 = aclGetViewShape(std::get<6>(params), &viewDims, &viewDimNum);
-        ASCEND_CHECK_ABORT(ret2 == 0, "aclGetViewShape failed");
-        diopiSize_t outShape1{viewDims, static_cast<int64_t>(viewDimNum)};
-        // require out tensor from true outShape
-        diopiRequireTensor(ctx, counts, &outShape1, nullptr, diopi_dtype_int64, diopi_device);
-        // copy outTmp to out
+        ASCEND_CHECK_ABORT(ret2 == 0, "get count aclGetViewShape failed");
+        diopiSize_t countShape{viewDims, static_cast<int64_t>(viewDimNum)};
+        std::vector<int64_t> tmp{countShape.data, countShape.data + countShape.len};  // the shape here is wrong
+        std::cout << "count value=" << std::endl;
+        for (auto i : tmp) {
+            std::cout << i << std::endl;
+        }
+        std::cout << "finish count value=" << std::endl;
+        std::cout << "tuple_size=" << std::tuple_size<decltype(params)>::value << std::endl;
+        std::cout << dumpTensor(countsTmpAt, "countsTmpAt before resize.") << std::endl;
+        diopiRequireTensor(ctx, counts, &countShape, nullptr, countsTmpAt.dtype(), diopi_device);
         AscendTensor countsAt(*counts);
-        AscendTensor countsTmpAt(countsTmp);
-        printContiguousTensor(ctx, countsTmpAt, "countsTmpAt");
-        countsTmpAt.view({outShape1.data, outShape1.data + outShape1.len});
+        countsTmpAt.view({countShape.data, countShape.data + countShape.len});
 
-        std::vector<int64_t> outSizeVec1(outShape1.data, outShape1.data + outShape1.len);
-        std::cout << "outSizeVec1" << std::endl;
-        for (auto i : outSizeVec1) {
-            std::cout << i << " ";
-        }
-        std::cout << "outSizeVec1" << std::endl;
+        std::cout << dumpTensor(countsTmpAt, "countsTmpAt") << std::endl;
+        std::cout << dumpTensor(countsAt, "countsAt") << std::endl;
+        printContiguousTensor(ctx, countsAt, "countsAt alllllll");
         DIOPI_ASCEND_CALL_ACLNN(aclnnInplaceCopy, ctx, countsAt, countsTmpAt);
+        std::cout << "finish countsAt aclnnInplaceCopy."
<< std::endl; } - // if (viewDims) { - // delete viewDims; - // viewDims = nullptr; - // } - - //>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> - std::cout << dumpTensor(*out, "out") << std::endl; - std::cout << dumpTensor(indices, "indices") << std::endl; - std::cout << dumpTensor(*counts, "counts") << std::endl; - printContiguousTensor(ctx, AscendTensor(*out), "out"); - printContiguousTensor(ctx, AscendTensor(indices), "indices"); - printContiguousTensor(ctx, AscendTensor(*counts), "counts"); - //>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> + if (viewDims) { + delete viewDims; + viewDims = nullptr; + } return diopiSuccess; } From 1dcd09b43316dc19091caf3c84ab5e6313a8a338 Mon Sep 17 00:00:00 2001 From: hellozmz <40719054@qq.com> Date: Thu, 18 Jul 2024 14:51:24 +0800 Subject: [PATCH 8/8] wq test --- .clang-format | 2 - .../python/{cache => cache-bk}/.gitkeep | 0 diopi_test/python/configs/diopi_configs.py | 8 +- diopi_test/python/conformance/check_result.py | 10 +- diopi_test/python/gencases/.gitkeep | 0 diopi_test/python/main.py | 1 + diopi_test/python/test_conv2d.py | 62 ++++++++ diopi_test/python/test_unique.sh | 8 ++ diopi_test/python/unique.py | 30 ++++ diopi_test/python/unique_ones.py | 14 ++ diopi_test/python/unique_rand.py | 25 ++++ impl/ascend/aclnn/adaptor.hpp | 6 +- impl/ascend/functions/max_pool2d.cpp | 2 +- impl/ascend/functions/unique.cpp-by_ascend | 120 ++++++++++++++++ impl/ascend/functions/unique.cpp.addlogs | 135 ++++++++++++++++++ .../unique.cpp.notok_but_impl_looks_good | 122 ++++++++++++++++ impl/ascend/functions/unique2.cpp | 90 ++++++++++++ impl/ascend/functions/unique3.cpp | 115 +++++++++++++++ impl/ascend_npu/CMakeLists.txt | 1 + impl/ascend_npu/ascend_config.yaml | 3 +- impl/ascend_npu/diopi_impl/bernoulli.cpp | 25 ++++ impl/ascend_npu/diopi_impl/erfinv.cpp | 24 ++++ impl/ascend_npu/diopi_impl/unique.cpp | 67 +++++++++ impl/camb/functions/unique.cpp | 10 +- impl/scripts/build_impl.sh | 4 +- 25 files changed, 866 insertions(+), 18 deletions(-) rename diopi_test/python/{cache => cache-bk}/.gitkeep (100%) delete mode 100644 diopi_test/python/gencases/.gitkeep create mode 100644 diopi_test/python/test_conv2d.py create mode 100644 diopi_test/python/test_unique.sh create mode 100644 diopi_test/python/unique.py create mode 100644 diopi_test/python/unique_ones.py create mode 100644 diopi_test/python/unique_rand.py create mode 100644 impl/ascend/functions/unique.cpp-by_ascend create mode 100644 impl/ascend/functions/unique.cpp.addlogs create mode 100644 impl/ascend/functions/unique.cpp.notok_but_impl_looks_good create mode 100644 impl/ascend/functions/unique2.cpp create mode 100644 impl/ascend/functions/unique3.cpp create mode 100644 impl/ascend_npu/diopi_impl/bernoulli.cpp create mode 100644 impl/ascend_npu/diopi_impl/erfinv.cpp create mode 100644 impl/ascend_npu/diopi_impl/unique.cpp diff --git a/.clang-format b/.clang-format index 1bbb4c42c..e94368148 100644 --- a/.clang-format +++ b/.clang-format @@ -24,12 +24,10 @@ Standard: Cpp11 AllowAllParametersOfDeclarationOnNextLine: true BinPackParameters: true BinPackArguments: false -BreakAfterAttributes: Leave ColumnLimit: 160 DerivePointerAlignment: false PointerAlignment: Left ReferenceAlignment: Left -InsertNewlineAtEOF: true SpacesBeforeTrailingComments: 2 IncludeIsMainSourceRegex: (\.cu)$ IncludeCategories: diff --git a/diopi_test/python/cache/.gitkeep b/diopi_test/python/cache-bk/.gitkeep similarity index 100% rename from diopi_test/python/cache/.gitkeep rename to 
diopi_test/python/cache-bk/.gitkeep diff --git a/diopi_test/python/configs/diopi_configs.py b/diopi_test/python/configs/diopi_configs.py index 2e268dd2c..6b2f27ced 100755 --- a/diopi_test/python/configs/diopi_configs.py +++ b/diopi_test/python/configs/diopi_configs.py @@ -6372,10 +6372,10 @@ name=['unique'], interface=['torch'], para=dict( - sorted=[True, True, False, True, False, False, True, False], - return_inverse=[False, True, True, False, True, True, False, True], - return_counts=[False, False, True, True, True, False, True, False], - dim=[None, -1, 1, None, 2, 0, 1, -2], + sorted= [True, True, False, True, False, False, True, False], + return_inverse= [False, True, True, False, True, True, False, True], + return_counts= [False, False, True, True, True, False, True, False], + dim= [None, -1, 1, None, 2, 0, 1, -2], ), tensor_para=dict( gen_fn='Genfunc.randn', diff --git a/diopi_test/python/conformance/check_result.py b/diopi_test/python/conformance/check_result.py index 23e127cfd..75d968dd7 100644 --- a/diopi_test/python/conformance/check_result.py +++ b/diopi_test/python/conformance/check_result.py @@ -139,6 +139,7 @@ def compare_others(output, output_reference, **kwargs): def allclose(tensor_dev: np.ndarray, tensor_ref: np.ndarray, **kwargs) -> bool: var_name = kwargs.get('name', 'out') sum_to_compare = kwargs.get('sum_to_compare', False) + # sum_to_compare = False rtol = kwargs.get('rtol', 1e-5) atol = kwargs.get('atol', 1e-8) mismatch_ratio_threshold = kwargs.get('mismatch_ratio_threshold', 1e-3) @@ -150,15 +151,20 @@ def allclose(tensor_dev: np.ndarray, tensor_ref: np.ndarray, **kwargs) -> bool: glob_vars.func_status[glob_vars.cur_test_func] = 'passed' if not passed: glob_vars.func_status[glob_vars.cur_test_func] = 'failed' + print(f"tensor_dev {tensor_dev}") + print(f"tensor_ref {tensor_ref}") sum1 = tensor_dev.sum() sum2 = tensor_ref.sum() mask = np.isclose(tensor_dev, tensor_ref, rtol, atol, equal_nan=True) count = np.count_nonzero(np.equal(mask, False)) debug_level = glob_vars.debug_level + if debug_level < 1: + print(f'debug_level {debug_level}') + debug_level = 100 if tensor_dev.dtype == np.bool_: max_diff = 1 error_info = f"The count of elements that do not meet the accuracy requirement is {count}.\n" + \ - f"Max of diff is {max_diff}.\n" + f"\n" elif tensor_dev.ndim == 0 and tensor_ref.ndim == 0: # result is scalar array error_info = f"The actual val is {tensor_dev} and the expected is {tensor_ref}.\n" @@ -187,7 +193,7 @@ def allclose(tensor_dev: np.ndarray, tensor_ref: np.ndarray, **kwargs) -> bool: if np.isnan(sum1) or np.isnan(sum2): error_info += f"Exists nan, {var_name} is {sum1} and {var_name}_ref is {sum2}.\n" else: - error_info += f"Sum of {var_name} is {sum1}, Sum of {var_name}_ref is {sum2}, Max of diff is {max_diff}.\n" + error_info += f"Sum of {var_name} is {sum1}, Sum of {var_name}_ref is {sum2}.\n" if debug_level > 1: error_info += f"{var_name} is {tensor_dev},\n{var_name}_ref is {tensor_ref},\nMask is {mask}\n" raise OutputCheckFailedException(error_info) diff --git a/diopi_test/python/gencases/.gitkeep b/diopi_test/python/gencases/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/diopi_test/python/main.py b/diopi_test/python/main.py index 1de8653f9..4ab2fde8f 100644 --- a/diopi_test/python/main.py +++ b/diopi_test/python/main.py @@ -186,6 +186,7 @@ def parse_args(): if args.pytest_args is not None: pytest_args.extend(args.pytest_args.split()) pytest_args = ['--cache-clear', '--disable-warnings'] + pytest_args + # pytest_args = [ 
'-s'] + pytest_args
     exit_code = pytest.main(pytest_args)
     if exit_code != 0:
         raise SystemExit(exit_code)
diff --git a/diopi_test/python/test_conv2d.py b/diopi_test/python/test_conv2d.py
new file mode 100644
index 000000000..bd13f50fd
--- /dev/null
+++ b/diopi_test/python/test_conv2d.py
@@ -0,0 +1,62 @@
+import torch
+import torch_dipu
+import torch.nn as nn
+import os
+
+def add_forward(x):
+    return x + x
+
+def linear_forward(x):
+    torch.manual_seed(0)
+    device = x.device
+    dim = x.shape[-1]
+
+    linear = nn.Linear(dim, dim)
+    linear.to(device)
+
+    y = linear(x)
+    return y
+
+def conv_forward(x):
+    torch.manual_seed(0)
+    device = x.device
+    batch, in_ch, w, h = x.shape
+
+    conv1 = nn.Conv2d(in_ch, in_ch, 3, 1, 1)
+    conv1.to(device)
+
+    y = conv1(x)
+    return y
+
+
+def batch_norm_forward(x):
+    device = x.device
+    batch, in_ch, w, h = x.shape
+
+    conv1 = nn.Conv2d(in_ch, in_ch, 3, 1, 1)
+    conv1.to(device)
+
+    y = conv1(x)
+    return y
+
+
+def test_func_acc(func, x_cpu):
+    x_dipu = x_cpu.cuda()
+    # x_dipu = x_cpu
+
+    y_cpu = func(x_cpu)
+    y_dipu = func(x_dipu)
+
+    return torch.sum(torch.abs(y_dipu.cpu() - y_cpu)) / x_cpu.numel()
+
+def print_diff(fun_name, diff):
+    print(f"{fun_name} mean diff: {diff}")
+
+if __name__ == "__main__":
+
+    torch.cuda.set_device(0)
+    os.environ['DIPU_DUMP_OP_ARGS'] = '1'
+    os.environ['DIPU_AUTOCOMPARE_OPS_LIST'] = '.*'
+    print_diff("add_forward" ,test_func_acc(add_forward, x_cpu=torch.randn(100,100)))
+    print_diff("linear_forward", test_func_acc(linear_forward, x_cpu=torch.randn(1000,1000)))
+    print_diff("conv_forward", test_func_acc(conv_forward, x_cpu=torch.randn(2, 32, 100,100)))
\ No newline at end of file
diff --git a/diopi_test/python/test_unique.sh b/diopi_test/python/test_unique.sh
new file mode 100644
index 000000000..0836e9d8c
--- /dev/null
+++ b/diopi_test/python/test_unique.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+# echo "First argument: $1"
+
+
+current_time=$(date "+%Y.%m.%d-%H.%M.%S")
+
+
+DIOPI_TRACK_ACL=1 DIPU_TRACK_ACL=1 DIPU_TRACK_HCCL=1 ASCEND_COREDUMP_SIGNAL=1 ASCEND_GLOBAL_LOG_LEVEL=0 DIPU_TRACK_ACL=1 DIPU_DEBUG_ALLOCATOR=15 python main.py --mode run_test | tee test_unique_${current_time}.log
\ No newline at end of file
diff --git a/diopi_test/python/unique.py b/diopi_test/python/unique.py
new file mode 100644
index 000000000..01e09ba9a
--- /dev/null
+++ b/diopi_test/python/unique.py
@@ -0,0 +1,30 @@
+import torch
+import torch_dipu
+
+# # create a tensor
+# x = torch.tensor([1, 2, 3, 2, 3, 4, 4, 5, 6, 6]).cuda()
+# # apply the unique operator
+# unique_x = torch.unique(x)
+
+# print(unique_x)
+
+
+# create a 2-D tensor
+x = torch.tensor([[4, 5, 6,1],[1, 2, 3,1], [2, 3, 4,1], [1, 2, 3,1]])
+
+# apply the unique operator along the first dimension
+unique_x, inverse_indices , counts = torch.unique(x, dim=0, sorted=False, return_inverse=True, return_counts=True)
+
+print(unique_x)
+
+# create a 2-D tensor
+x = torch.tensor([[4, 5, 6,1],[1, 2, 3,1], [2, 3, 4,1], [1, 2, 3,1]]).cuda()
+
+# apply the unique operator along the first dimension
+unique_x = torch.unique(x, dim=0)
+
+print(unique_x)
+
+print(inverse_indices)
+
+print(counts)
\ No newline at end of file
diff --git a/diopi_test/python/unique_ones.py b/diopi_test/python/unique_ones.py
new file mode 100644
index 000000000..bc6d16869
--- /dev/null
+++ b/diopi_test/python/unique_ones.py
@@ -0,0 +1,14 @@
+import torch
+import torch_dipu
+# import torch_npu
+# from torch_npu.contrib import transfer_to_npu
+
+x = torch.ones([4, 64, 128], dtype=torch.int32).cuda()
+
+unique_x, inverse_indices , counts = torch.unique(x, dim=1, sorted=False, return_inverse=True, return_counts=True)
+
+print(unique_x)
+
+print(inverse_indices)
+
+print(counts)
diff --git a/diopi_test/python/unique_rand.py b/diopi_test/python/unique_rand.py
new file mode 100644
index 000000000..ebdef38ea
--- /dev/null
+++ b/diopi_test/python/unique_rand.py
@@ -0,0 +1,25 @@
+import torch
+import torch_dipu
+
+x = torch.randn([252], dtype=torch.float32)
+# x = torch.tensor([[4, 5, 6,1],[1, 2, 3,1], [2, 3, 4,1], [1, 2, 3,1]])
+
+# apply the unique operator along the first dimension
+# unique_x = torch.unique(x, dim=1,)
+unique_x, inverse_indices = torch.unique(x, dim=-1, sorted=True, return_inverse=True, return_counts=False)
+
+# print(unique_x)
+
+# # create a 2-D tensor
+# x = torch.tensor([[4, 5, 6,1],[1, 2, 3,1], [2, 3, 4,1], [1, 2, 3,1]]).cuda()
+
+# # apply the unique operator along the first dimension
+# unique_x = torch.unique(x, dim=0)
+
+# print(unique_x)
+
+print(unique_x)
+
+print(inverse_indices)
+
+# print(counts)
\ No newline at end of file
diff --git a/impl/ascend/aclnn/adaptor.hpp b/impl/ascend/aclnn/adaptor.hpp
index 7c024a627..f1d5cfad2 100644
--- a/impl/ascend/aclnn/adaptor.hpp
+++ b/impl/ascend/aclnn/adaptor.hpp
@@ -364,7 +364,7 @@ void callAclnnImpl(diopiContextHandle_t ctx, const std::tuple& tuple) {
         static constexpr const char kWorkspaceApiName[] = #api "GetWorkspaceSize"; \
         auto convertedParams = ::impl::ascend::aclnn_adaptor::convertParams(__VA_ARGS__); \
         ::impl::ascend::aclnn_adaptor::callAclnnImpl(ctx, convertedParams.params()); \
-    } while (false)
+    } while (false);
 
 #define DIOPI_ASECND_CALL_ACLNN_TYPE_SYNC(api, ctx, ...) \
     do { \
@@ -374,12 +374,12 @@ void callAclnnImpl(diopiContextHandle_t ctx, const std::tuple& tuple) {
         diopiStreamHandle_t stream; \
         diopiGetStream(ctx, &stream); \
         CALL_ACLRT(aclrtSynchronizeStream(reinterpret_cast<aclrtStream>(stream))); \
-    } while (false)
+    } while (false);
 
 #define DIOPI_ASCEND_CALL_ACLNN_SYNC(api, ctx, ...) \
     do { \
         auto convertedParams = ::impl::ascend::aclnn_adaptor::convertParams(__VA_ARGS__); \
         DIOPI_ASECND_CALL_ACLNN_TYPE_SYNC(api, ctx, convertedParams.params()) \
-    } while (false)
+    } while (false);
 
 #endif  // IMPL_ASCEND_ACLNN_ADAPTOR_HPP_
diff --git a/impl/ascend/functions/max_pool2d.cpp b/impl/ascend/functions/max_pool2d.cpp
index 7670058fb..10e5cb14e 100644
--- a/impl/ascend/functions/max_pool2d.cpp
+++ b/impl/ascend/functions/max_pool2d.cpp
@@ -90,7 +90,7 @@ diopiError_t diopiMaxPool2dWithIndices(diopiContextHandle_t ctx, diopiTensorHand
     ;
     ASCEND_CHECK_ABORT(indicesAt.dtype() == diopi_dtype_int32, "aclnnMaxPool2dWithIndices only support int32 indices");
 
-    DIOPI_ASCEND_CALL_ACLNN(aclnnMaxPool2dWithIndices,
+    DIOPI_ASCEND_CALL_ACLNN(aclnnMaxPool2dWithIndices, /* aclnnAdaptiveMaxPool2d */
                             ctx,
                             inputAt,
                             diopiSize_t{kernelSizeData, 2},
diff --git a/impl/ascend/functions/unique.cpp-by_ascend b/impl/ascend/functions/unique.cpp-by_ascend
new file mode 100644
index 000000000..93a71ea00
--- /dev/null
+++ b/impl/ascend/functions/unique.cpp-by_ascend
@@ -0,0 +1,120 @@
+/**
+ * @file
+ * @author DeepLink
+ * @copyright (c) 2023, DeepLink.
+ */
+
+#include
+#include
+#include
+
+#include "../aclnn/adaptor.hpp"
+
+namespace impl {
+namespace ascend {
+
+diopiError_t diopiUnique(diopiContextHandle_t ctx, diopiTensorHandle_t* out, diopiConstTensorHandle_t input, const int64_t* dim, bool sorted,
+                         bool return_counts, diopiTensorHandle_t indices, diopiTensorHandle_t* counts) {
+    bool return_inverse = (indices != nullptr) ?
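// return_inverse is derived from whether the caller supplied an indices tensor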
true : false; + AscendTensor inputAt(input); + + std::cout << "Begin diopiUnique=>" << std::endl; + const std::vector inSizeVec = inputAt.shape(); + std::cout << "inSizeVec=" << std::endl; + for (auto i : inSizeVec) { + std::cout << i << std::endl; + } + diopiSize_t inSize = {inSizeVec.data(), static_cast(inSizeVec.size())}; + + diopiTensorHandle_t outTmp = nullptr; + if (dim) { + diopiRequireTensor(ctx, &outTmp, &inSize, nullptr, inputAt.dtype(), diopi_device); + } else { + std::vector inSizeVec{inputAt.numel()}; + diopiSize_t inSize = {inSizeVec.data(), static_cast(inSizeVec.size())}; + diopiRequireTensor(ctx, &outTmp, &inSize, nullptr, inputAt.dtype(), diopi_device); + } + + diopiTensorHandle_t countsTmp = nullptr; + if (dim) { + std::vector inSizeVec = (dim != nullptr) ? std::vector{inputAt.shape(*dim)} : inputAt.shape(); + diopiSize_t inSize = {inSizeVec.data(), static_cast(inSizeVec.size())}; + diopiRequireTensor(ctx, &countsTmp, &inSize, nullptr, diopi_dtype_int64, diopi_device); + // if (indices == nullptr) { + diopiRequireTensor(ctx, &indices, &inSize, nullptr, diopi_dtype_int64, diopi_device); + // } + } else { + // dim = nullptr; + // if (indices == nullptr) { + diopiRequireTensor(ctx, &indices, &inSize, nullptr, diopi_dtype_int64, diopi_device); + // } + std::vector inSizeVec{inputAt.numel()}; + diopiSize_t inSize = {inSizeVec.data(), static_cast(inSizeVec.size())}; + diopiRequireTensor(ctx, &countsTmp, &inSize, nullptr, diopi_dtype_int64, diopi_device); + } + + constexpr int64_t NoneN = 1000; + int64_t dim_value = dim ? *dim : NoneN; + auto params = ::impl::ascend::aclnn_adaptor::convertParams(input, return_inverse, return_counts, dim_value, outTmp, indices, countsTmp).params(); + + if (dim) { + std::cout << "dim=" << *dim << std::endl; + } else { + std::cout << "all dim" << std::endl; + } + DIOPI_ASECND_CALL_ACLNN_TYPE_SYNC(aclnnUniqueConsecutive, ctx, params); + std::cout << "diopiUnique finish" << std::endl; + + // get true outShape by aclGetViewShape + int64_t* viewDims = nullptr; + uint64_t viewDimNum = 0; + using aclGetViewShapeFunc = int (*)(const aclTensor* tensor, int64_t** viewDims, uint64_t* viewDimsNum); + static aclGetViewShapeFunc aclGetViewShape = reinterpret_cast(impl::ascend::aclnn_adaptor::getOpApiFuncAddr("aclGetViewShape")); + int ret = aclGetViewShape(std::get<4>(params), &viewDims, &viewDimNum); + ASCEND_CHECK_ABORT(ret == 0, "aclGetViewShape failed"); + + + std::cout << "viewDimNum=" << viewDimNum<< std::endl; + diopiSize_t outShape{viewDims, static_cast(viewDimNum)}; + std::vector outShapeVec(viewDims, viewDims + viewDimNum); + std::cout << "outShapeVec=" << std::endl; + for (auto i : outShapeVec) { + std::cout << i << std::endl; + } + // require out tensor from true outShape + diopiRequireTensor(ctx, out, &outShape, nullptr, inputAt.dtype(), diopi_device); + // copy outTmp to out + AscendTensor outAt(*out); + AscendTensor outTmpAt(outTmp); + outTmpAt.view({outShape.data, outShape.data + outShape.len}); + DIOPI_ASCEND_CALL_ACLNN(aclnnInplaceCopy, ctx, outAt, outTmpAt); + + + + int ret2 = aclGetViewShape(std::get<6>(params), &viewDims, &viewDimNum); + ASCEND_CHECK_ABORT(ret == 0, "aclGetViewShape failed"); + diopiSize_t outShape1{viewDims, static_cast(viewDimNum)}; + // require out tensor from true outShape + diopiRequireTensor(ctx, counts, &outShape1, nullptr, diopi_dtype_int64, diopi_device); + // copy outTmp to out + AscendTensor countsAt(*counts); + AscendTensor countsTmpAt(countsTmp); + countsTmpAt.view({outShape1.data, outShape1.data + 
outShape1.len}); + + std::vector outSizeVec1(outShape1.data, outShape1.data + outShape1.len); + std::cout << "outSizeVec1" << std::endl; + for (auto i : outSizeVec1) { + std::cout << i << " "; + } + std::cout << "outSizeVec1" << std::endl; + DIOPI_ASCEND_CALL_ACLNN(aclnnInplaceCopy, ctx, countsAt, countsTmpAt); + + if (viewDims) { + delete viewDims; + viewDims = nullptr; + } + return diopiSuccess; +} + +} // namespace ascend +} // namespace impl diff --git a/impl/ascend/functions/unique.cpp.addlogs b/impl/ascend/functions/unique.cpp.addlogs new file mode 100644 index 000000000..917ea372f --- /dev/null +++ b/impl/ascend/functions/unique.cpp.addlogs @@ -0,0 +1,135 @@ +/** + * @file + * @author DeepLink + * @copyright (c) 2023, DeepLink. + */ + +#include +#include +#include + +#include "../aclnn/adaptor.hpp" +#include "../common/debug.hpp" + +namespace impl { +namespace ascend { + +diopiError_t diopiUnique(diopiContextHandle_t ctx, diopiTensorHandle_t* out, diopiConstTensorHandle_t input, const int64_t* dim, bool sorted, + bool return_counts, diopiTensorHandle_t indices, diopiTensorHandle_t* counts) { + bool return_inverse = (indices != nullptr) ? true : false; + AscendTensor inputAt(input); + + std::cout << "diopiUnique" << std::endl; + // int64_t local_dim = dim ? (*dim >= 0 ? *dim : *dim + inputAt.dim()) : -1; + // std::vector inSizeVec = (dim != nullptr) ? std::vector{inputAt.shape(*dim)} : inputAt.shape(); + const std::vector inSizeVec = inputAt.shape(); + diopiSize_t inSize = {inSizeVec.data(), static_cast(inSizeVec.size())}; + std::cout << "inSizeVec=" << std::endl; + for (auto i : inSizeVec) { + std::cout << i << std::endl; + } + + diopiTensorHandle_t outTmp = nullptr; + // diopiRequireTensor(ctx, &outTmp, &inSize, nullptr, inputAt.dtype(), diopi_device); + if (dim) { + diopiRequireTensor(ctx, &outTmp, &inSize, nullptr, inputAt.dtype(), diopi_device); + } else { + std::vector inSizeVec{inputAt.numel()}; + diopiSize_t inSize = {inSizeVec.data(), static_cast(inSizeVec.size())}; + diopiRequireTensor(ctx, &outTmp, &inSize, nullptr, inputAt.dtype(), diopi_device); + } + + diopiTensorHandle_t countsTmp = nullptr; + if (!dim) { + std::vector inSizeVec = (dim != nullptr) ? std::vector{inputAt.shape(*dim)} : inputAt.shape(); + diopiSize_t inSize = {inSizeVec.data(), static_cast(inSizeVec.size())}; + diopiRequireTensor(ctx, &countsTmp, &inSize, nullptr, diopi_dtype_int64, diopi_device); + if (indices == nullptr) { + diopiRequireTensor(ctx, &indices, &inSize, nullptr, diopi_dtype_int64, diopi_device); + } + } else { + diopiRequireTensor(ctx, &countsTmp, &inSize, nullptr, diopi_dtype_int64, diopi_device); + if (indices == nullptr) { + diopiRequireTensor(ctx, &indices, &inSize, nullptr, diopi_dtype_int64, diopi_device); + } + } + + constexpr int64_t NoneN = 1000; + int64_t dim_value = dim ? 
*dim : NoneN; + auto params = ::impl::ascend::aclnn_adaptor::convertParams(input, return_inverse, return_counts, dim_value, outTmp, indices, countsTmp).params(); + + if (dim) { + std::cout << "dim=" << *dim << std::endl; + } else { + std::cout << "all dim" << std::endl; + } + DIOPI_ASECND_CALL_ACLNN_TYPE_SYNC(aclnnUniqueConsecutive, ctx, params); + std::cout << "diopiUnique finish" << std::endl; + + // get true outShape by aclGetViewShape + int64_t* viewDims = nullptr; + uint64_t viewDimNum = 0; + using aclGetViewShapeFunc = int (*)(const aclTensor* tensor, int64_t** viewDims, uint64_t* viewDimsNum); + static aclGetViewShapeFunc aclGetViewShape = reinterpret_cast(impl::ascend::aclnn_adaptor::getOpApiFuncAddr("aclGetViewShape")); + int ret = aclGetViewShape(std::get<4>(params), &viewDims, &viewDimNum); + ASCEND_CHECK_ABORT(ret == 0, "aclGetViewShape failed"); + + + std::cout << "viewDimNum=" << viewDimNum<< std::endl; + diopiSize_t outShape{viewDims, static_cast(viewDimNum)}; + std::vector outShapeVec(viewDims, viewDims + viewDimNum); + std::cout << "outShapeVec=" << std::endl; + for (auto i : outShapeVec) { + std::cout << i << std::endl; + } + // require out tensor from true outShape + std::cout << "inputAt.dtype()=" << inputAt.dtype() << std::endl; + diopiRequireTensor(ctx, out, &outShape, nullptr, inputAt.dtype(), diopi_device); + // copy outTmp to out + AscendTensor outAt(*out); + AscendTensor outTmpAt(outTmp); + std::cout << "outAt.dtype()=" << outAt.dtype() << std::endl; + std::cout << "outTmpAt.dtype()=" << outTmpAt.dtype() << std::endl; + outTmpAt.view({outShape.data, outShape.data + outShape.len}); + DIOPI_ASCEND_CALL_ACLNN(aclnnInplaceCopy, ctx, outAt, outTmpAt); + + + + int ret2 = aclGetViewShape(std::get<6>(params), &viewDims, &viewDimNum); + ASCEND_CHECK_ABORT(ret2 == 0, "aclGetViewShape failed"); + diopiSize_t outShape1{viewDims, static_cast(viewDimNum)}; + // require out tensor from true outShape + diopiRequireTensor(ctx, counts, &outShape1, nullptr, diopi_dtype_int64, diopi_device); + // copy outTmp to out + AscendTensor countsAt(*counts); + AscendTensor countsTmpAt(countsTmp); + countsTmpAt.view({outShape1.data, outShape1.data + outShape1.len}); + + std::vector outSizeVec1(outShape1.data, outShape1.data + outShape1.len); + std::cout << "outSizeVec1" << std::endl; + for (auto i : outSizeVec1) { + std::cout << i << " "; + } + std::cout << "outSizeVec1" << std::endl; + DIOPI_ASCEND_CALL_ACLNN(aclnnInplaceCopy, ctx, countsAt, countsTmpAt); + + + + //>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> + std::cout << dumpTensor(*out, "out") << std::endl; + std::cout << dumpTensor(*counts, "counts") << std::endl; + std::cout << dumpTensor(indices, "indices") << std::endl; + printContiguousTensor(ctx, AscendTensor(*out), "out"); + printContiguousTensor(ctx, AscendTensor(*counts), "counts"); + printContiguousTensor(ctx, AscendTensor(indices), "indices"); + //>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> + + if (viewDims) { + delete viewDims; + viewDims = nullptr; + } + return diopiSuccess; +} + +} // namespace ascend +} // namespace impl diff --git a/impl/ascend/functions/unique.cpp.notok_but_impl_looks_good b/impl/ascend/functions/unique.cpp.notok_but_impl_looks_good new file mode 100644 index 000000000..b3f4e6df0 --- /dev/null +++ b/impl/ascend/functions/unique.cpp.notok_but_impl_looks_good @@ -0,0 +1,122 @@ +/** + * @file + * @author DeepLink + * @copyright (c) 2023, DeepLink. 
+ */ + +#include +#include +#include + +#include "../aclnn/adaptor.hpp" + +namespace impl { +namespace ascend { + +diopiError_t diopiUnique(diopiContextHandle_t ctx, diopiTensorHandle_t* out, diopiConstTensorHandle_t input, const int64_t* dim, bool sorted, + bool return_counts, diopiTensorHandle_t indices, diopiTensorHandle_t* counts) { + bool return_inverse = (indices != nullptr) ? true : false; + AscendTensor inputAt(input); + + std::cout << "diopiUnique" << std::endl; + AscendTensor countsAtq(*counts); + std::cout << "counts dypte=" << countsAtq.dtype() << std::endl; + // int64_t local_dim = dim ? (*dim >= 0 ? *dim : *dim + inputAt.dim()) : -1; + // std::vector inSizeVec = (dim != nullptr) ? std::vector{inputAt.shape(*dim)} : inputAt.shape(); + const std::vector inSizeVec = inputAt.shape(); + diopiSize_t inSize = {inSizeVec.data(), static_cast(inSizeVec.size())}; + std::cout << "inSizeVec=" << std::endl; + for (auto i : inSizeVec) { + std::cout << i << std::endl; + } + + diopiTensorHandle_t outTmp = nullptr; + // diopiRequireTensor(ctx, &outTmp, &inSize, nullptr, inputAt.dtype(), diopi_device); + if (dim) { + diopiRequireTensor(ctx, &outTmp, &inSize, nullptr, inputAt.dtype(), diopi_device); + } else { + std::vector inSizeVec{inputAt.numel()}; + diopiSize_t inSize = {inSizeVec.data(), static_cast(inSizeVec.size())}; + diopiRequireTensor(ctx, &outTmp, &inSize, nullptr, inputAt.dtype(), diopi_device); + } + + diopiTensorHandle_t indexTmp = nullptr; + if (dim) { + std::vector inSizeVec = (dim != nullptr) ? std::vector{inputAt.shape(*dim)} : inputAt.shape(); + diopiSize_t inSize = {inSizeVec.data(), static_cast(inSizeVec.size())}; + diopiRequireTensor(ctx, &indexTmp, &inSize, nullptr, diopi_dtype_int64, diopi_device); + } else { + diopiRequireTensor(ctx, &indexTmp, &inSize, nullptr, diopi_dtype_int64, diopi_device); + } + + diopiTensorHandle_t countsTmp = nullptr; + if (dim) { + diopiRequireTensor(ctx, &countsTmp, &inSize, nullptr, diopi_dtype_int64, diopi_device); + } else { + std::vector inSizeVec = (dim != nullptr) ? std::vector{inputAt.shape(*dim)} : inputAt.shape(); + diopiSize_t inSize = {inSizeVec.data(), static_cast(inSizeVec.size())}; + diopiRequireTensor(ctx, &countsTmp, &inSize, nullptr, diopi_dtype_int64, diopi_device); + } + + constexpr int64_t NoneN = 1000; + int64_t dim_value = dim ? 
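// 1000 serves as the "no dim given" sentinel for aclnnUniqueConsecutive,
// i.e. treat the input as flattened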
*dim : NoneN; + auto params = ::impl::ascend::aclnn_adaptor::convertParams(input, return_inverse, return_counts, dim_value, outTmp, indices, countsTmp).params(); + + if (dim) { + std::cout << "dim=" << *dim << std::endl; + } else { + std::cout << "all dim" << std::endl; + } + DIOPI_ASECND_CALL_ACLNN_TYPE_SYNC(aclnnUniqueConsecutive, ctx, params); + std::cout << "diopiUnique finish" << std::endl; + + // get true outShape by aclGetViewShape + int64_t* viewDims = nullptr; + uint64_t viewDimNum = 0; + using aclGetViewShapeFunc = int (*)(const aclTensor* tensor, int64_t** viewDims, uint64_t* viewDimsNum); + static aclGetViewShapeFunc aclGetViewShape = reinterpret_cast(impl::ascend::aclnn_adaptor::getOpApiFuncAddr("aclGetViewShape")); + int ret = aclGetViewShape(std::get<4>(params), &viewDims, &viewDimNum); + ASCEND_CHECK_ABORT(ret == 0, "aclGetViewShape failed"); + + std::cout << "viewDimNum=" << viewDimNum << std::endl; + diopiSize_t outShape{viewDims, static_cast(viewDimNum)}; + std::vector outShapeVec(viewDims, viewDims + viewDimNum); + std::cout << "outShapeVec=" << std::endl; + for (auto i : outShapeVec) { + std::cout << i << std::endl; + } + // require out tensor from true outShape + diopiRequireTensor(ctx, out, &outShape, nullptr, inputAt.dtype(), diopi_device); + // copy outTmp to out + AscendTensor outAt(*out); + AscendTensor outTmpAt(outTmp); + outTmpAt.view({outShape.data, outShape.data + outShape.len}); + DIOPI_ASCEND_CALL_ACLNN(aclnnInplaceCopy, ctx, outAt, outTmpAt); + + int ret2 = aclGetViewShape(std::get<6>(params), &viewDims, &viewDimNum); + ASCEND_CHECK_ABORT(ret2 == 0, "aclGetViewShape failed"); + diopiSize_t outShape1{viewDims, static_cast(viewDimNum)}; + // require out tensor from true outShape + diopiRequireTensor(ctx, counts, &outShape1, nullptr, diopi_dtype_int64, diopi_device); + // copy outTmp to out + AscendTensor countsAt(*counts); + AscendTensor countsTmpAt(countsTmp); + countsTmpAt.view({outShape1.data, outShape1.data + outShape1.len}); + + std::vector outSizeVec1(outShape1.data, outShape1.data + outShape1.len); + std::cout << "outSizeVec1" << std::endl; + for (auto i : outSizeVec1) { + std::cout << i << " "; + } + std::cout << "outSizeVec1" << std::endl; + DIOPI_ASCEND_CALL_ACLNN(aclnnInplaceCopy, ctx, countsAt, countsTmpAt); + + if (viewDims) { + delete viewDims; + viewDims = nullptr; + } + return diopiSuccess; +} + +} // namespace ascend +} // namespace impl diff --git a/impl/ascend/functions/unique2.cpp b/impl/ascend/functions/unique2.cpp new file mode 100644 index 000000000..d656de2c2 --- /dev/null +++ b/impl/ascend/functions/unique2.cpp @@ -0,0 +1,90 @@ +// /** +// * @file +// * @author DeepLink +// * @copyright (c) 2023, DeepLink. +// */ + +// #include +// #include +// #include + +// #include "../aclnn/adaptor.hpp" + +// namespace impl { +// namespace ascend { + +// diopiError_t diopiUnique(diopiContextHandle_t ctx, diopiTensorHandle_t* out, diopiConstTensorHandle_t input, const int64_t* dim, bool sorted, +// bool return_counts, diopiTensorHandle_t indices, diopiTensorHandle_t* counts) { +// bool return_inverse = (indices != nullptr) ? true : false; +// AscendTensor inputAt(input); + +// std::cout << "diopiUnique" << std::endl; +// // int64_t local_dim = dim ? (*dim >= 0 ? *dim : *dim + inputAt.dim()) : -1; +// std::vector inSizeVec = (dim != nullptr) ? 
std::vector{inputAt.shape(*dim)} : inputAt.shape(); +// diopiSize_t inSize = {inSizeVec.data(), static_cast(inSizeVec.size())}; +// std::cout << "inSizeVec=" << std::endl; +// for (auto i : inSizeVec) { +// std::cout << i << std::endl; +// } +// std::vector zeroShape = {0}; +// diopiSize_t zeroSize = {zeroShape.data(), static_cast(zeroShape.size())}; +// if (return_counts) { +// diopiRequireTensor(ctx, counts, &inSize, nullptr, diopi_dtype_int64, diopi_device); +// if (indices == nullptr) { +// diopiRequireTensor(ctx, &indices, &inSize, nullptr, diopi_dtype_int64, diopi_device); +// } +// } else { +// diopiRequireTensor(ctx, counts, &zeroSize, nullptr, diopi_dtype_int64, diopi_device); +// if (indices == nullptr) { +// diopiRequireTensor(ctx, &indices, &zeroSize, nullptr, diopi_dtype_int64, diopi_device); +// } +// } +// diopiTensorHandle_t outTmp = nullptr; +// diopiRequireTensor(ctx, &outTmp, &inSize, nullptr, inputAt.dtype(), diopi_device); +// AscendTensor countsAt(*counts); +// auto params = ::impl::ascend::aclnn_adaptor::convertParams(input, sorted, return_inverse, return_counts, outTmp, indices, countsAt).params(); + +// if (dim) { +// std::cout << "dim=" << *dim << std::endl; +// } else { +// std::cout << "all dim" << std::endl; +// } +// DIOPI_ASECND_CALL_ACLNN_TYPE_SYNC(aclnnUnique2, ctx, params); +// std::cout << "diopiUnique finish" << std::endl; + +// // get true outShape by aclGetViewShape +// int64_t* viewDims = nullptr; +// uint64_t viewDimNum = 0; +// using aclGetViewShapeFunc = int (*)(const aclTensor* tensor, int64_t** viewDims, uint64_t* viewDimsNum); +// static aclGetViewShapeFunc aclGetViewShape = reinterpret_cast(impl::ascend::aclnn_adaptor::getOpApiFuncAddr("aclGetViewShape")); +// int ret = aclGetViewShape(std::get<4>(params), &viewDims, &viewDimNum); +// ASCEND_CHECK_ABORT(ret == 0, "aclGetViewShape failed"); +// diopiSize_t outShape{viewDims, static_cast(viewDimNum)}; +// std::vector outShapeVec(viewDims, viewDims+ viewDimNum); +// std::cout << "outShapeVec=" << std::endl; +// for (auto i : outShapeVec) { +// std::cout << i << std::endl; +// } +// // require out tensor from true outShape +// diopiRequireTensor(ctx, out, &outShape, nullptr, inputAt.dtype(), diopi_device); +// // copy outTmp to out +// AscendTensor outAt(*out); +// AscendTensor outTmpAt(outTmp); +// outTmpAt.view({outShape.data, outShape.data + outShape.len}); + +// std::vector outSizeVec(outShape.data, outShape.data + outShape.len); +// std::cout << "outSizeVec" << std::endl; +// for (auto i : outSizeVec) { +// std::cout << i << " "; +// } +// std::cout << "outSizeVec" << std::endl; +// DIOPI_ASCEND_CALL_ACLNN(aclnnInplaceCopy, ctx, outAt, outTmpAt); +// if (viewDims) { +// delete viewDims; +// viewDims = nullptr; +// } +// return diopiSuccess; +// } + +// } // namespace ascend +// } // namespace impl diff --git a/impl/ascend/functions/unique3.cpp b/impl/ascend/functions/unique3.cpp new file mode 100644 index 000000000..9a54a0311 --- /dev/null +++ b/impl/ascend/functions/unique3.cpp @@ -0,0 +1,115 @@ +/** + * @file + * @author DeepLink + * @copyright (c) 2023, DeepLink. + */ + +#include +#include +#include + +#include "../aclnn/adaptor.hpp" + +namespace impl { +namespace ascend { + +diopiError_t diopiUnique(diopiContextHandle_t ctx, diopiTensorHandle_t* out, diopiConstTensorHandle_t input, const int64_t* dim, bool sorted, + bool return_counts, diopiTensorHandle_t indices, diopiTensorHandle_t* counts) { + bool return_inverse = (indices != nullptr) ? 
diff --git a/impl/ascend/functions/unique3.cpp b/impl/ascend/functions/unique3.cpp
new file mode 100644
index 000000000..9a54a0311
--- /dev/null
+++ b/impl/ascend/functions/unique3.cpp
@@ -0,0 +1,115 @@
+/**
+ * @file
+ * @author DeepLink
+ * @copyright (c) 2023, DeepLink.
+ */
+
+#include <cstdint>
+#include <iostream>
+#include <vector>
+
+#include "../aclnn/adaptor.hpp"
+
+namespace impl {
+namespace ascend {
+
+diopiError_t diopiUnique(diopiContextHandle_t ctx, diopiTensorHandle_t* out, diopiConstTensorHandle_t input, const int64_t* dim, bool sorted,
+                         bool return_counts, diopiTensorHandle_t indices, diopiTensorHandle_t* counts) {
+    bool return_inverse = (indices != nullptr);
+    AscendTensor inputAt(input);
+
+    std::vector<int64_t> inSizeVec = inputAt.shape();
+    diopiSize_t inSize = {inSizeVec.data(), static_cast<int64_t>(inSizeVec.size())};
+    std::cout << "====inSizeVec=" << std::endl;
+    for (auto i : inSizeVec) {
+        std::cout << i << std::endl;
+    }
+
+    // worst case: every element is unique, so the temporary holds numel() elements
+    diopiTensorHandle_t outTmp = nullptr;
+    std::vector<int64_t> numelVec{inputAt.numel()};
+    diopiSize_t numelSize = {numelVec.data(), static_cast<int64_t>(numelVec.size())};
+    diopiRequireTensor(ctx, &outTmp, &numelSize, nullptr, inputAt.dtype(), diopi_device);
+
+    diopiTensorHandle_t countsTmp = nullptr;
+    if (dim) {
+        std::vector<int64_t> dimSizeVec{inputAt.shape(*dim)};
+        diopiSize_t dimSize = {dimSizeVec.data(), static_cast<int64_t>(dimSizeVec.size())};
+        diopiRequireTensor(ctx, &countsTmp, &dimSize, nullptr, diopi_dtype_int64, diopi_device);
+        if (indices == nullptr) {
+            diopiRequireTensor(ctx, &indices, &dimSize, nullptr, diopi_dtype_int64, diopi_device);
+        }
+    } else {
+        diopiRequireTensor(ctx, &countsTmp, &numelSize, nullptr, diopi_dtype_int64, diopi_device);
+        if (indices == nullptr) {
+            diopiRequireTensor(ctx, &indices, &inSize, nullptr, diopi_dtype_int64, diopi_device);
+        }
+    }
+
+    // out-of-range sentinel standing in for "no dim given"
+    constexpr int64_t NoneN = 1000;
+    int64_t dim_value = dim ? *dim : NoneN;
+    auto params = ::impl::ascend::aclnn_adaptor::convertParams(input, return_inverse, return_counts, dim_value, outTmp, indices, countsTmp).params();
+
+    // if (dim) {
+    //     std::cout << "dim=" << *dim << std::endl;
+    // } else {
+    //     std::cout << "all dim" << std::endl;
+    // }
+    DIOPI_ASECND_CALL_ACLNN_TYPE_SYNC(aclnnUniqueConsecutive, ctx, params);
+    std::cout << "diopiUnique finish" << std::endl;
+
+    using aclGetViewShapeFunc = int (*)(const aclTensor* tensor, int64_t** viewDims, uint64_t* viewDimsNum);
+    static aclGetViewShapeFunc aclGetViewShape =
+        reinterpret_cast<aclGetViewShapeFunc>(impl::ascend::aclnn_adaptor::getOpApiFuncAddr("aclGetViewShape"));
+
+    // get true outShape by aclGetViewShape
+    int64_t* viewDims = nullptr;
+    uint64_t viewDimNum = 0;
+    int ret = aclGetViewShape(std::get<4>(params), &viewDims, &viewDimNum);
+    ASCEND_CHECK_ABORT(ret == 0, "out aclGetViewShape failed");
+
+    std::cout << "viewDimNum=" << viewDimNum << std::endl;
+    diopiSize_t outShape{viewDims, static_cast<int64_t>(viewDimNum)};
+    std::vector<int64_t> outShapeVec(viewDims, viewDims + viewDimNum);
+    std::cout << "outShapeVec=" << std::endl;
+    for (auto i : outShapeVec) {
+        std::cout << i << std::endl;
+    }
+    std::cout << "dtype=" << inputAt.dtype() << std::endl;
+    // require out tensor from true outShape
+    diopiRequireTensor(ctx, out, &outShape, nullptr, inputAt.dtype(), diopi_device);
+    // copy outTmp to out
+    AscendTensor outAt(*out);
+    AscendTensor outTmpAt(outTmp);
+    outTmpAt.view({outShape.data, outShape.data + outShape.len});
+    DIOPI_ASCEND_CALL_ACLNN(aclnnInplaceCopy, ctx, outAt, outTmpAt);
+
+    int ret2 = aclGetViewShape(std::get<6>(params), &viewDims, &viewDimNum);
+    ASCEND_CHECK_ABORT(ret2 == 0, "counts aclGetViewShape failed");
+    diopiSize_t outShape1{viewDims, static_cast<int64_t>(viewDimNum)};
+    // require counts tensor from true outShape
+    diopiRequireTensor(ctx, counts, &outShape1, nullptr, diopi_dtype_int64, diopi_device);
+    // copy countsTmp to counts
+    AscendTensor countsAt(*counts);
+    AscendTensor countsTmpAt(countsTmp);
+    countsTmpAt.view({outShape1.data, outShape1.data + outShape1.len});
+
+    std::vector<int64_t> outSizeVec1(outShape1.data, outShape1.data + outShape1.len);
+    std::cout << "outSizeVec1" << std::endl;
+    for (auto i : outSizeVec1) {
+        std::cout << i << " ";
+    }
+    std::cout << "outSizeVec1" << std::endl;
+    DIOPI_ASCEND_CALL_ACLNN(aclnnInplaceCopy, ctx, countsAt, countsTmpAt);
+
+    if (viewDims) {
+        delete viewDims;
+        viewDims = nullptr;
+    }
+    return diopiSuccess;
+}
+
+} // namespace ascend
+} // namespace impl
diff --git a/impl/ascend_npu/CMakeLists.txt b/impl/ascend_npu/CMakeLists.txt
index 81c7ba248..5c8187efb 100755
--- a/impl/ascend_npu/CMakeLists.txt
+++ b/impl/ascend_npu/CMakeLists.txt
@@ -193,6 +193,7 @@ set(OLD_IMPL_SRC
     ${OLD_IMPL_DIR}/functions/masked_select.cpp
     ${OLD_IMPL_DIR}/functions/bernoulli.cpp
     ${OLD_IMPL_DIR}/functions/erfinv.cpp
+    # ${OLD_IMPL_DIR}/functions/unique.cpp
     ${OLD_IMPL_DIR}/functions_mmcv/roi_align_npu.cpp
     ${OLD_IMPL_DIR}/functions_ext/rms_norm.cpp
     ${OLD_IMPL_DIR}/functions_ext/rotary_embedding.cpp
diff --git a/impl/ascend_npu/ascend_config.yaml b/impl/ascend_npu/ascend_config.yaml
index f77205bfe..202132fdd 100755
--- a/impl/ascend_npu/ascend_config.yaml
+++ b/impl/ascend_npu/ascend_config.yaml
@@ -244,7 +244,7 @@ ascend:
 - diopiTriu
 - diopiTriuInp
 - diopiUniformInp
-- diopiUnique
+# - diopiUnique
 - diopiUpsampleLinear
 - diopiUpsampleLinearBackward
 - diopiUpsampleNearest
@@ -279,3 +279,4 @@ ascend_npu:
 - diopiTensorDestructionHook
 - diopiTokenAttentionInference
 - diopiTokenSoftmaxReduceVInference
+- diopiUnique
diff --git a/impl/ascend_npu/diopi_impl/bernoulli.cpp b/impl/ascend_npu/diopi_impl/bernoulli.cpp
new file mode 100644
index 000000000..3dc25e461
--- /dev/null
+++ b/impl/ascend_npu/diopi_impl/bernoulli.cpp
@@ -0,0 +1,25 @@
+/**
+ * @file
+ * @author DeepLink
+ * @copyright (c) 2024, DeepLink.
+ */
+
+#include <cstdint>
+
+#include "helper.hpp"
+#include "op_plugin/OpApiInterface.h"
+#include "op_plugin/utils/op_api_common.h"
+
+namespace OP_IMPL_NS {
+
+diopiError_t diopiBernoulliScalar(diopiContextHandle_t ctx, diopiTensorHandle_t out, double p, diopiGeneratorHandle_t generator) {
+    BEGIN_CALL_ACL_OP(out, generator);
+    auto pair = at::check_generator<at_npu::NPUGeneratorImpl>(generatorAt)->philox_engine_inputs(10);
+    const uint64_t seed = pair.first;
+    const uint64_t offset = pair.second;
+    auto pScalar = at::Scalar(p);
+    EXEC_NPU_CMD(aclnnInplaceBernoulli, outAt, pScalar, seed, offset);
+    END_CALL_ACL_OP();
+}
+
+} // namespace OP_IMPL_NS
diff --git a/impl/ascend_npu/diopi_impl/erfinv.cpp b/impl/ascend_npu/diopi_impl/erfinv.cpp
new file mode 100644
index 000000000..69303db79
--- /dev/null
+++ b/impl/ascend_npu/diopi_impl/erfinv.cpp
@@ -0,0 +1,24 @@
+/**
+ * @file
+ * @author DeepLink
+ * @copyright (c) 2024, DeepLink.
+ */
+
+#include "helper.hpp"
+#include "op_plugin/OpApiInterface.h"
+#include "op_plugin/utils/op_api_common.h"
+
+namespace OP_IMPL_NS {
+diopiError_t diopiErfinv(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) {
+    BEGIN_CALL_ACL_OP(input, out);
+    EXEC_NPU_CMD(aclnnErfinv, inputAt, outAt);
+    END_CALL_ACL_OP();
+}
+
+diopiError_t diopiErfinvInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) {
+    BEGIN_CALL_ACL_OP(input);
+    EXEC_NPU_CMD(aclnnInplaceErfinv, inputAt);
+    END_CALL_ACL_OP();
+}
+
+} // namespace OP_IMPL_NS
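The (seed, offset) pair handed to aclnnInplaceBernoulli comes from the philox counter-based generator, so the kernel can reproduce its random stream without host-side state. A host-side analogue using only the standard library, with std::mt19937_64 standing in for philox (purely illustrative, not the NPU code path):

    #include <cstdint>
    #include <random>
    #include <vector>

    // Fill `out` with 1.0f (probability p) or 0.0f. Discarding `offset` draws
    // first mirrors how the philox offset lets successive calls continue the
    // same stream instead of restarting it.
    void bernoulliFill(std::vector<float>& out, double p, uint64_t seed, uint64_t offset) {
        std::mt19937_64 engine(seed);
        engine.discard(offset);
        std::bernoulli_distribution flip(p);
        for (float& value : out) {
            value = flip(engine) ? 1.0f : 0.0f;
        }
    }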
diff --git a/impl/ascend_npu/diopi_impl/unique.cpp b/impl/ascend_npu/diopi_impl/unique.cpp
new file mode 100644
index 000000000..c8e3b610e
--- /dev/null
+++ b/impl/ascend_npu/diopi_impl/unique.cpp
@@ -0,0 +1,67 @@
+/**
+ * @file
+ * @author DeepLink
+ * @copyright (c) 2024, DeepLink.
+ */
+
+#include <tuple>
+
+#include "helper.hpp"
+#include "op_plugin/OpApiInterface.h"
+
+namespace OP_IMPL_NS {
+
+// dim given: dim-wise path via unique_consecutive
+diopiError_t diopiUnique1(diopiContextHandle_t ctx, diopiTensorHandle_t* out, diopiConstTensorHandle_t input, const int64_t* dim, bool sorted,
+                          bool return_counts, diopiTensorHandle_t indices, diopiTensorHandle_t* counts) {
+    bool return_inverse = (indices != nullptr);
+    BEGIN_CALL_ACL_OP(input);
+    c10::optional<int64_t> dimAt = dim ? c10::optional<int64_t>(*dim) : c10::nullopt;
+    at::Tensor y, y_inverse, y_counts;
+    std::tie(y, y_inverse, y_counts) = op_api::unique_consecutive(inputAt, return_inverse, return_counts, dimAt);
+
+    impl::aten::buildDiopiTensor(ctx, y, out);
+    if (return_inverse) {
+        impl::aten::buildDiopiTensor(ctx, y_inverse, &indices);
+    }
+    if (return_counts) {
+        impl::aten::buildDiopiTensor(ctx, y_counts, counts);
+    }
+    END_CALL_ACL_OP();
+}
+
+// no dim: flattened path via _unique2
+diopiError_t diopiUnique2(diopiContextHandle_t ctx, diopiTensorHandle_t* out, diopiConstTensorHandle_t input, const int64_t* dim, bool sorted,
+                          bool return_counts, diopiTensorHandle_t indices, diopiTensorHandle_t* counts) {
+    bool return_inverse = (indices != nullptr);
+    BEGIN_CALL_ACL_OP(input);
+    at::Tensor y, y_inverse, y_counts;
+    std::tie(y, y_inverse, y_counts) = op_api::_unique2(inputAt, sorted, return_inverse, return_counts);
+
+    impl::aten::buildDiopiTensor(ctx, y, out);
+    if (return_inverse) {
+        impl::aten::buildDiopiTensor(ctx, y_inverse, &indices);
+    }
+    if (return_counts) {
+        impl::aten::buildDiopiTensor(ctx, y_counts, counts);
+    }
+    END_CALL_ACL_OP();
+}
+
+diopiError_t diopiUnique(diopiContextHandle_t ctx, diopiTensorHandle_t* out, diopiConstTensorHandle_t input, const int64_t* dim, bool sorted,
+                         bool return_counts, diopiTensorHandle_t indices, diopiTensorHandle_t* counts) {
+    if (dim) {
+        return diopiUnique1(ctx, out, input, dim, sorted, return_counts, indices, counts);
+    }
+    return diopiUnique2(ctx, out, input, dim, sorted, return_counts, indices, counts);
+}
+
+} // namespace OP_IMPL_NS
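The std::tie unpacking in diopiUnique1/diopiUnique2 predates structured bindings; with C++17 the tuple can be unpacked in one declaration without pre-declaring the tensors. A standalone sketch with a stand-in function (fakeUnique is hypothetical, mimicking the (values, inverse, counts) tuple shape of the op_api calls):

    #include <tuple>
    #include <vector>

    // Stand-in for a unique-style call returning (values, inverse, counts).
    std::tuple<std::vector<int>, std::vector<int>, std::vector<int>> fakeUnique() {
        std::vector<int> values{1, 2};
        std::vector<int> inverse{0, 0, 1};
        std::vector<int> counts{2, 1};
        return {values, inverse, counts};
    }

    int main() {
        auto [values, inverse, counts] = fakeUnique();  // no pre-declared locals
        return (values.size() + inverse.size() + counts.size()) == 7 ? 0 : 1;
    }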
diff --git a/impl/camb/functions/unique.cpp b/impl/camb/functions/unique.cpp
index d2500018d..418454fb0 100644
--- a/impl/camb/functions/unique.cpp
+++ b/impl/camb/functions/unique.cpp
@@ -11,7 +11,7 @@ namespace camb {
 diopiError_t diopiUnique(diopiContextHandle_t ctx, diopiTensorHandle_t* out, diopiConstTensorHandle_t input, const int64_t* dim, bool sorted,
                          bool returnCounts, diopiTensorHandle_t indices, diopiTensorHandle_t* counts) {
     // version should be greater than 1.15.2
-#if (CNNL_MAJOR * 10000 + CNNL_MINOR * 100 + CNNL_PATCHLEVEL >= 11502)
+// #if (CNNL_MAJOR * 10000 + CNNL_MINOR * 100 + CNNL_PATCHLEVEL >= 11502)
     cnnlHandle_t handle = cnnlHandlePool.get(ctx);

     // input_tensor
@@ -36,10 +36,14 @@ diopiError_t diopiUnique(diopiContextHandle_t ctx, diopiTensorHandle_t* out, dio
     // output_tensor
     // require larger dimsize for output_tensor, it will be sliced to get final result
     DiopiTensor outputTensor =
-        (realDim != -1) ? requiresTensor(ctx, {inputTensor.shape()}, inputTensor.dtype()) : requiresTensor(ctx, {inputTensor.numel()}, inputTensor.dtype());
+        (realDim != -1)
+            ? requiresTensor(ctx, {inputTensor.shape()}, inputTensor.dtype())
+            : requiresTensor(ctx, {inputTensor.numel()}, inputTensor.dtype());
     // index_tensor
     DiopiTensor indexTensor =
-        (realDim != -1) ? requiresTensor(ctx, {inputTensor.shape()[realDim]}, diopi_dtype_int32) : requiresTensor(ctx, inputTensor.shape(), diopi_dtype_int32);
+        (realDim != -1)
+            ? requiresTensor(ctx, {inputTensor.shape()[realDim]}, diopi_dtype_int32)
+            : requiresTensor(ctx, inputTensor.shape(), diopi_dtype_int32);
     // counts_tensor
     DiopiTensor countsTensor = (realDim != -1) ? requiresTensor(ctx, {outputTensor.shape()[realDim]}, diopi_dtype_int32)
                                                : requiresTensor(ctx, outputTensor.shape(), diopi_dtype_int32);
diff --git a/impl/scripts/build_impl.sh b/impl/scripts/build_impl.sh
index 2c91e9a08..7b29cb933 100644
--- a/impl/scripts/build_impl.sh
+++ b/impl/scripts/build_impl.sh
@@ -54,8 +54,8 @@ case $1 in
         ;;
     ascend)
         mkdir -p build && cd build
-        cmake .. -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DIMPL_OPT=ascend -DCMAKE_BUILD_TYPE=Release -DTEST=ON -DENABLE_COVERAGE=${USE_COVERAGE}
-        make -j16
+        cmake .. -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DIMPL_OPT=ascend -DCMAKE_BUILD_TYPE=Debug -DTEST=ON -DENABLE_COVERAGE=${USE_COVERAGE}
+        make -j32
         ;;
     hip_pytorch)
         mkdir build && cd build