Zgc/diopi ascend fix cat2 (DeepLink-org#785)

zhaoguochun1995 · web-flow · commit cdef7df6cdc7 · 2023-12-28T13:27:57.000+08:00
* fix cat bug

* use cast in op-plugin and optimize cat

* skip cat double test case

* enhance empty check

* remove OpCommandImpls relate

* use diopiCopyInp and diopiDtypeCast

* fallback cast to cpu

* skip some test case: contiguous to no contiguous

* fix copy and cast bug

* support stride_copy_support double

* enable some test case

* skip double test case in copy
diff --git a/impl/ascend/common/acloprunner.hpp b/impl/ascend/common/acloprunner.hpp
diff --git a/impl/ascend/device_configs.py b/impl/ascend/device_configs.py
@@ -207,9 +207,28 @@
     ),
 
     'conv_2d_no_contiguous': dict(
-        name=['conv2d'],
-        atol=1e-1,
-        rtol=1e-2,
+        name=["conv2d"],
+        tensor_para=dict(
+            args=[
+                {
+                    "ins": ["input"],
+                    "dtype": [Skip(np.float32), Skip(np.float16), Skip(np.float64)],
+                },
+            ]
+        ),
+    ),
+
+    'relu_no_contiguous': dict(
+        name=["relu"],
+        is_inplace=True,
+        tensor_para=dict(
+            args=[
+                {
+                    "ins": ['input'],
+                    "dtype": [Skip(np.float32), Skip(np.float64)],
+                },
+            ],
+        ),
     ),
 
     'hardswish': dict(
@@ -1312,78 +1331,6 @@
         ),
     ),
 
-    'remainder_self_scalar': dict(
-        name=['remainder'],
-        tensor_para=dict(
-            args=[
-                {
-                    "ins": ['other'],
-                    "dtype": [Skip(np.float32),Skip(np.float64),Skip(np.float16),Skip(np.int16),Skip(np.int32),Skip(np.int64),Skip(np.int8),Skip(np.uint8),Skip(np.bool_),],
-                },
-            ]
-        ),
-    ),
-
-    'remainder_self_bool': dict(
-        name=['remainder'],
-        tensor_para=dict(
-            args=[
-                {
-                    "ins": ['other'],
-                    "dtype": [Skip(np.float32),Skip(np.float64),Skip(np.float16),Skip(np.int16),Skip(np.int32),Skip(np.int64),Skip(np.int8),Skip(np.uint8),Skip(np.bool_),],
-                },
-            ]
-        ),
-    ),
-
-    'remainder_tensor': dict(
-        name=['remainder'],
-        tensor_para=dict(
-            args=[
-                {
-                    "ins": ['input'],
-                    "dtype": [Skip(np.float32),Skip(np.float64),Skip(np.float16),Skip(np.int16),Skip(np.int32),Skip(np.int64),Skip(np.int8),Skip(np.uint8),Skip(np.bool_),],
-                },
-            ]
-        ),
-    ),
-
-    'remainder_tensor_zero': dict(
-        name=['remainder'],
-        tensor_para=dict(
-            args=[
-                {
-                    "ins": ['input'],
-                    "dtype": [Skip(np.int16),Skip(np.uint8),Skip(np.int8),],
-                },
-            ]
-        ),
-    ),
-
-    'remainder_other_scalar': dict(
-        name=['remainder'],
-        tensor_para=dict(
-            args=[
-                {
-                    "ins": ['input'],
-                    "dtype": [Skip(np.int16),Skip(np.int32),Skip(np.int64),Skip(np.uint8),Skip(np.int8),Skip(np.bool_),Skip(np.float16),Skip(np.float32),Skip(np.float64)],
-                },
-            ]
-        ),
-    ),
-
-    'remainder_other_scalar_bool': dict(
-        name=['remainder'],
-        tensor_para=dict(
-            args=[
-                {
-                    "ins": ['input'],
-                    "dtype": [Skip(np.float32),Skip(np.float64),Skip(np.float16),Skip(np.int16),Skip(np.int32),Skip(np.int64),Skip(np.uint8),Skip(np.int8),],
-                },
-            ]
-        ),
-    ),
-
     'gather': dict(
         name=['gather'],
         tensor_para=dict(
@@ -1596,11 +1543,11 @@
                 {
                     "ins": ["input"],
                     "shape": [Skip((12, 0, 9)), Skip((8,))],
-                    "dtype": [Skip(np.complex128), Skip(np.complex64)],
+                    "dtype": [Skip(np.complex128), Skip(np.complex64), Skip(np.float64)],
                 },
                 {
                     "ins": ["other"],
-                    "dtype": [Skip(np.complex128)]
+                    "dtype": [Skip(np.complex128), Skip(np.float64)]
                 },
             ]
         )
@@ -1614,7 +1561,7 @@
                 {
                     "ins": ["input"],
                     "shape": [Skip((12, 1, 12)),],
-                    "dtype": [Skip(np.complex128), Skip(np.complex64)],
+                    "dtype": [Skip(np.complex128), Skip(np.complex64), Skip(np.float64)],
                 },
                 {
                     "ins": ["other"],
@@ -1632,12 +1579,12 @@
             args=[
                 {
                     "ins": ["input"],
-                    "shape": [Skip((6, 5, 384))],
-                    "dtype": [Skip(np.complex128), Skip(np.complex64)],
+                    "shape": [Skip((6, 5, 384)), Skip((2, 4, 38, 45))],
+                    "dtype": [Skip(np.complex128), Skip(np.complex64), Skip(np.float64)],
                 },
                 {
                     "ins": ["other"],
-                    "dtype": [Skip(np.complex128)],
+                    "dtype": [Skip(np.complex128), Skip(np.float64)],
                 },
             ]
         )
@@ -1650,11 +1597,12 @@
             args=[
                 {
                     "ins": ["input"],
-                    "shape": [Skip((192, 147, 2)), Skip((2, 12, 38, 45, 3))],
+                    "shape": [Skip((192, 147)), Skip((192, 147, 2)), Skip((2, 12, 38, 45, 3))],
+                    "dtype": [Skip(np.complex128), Skip(np.complex64), Skip(np.float64)],
                 },
                 {
                     "ins": ["other"],
-                    "dtype": [Skip(np.complex64)],
+                    "dtype": [Skip(np.complex64), Skip(np.float64)],
                 },
             ]
         )
diff --git a/impl/ascend/functions/cast.cpp b/impl/ascend/functions/cast.cpp
@@ -7,8 +7,14 @@
 #include "../common/acloprunner.hpp"
 
 namespace impl {
-namespace ascend {
 
+// TODO(zhaoguochun): fix me
+namespace ascend_npu {
+extern diopiError_t diopiCastDtype(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
+}
+
+namespace ascend {
+#if 0
 diopiError_t diopiCastDtype(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) {
     int64_t numel = 0;
     diopiGetTensorNumel(input, &numel);
@@ -57,6 +63,11 @@ diopiError_t diopiCastDtype(diopiContextHandle_t ctx, diopiTensorHandle_t out, d
 
     return diopiSuccess;
 }
+#endif
+
+diopiError_t diopiCastDtype(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) {
+    return ascend_npu::diopiCastDtype(ctx, out, input);
+}
 
 }  // namespace ascend
 }  // namespace impl
diff --git a/impl/ascend_npu/ascend_config.yaml b/impl/ascend_npu/ascend_config.yaml
@@ -17,8 +17,6 @@ ascend:
 - diopiBitwiseAnd
 - diopiBitwiseNot
 - diopiBmm
-- diopiCastDtype
-- diopiCat
 - diopiClamp
 - diopiClampInp
 - diopiClampInpScalar
@@ -32,7 +30,6 @@ ascend:
 - diopiClampMinScalar
 - diopiClampScalar
 - diopiContiguous
-- diopiCopyInp
 - diopiCos
 - diopiCosInp
 - diopiCrossEntropyLoss
@@ -205,6 +202,9 @@ ascend:
 - diopiApplyPenalty
 - diopiFormatCast
 ascend_npu:
+- diopiCastDtype
+- diopiCopyInp
+- diopiCat
 - diopiRemainderTensor
 - diopiRemainderScalar
 - diopiRemainder
diff --git a/impl/ascend_npu/diopi_impl/cast.cpp b/impl/ascend_npu/diopi_impl/cast.cpp
@@ -0,0 +1,21 @@
+/**
+ * @file
+ * @author DeepLink
+ * @copyright  (c) 2023, DeepLink.
+ */
+
+#include "helper.hpp"
+#include "op_plugin/AclOpsInterface.h"
+
+namespace OP_IMPL_NS {
+
+diopiError_t diopiCastDtype(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) {
+    BEGIN_CALL_ACL_OP(input, out);
+    if (out == nullptr || input == nullptr || !inputAt.defined() || !outAt.defined() || inputAt.numel() <= 0 || outAt.numel() <= 0) {
+        return diopiSuccess;
+    }
+    outAt.copy_(inputAt);
+    END_CALL_ACL_OP();
+}
+
+}  // namespace OP_IMPL_NS
diff --git a/impl/ascend_npu/diopi_impl/cat.cpp b/impl/ascend_npu/diopi_impl/cat.cpp
@@ -0,0 +1,40 @@
+/**
+ * @file
+ * @author DeepLink
+ * @copyright  (c) 2023, DeepLink.
+ */
+
+#include "helper.hpp"
+#include "op_plugin/AclOpsInterface.h"
+
+namespace OP_IMPL_NS {
+
+diopiError_t diopiCat(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t* tensors, int64_t numInputs, int64_t dim) {
+    BEGIN_CALL_ACL_OP(out);
+    at::Tensor outTempAt = outAt;
+    if (outAt.scalar_type() == at::kDouble) {
+        outTempAt = outAt.to(at::kFloat);
+    } else if (outAt.scalar_type() == at::kLong) {
+        outTempAt = outAt.to(at::kInt);
+    }
+
+    std::vector<at::Tensor> tensorsAt;
+    tensorsAt.reserve(numInputs);
+    for (int i = 0; i < numInputs; i++) {
+        auto tensorAt = impl::aten::buildATen(tensors[i]);
+        if (!tensorAt.defined() || tensorAt.numel() <= 0) {
+            continue;
+        }
+        tensorsAt.push_back(tensorAt.to(outTempAt.scalar_type()));
+    }
+    if (!tensorsAt.empty()) {
+        acl_op::cat_out(tensorsAt, dim, outTempAt);
+    }
+    if (outAt.scalar_type() != outTempAt.scalar_type()) {
+        outAt.copy_(outTempAt);
+    }
+
+    END_CALL_ACL_OP();
+}
+
+}  // namespace OP_IMPL_NS
diff --git a/impl/ascend_npu/diopi_impl/copy.cpp b/impl/ascend_npu/diopi_impl/copy.cpp
@@ -12,7 +12,7 @@ namespace OP_IMPL_NS {
 
 diopiError_t diopiCopyInp(diopiContextHandle_t ctx, diopiConstTensorHandle_t src, diopiTensorHandle_t dest) {
     BEGIN_CALL_ACL_OP(src, dest);
-    if (!srcAt.defined() || !destAt.defined()) {
+    if (src == nullptr || dest == nullptr || !srcAt.defined() || !destAt.defined() || srcAt.numel() <= 0 || destAt.numel() <= 0) {
         return diopiSuccess;
     }
     at_npu::native::NPUNativeFunctions::copy_(destAt, srcAt, false);
diff --git a/impl/ascend_npu/diopi_impl/helper.hpp b/impl/ascend_npu/diopi_impl/helper.hpp
@@ -108,8 +108,11 @@ inline int debugLevel() {
     impl::aten::setCurCtx(ctx);                                                        \
     BUILD_ATEN_ARGS(__VA_ARGS__)
 
-#define END_CALL_ACL_OP()      \
-    impl::aten::unsetCurCtx(); \
+#define END_CALL_ACL_OP()                                                                         \
+    impl::aten::unsetCurCtx();                                                                    \
+    if (debugLevel()) {                                                                           \
+        std::cout << __FILE__ << ":" << __LINE__ << " :" << __FUNCTION__ << " over" << std::endl; \
+    }                                                                                             \
     return diopiSuccess;
 
 inline void logError() { std::cerr << std::endl; }
diff --git a/impl/ascend_npu/torch_npu/csrc/CopyKernel.cpp b/impl/ascend_npu/torch_npu/csrc/CopyKernel.cpp
@@ -282,7 +282,7 @@ bool try_to_optimize_copy_with_any_format(at::Tensor& self, const at::Tensor& sr
 }
 
 at::Tensor& NPUNativeFunctions::copy_(at::Tensor& self, const at::Tensor& src, bool non_blocking) {
-    if (self.numel() == 0) {
+    if (!self.defined() || self.numel() == 0) {
         return self;
     }
     // save tensor dim name
diff --git a/impl/ascend_npu/torch_npu/csrc/DIOPIAdapter.cpp b/impl/ascend_npu/torch_npu/csrc/DIOPIAdapter.cpp
diff --git a/impl/ascend_npu/torch_npu/csrc/NPUNativeFunctions.cpp b/impl/ascend_npu/torch_npu/csrc/NPUNativeFunctions.cpp
diff --git a/impl/ascend_npu/torch_npu/csrc/framework/DIOPIAdapter.h b/impl/ascend_npu/torch_npu/csrc/framework/DIOPIAdapter.h

Original file line number	Diff line number	Diff line change
`@@ -12,7 +12,7 @@ namespace OP_IMPL_NS {`
`12`	`12`
`13`	`13`	`diopiError_t diopiCopyInp(diopiContextHandle_t ctx, diopiConstTensorHandle_t src, diopiTensorHandle_t dest) {`
`14`	`14`	`BEGIN_CALL_ACL_OP(src, dest);`
`15`		`- if (!srcAt.defined() \|\| !destAt.defined()) {`
	`15`	`+ if (src == nullptr \|\| dest == nullptr \|\| !srcAt.defined() \|\| !destAt.defined() \|\| srcAt.numel() <= 0 \|\| destAt.numel() <= 0) {`
`16`	`16`	`return diopiSuccess;`
`17`	`17`	`}`
`18`	`18`	`at_npu::native::NPUNativeFunctions::copy_(destAt, srcAt, false);`
Original file line number	Diff line number	Diff line change
`@@ -282,7 +282,7 @@ bool try_to_optimize_copy_with_any_format(at::Tensor& self, const at::Tensor& sr`
`282`	`282`	`}`
`283`	`283`
`284`	`284`	`at::Tensor& NPUNativeFunctions::copy_(at::Tensor& self, const at::Tensor& src, bool non_blocking) {`
`285`		`- if (self.numel() == 0) {`
	`285`	`+ if (!self.defined() \|\| self.numel() == 0) {`
`286`	`286`	`return self;`
`287`	`287`	`}`
`288`	`288`	`// save tensor dim name`