2 changes: 1 addition & 1 deletion adaptor/codegen/gen.py
@@ -196,7 +196,7 @@ def prepare() -> Tuple[dict, str]:

impl_plugin = options.impl_plugin
base_device = options.base_device
assert(base_device is None or base_device == "" or base_device == "torch", f"invalid base_device:{base_device}")
assert base_device is None or base_device == "" or base_device == "torch", f"invalid base_device:{base_device}"
if base_device == "":
base_device = None
def create_if_not_exist(name):
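The fix above addresses a classic Python pitfall: `assert(condition, message)` does not pass a message to `assert`; it asserts a two-element tuple, which is always truthy, so the check can never fail. A minimal standalone sketch of the difference (the value of `base_device` is illustrative):

```python
base_device = "cuda"  # deliberately invalid, for illustration

# Old form: this asserts the tuple (condition, message). A non-empty tuple is
# always truthy, so no AssertionError is ever raised (CPython only emits a
# SyntaxWarning about the tuple).
assert (base_device is None or base_device == "" or base_device == "torch", f"invalid base_device:{base_device}")

# New form: condition and message are separate, so the check actually fires.
try:
    assert base_device is None or base_device == "" or base_device == "torch", f"invalid base_device:{base_device}"
except AssertionError as err:
    print(err)  # invalid base_device:cuda
```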
13 changes: 12 additions & 1 deletion impl/ascend/device_configs.py
@@ -898,15 +898,26 @@
para=dict(
accumulate=[Skip(False),],
),
tensor_para=dict(
args=[
{
"ins": ['input'],
"shape": [Skip((16, 4, 4)),],
},
]
),
),

'index_put_bool_indices_value': dict( # llm used
name=['index_put'],
para=dict(
accumulate=[Skip(False),],
),
tensor_para=dict(
args=[
{
"ins": ['input'],
"shape": [Skip((3, 2, 2, 20)),],
"shape": [Skip((3, 2, 2, 20)), Skip((4, 2, 2, 6, 2))],
},
]
),
33 changes: 18 additions & 15 deletions impl/ascend/functions/index.cpp
@@ -1,4 +1,4 @@
/**

[GitHub Actions / cpp-linter annotation on line 1: run clang-format on impl/ascend/functions/index.cpp. File does not conform to Custom style guidelines. (lines 289)]
* @file
* @author DeepLink
* @copyright (c) 2024, DeepLink.
@@ -12,7 +12,8 @@
namespace impl {
namespace ascend {

static std::vector<AscendTensor> castIntIndicesToLongIndices(diopiContextHandle_t ctx, std::vector<AscendTensor>& indices) {
namespace indexProcess {
std::vector<AscendTensor> castIntIndicesToLongIndices(diopiContextHandle_t ctx, std::vector<AscendTensor>& indices) {
std::vector<AscendTensor> result;
for (auto& t : indices) {
if (!t.defined()) {
Expand All @@ -37,7 +38,7 @@
return result;
}

static void checkIndexTensorTypes(const std::vector<AscendTensor>& indices) {
void checkIndexTensorTypes(const std::vector<AscendTensor>& indices) {
for (const auto& t : indices) {
if (t.defined()) {
diopiDtype_t type = t.dtype();
Expand All @@ -47,7 +48,7 @@
}
}

static AscendTensor nonZeroTensor(diopiContextHandle_t ctx, const AscendTensor& self) {
AscendTensor nonZeroTensor(diopiContextHandle_t ctx, const AscendTensor& self) {
int64_t numELem = self.numel() * self.dim();
std::vector<int64_t> nShape{self.numel(), self.dim()};
std::vector<int64_t> nStride(nShape.size(), 1);
@@ -86,14 +87,14 @@
return AscendTensor(nzTrans);
}

static std::vector<AscendTensor> expandIndicesTensors(diopiContextHandle_t ctx, const AscendTensor& self, const std::vector<AscendTensor>& indices) {
std::vector<AscendTensor> expandIndicesTensors(diopiContextHandle_t ctx, const AscendTensor& self, const std::vector<AscendTensor>& indices) {
std::vector<AscendTensor> result;
for (auto& t : indices) {
if (!t.defined()) {
result.push_back(t);
} else {
if (t.dtype() == diopi_dtype_uint8 || t.dtype() == diopi_dtype_bool) {
ASCEND_CHECK(t.dtype() != diopi_dtype_uint8,
ASCEND_CHECK(t.dtype() == diopi_dtype_bool,
"indexing with dtype torch.uint8 is now deprecated,"
" please use a dtype torch.bool instead.");
for (uint64_t j = 0; j < static_cast<uint64_t>(t.dim()); j++) {
@@ -132,7 +133,7 @@
return result;
}

static aclTensor* createEmptyAclTensor() {
aclTensor* createEmptyAclTensor() {
std::vector<int64_t> nShape{0};
std::vector<int64_t> nStride{1};
int64_t storageSize = 0;
@@ -167,7 +168,7 @@
return result;
}

static bool hasContiguousSubspace(std::vector<AscendTensor> indices) { // true if all the non-null tensors are adjacent
bool hasContiguousSubspace(std::vector<AscendTensor> indices) { // true if all the non-null tensors are adjacent
auto isDefined = [](const AscendTensor& tensor) { return tensor.defined(); };
auto isNull = [](const AscendTensor& tensor) { return !tensor.defined(); };
auto start = std::find_if(indices.begin(), indices.end(), isDefined);
@@ -176,7 +177,7 @@
return it == stop.base();
}

static std::tuple<AscendTensor, std::vector<AscendTensor>> transposeToFront(AscendTensor self, std::vector<AscendTensor> indices) {
std::tuple<AscendTensor, std::vector<AscendTensor>> transposeToFront(AscendTensor self, std::vector<AscendTensor> indices) {
std::vector<int64_t> dims;
std::vector<AscendTensor> transposedIndices;

@@ -198,7 +199,7 @@
return std::make_tuple(self.permute(dims), transposedIndices);
}

static std::vector<int64_t> indexReshape(std::vector<AscendTensor> endIndices, int64_t dimsBefore, int64_t dimsAfter) {
std::vector<int64_t> indexReshape(std::vector<AscendTensor> endIndices, int64_t dimsBefore, int64_t dimsAfter) {
std::vector<int64_t> indexShape;
for (auto& idx : endIndices) {
if (idx.defined()) {
@@ -216,7 +217,7 @@
return indexShape;
}

static std::vector<int64_t> indexOutputSize(const AscendTensor& self, std::vector<AscendTensor>& indices) {
std::vector<int64_t> indexOutputSize(const AscendTensor& self, std::vector<AscendTensor>& indices) {
std::vector<AscendTensor> midIndices = indicesExpandedOutplace(indices);
while (midIndices.size() < (size_t)self.dim()) {
midIndices.emplace_back(nullptr);
@@ -269,6 +270,8 @@
return outputSize;
}

} // namespace indexProcess

diopiError_t diopiIndex(diopiContextHandle_t ctx, diopiTensorHandle_t* out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t* indices, int64_t nums) {
AscendTensor inputAt(input);
std::vector<AscendTensor> indicesOrigin(nums);
@@ -278,12 +281,13 @@
}
}

std::vector<AscendTensor> indicesList = castIntIndicesToLongIndices(ctx, indicesOrigin);
checkIndexTensorTypes(indicesList);
std::vector<AscendTensor> indicesList = indexProcess::castIntIndicesToLongIndices(ctx, indicesOrigin);
indexProcess::checkIndexTensorTypes(indicesList);

auto indicesExpanded = expandIndicesTensors(ctx, inputAt, indicesList);
auto indicesExpanded = indexProcess::expandIndicesTensors(ctx, inputAt, indicesList);

std::vector<aclTensor*> allDefinedIndices;

for (const auto& idx : indicesExpanded) {
if (idx.defined()) {
allDefinedIndices.push_back(aclnn_adaptor::createAclTensorFromAscendTensor(idx));
Expand All @@ -293,8 +297,7 @@
}
}

std::vector<int64_t> outShape = indexOutputSize(inputAt, indicesExpanded);

std::vector<int64_t> outShape = indexProcess::indexOutputSize(inputAt, indicesExpanded);
diopiSize_t outSize = vectorToDiopiSize(outShape);
diopiRequireTensor(ctx, out, &outSize, nullptr, inputAt.dtype(), diopi_device);

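A note on the pattern used throughout this file: the helpers were previously declared `static`, which gives them internal linkage and makes them invisible outside index.cpp. Moving them into the named namespace `indexProcess` and dropping `static` gives them external linkage, which is what allows index_put.cpp (next file) to reuse them through matching `extern` declarations. A minimal two-file sketch of that linkage pattern, with hypothetical file and function names:

```cpp
// helpers.cpp (hypothetical) -------------------------------------------------
// A non-static function in a named namespace has external linkage, so other
// translation units can call it once they have a matching declaration.
namespace indexProcess {
int sharedHelper(int x) { return x + 1; }
}  // namespace indexProcess

// caller.cpp (hypothetical) --------------------------------------------------
// The consumer re-declares the function; the signature must match the
// definition exactly, and the linker resolves the call at link time.
namespace indexProcess {
extern int sharedHelper(int x);
}  // namespace indexProcess

int useSharedHelper() { return indexProcess::sharedHelper(41); }  // returns 42
```

A shared header holding these declarations would give the same reuse while keeping the declarations from drifting out of sync with the definitions; the extern-declaration route shown below works as long as the signatures stay identical.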
62 changes: 58 additions & 4 deletions impl/ascend/functions/index_put.cpp
@@ -9,18 +9,72 @@

namespace impl {
namespace ascend {

namespace indexProcess {
extern std::vector<AscendTensor> castIntIndicesToLongIndices(diopiContextHandle_t ctx, std::vector<AscendTensor>& indices);
extern void checkIndexTensorTypes(const std::vector<AscendTensor>& indices);
extern std::vector<AscendTensor> expandIndicesTensors(diopiContextHandle_t ctx, const AscendTensor& self, const std::vector<AscendTensor>& indices);
extern aclTensor* createEmptyAclTensor();
} // namespace indexProcess

diopiError_t diopiIndexPut(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t values,
diopiConstTensorHandle_t* indices, int64_t indicesCounts, bool accumulate) {
diopiCopyInp(ctx, input, out);
std::vector<diopiConstTensorHandle_t> indicesVec(indices, indices + indicesCounts);
DIOPI_ASCEND_CALL_ACLNN(aclnnIndexPutImpl, ctx, out, indicesVec, values, accumulate, false);
AscendTensor inputAt(input);
AscendTensor valuesAt(values);
if (inputAt.numel() == 0 || valuesAt.numel() == 0) {
return diopiSuccess;
}
std::vector<AscendTensor> indicesOrigin(indicesCounts);
for (int64_t i = 0; i < indicesCounts; i++) {
if (indices[i] != nullptr) {
indicesOrigin[i] = AscendTensor(indices[i]);
}
}
std::vector<AscendTensor> indicesList = indexProcess::castIntIndicesToLongIndices(ctx, indicesOrigin);
indexProcess::checkIndexTensorTypes(indicesList);
auto indicesExpanded = indexProcess::expandIndicesTensors(ctx, inputAt, indicesList);
std::vector<aclTensor*> allDefinedIndices;
auto emptyTensor = indexProcess::createEmptyAclTensor();
for (const auto& idx : indicesExpanded) {
if (idx.defined()) {
allDefinedIndices.push_back(aclnn_adaptor::createAclTensorFromAscendTensor(idx));
} else {
allDefinedIndices.push_back(emptyTensor);
}
}

DIOPI_ASCEND_CALL_ACLNN(aclnnIndexPutImpl, ctx, out, allDefinedIndices, values, accumulate, false);
return diopiSuccess;
}

diopiError_t diopiIndexPutInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, diopiConstTensorHandle_t values, diopiConstTensorHandle_t* indices,
int64_t indicesCounts, bool accumulate) {
std::vector<diopiConstTensorHandle_t> indicesVec(indices, indices + indicesCounts);
DIOPI_ASCEND_CALL_ACLNN(aclnnIndexPutImpl, ctx, input, indicesVec, values, accumulate, false);
AscendTensor inputAt(input);
AscendTensor valuesAt(values);
if (inputAt.numel() == 0 || valuesAt.numel() == 0) {
return diopiSuccess;
}
std::vector<AscendTensor> indicesOrigin(indicesCounts);
for (int64_t i = 0; i < indicesCounts; i++) {
if (indices[i] != nullptr) {
indicesOrigin[i] = AscendTensor(indices[i]);
}
}
std::vector<AscendTensor> indicesList = indexProcess::castIntIndicesToLongIndices(ctx, indicesOrigin);
indexProcess::checkIndexTensorTypes(indicesList);
auto indicesExpanded = indexProcess::expandIndicesTensors(ctx, inputAt, indicesList);
std::vector<aclTensor*> allDefinedIndices;
auto emptyTensor = indexProcess::createEmptyAclTensor();
for (const auto& idx : indicesExpanded) {
if (idx.defined()) {
allDefinedIndices.push_back(aclnn_adaptor::createAclTensorFromAscendTensor(idx));
} else {
allDefinedIndices.push_back(emptyTensor);
}
}

DIOPI_ASCEND_CALL_ACLNN(aclnnIndexPutImpl, ctx, input, allDefinedIndices, values, accumulate, false);
return diopiSuccess;
}

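Both diopiIndexPut and diopiIndexPutInp now run the same preprocessing as diopiIndex: return early when input or values is empty, cast integer indices to int64, validate index dtypes, expand bool masks into long indices, and substitute an empty aclTensor for undefined slots so each index keeps its dimension position. For reference, a minimal PyTorch sketch of the semantics these kernels mirror (assuming standard PyTorch advanced-indexing behaviour; shapes and values are illustrative):

```python
import torch

x = torch.zeros(3, 4, 5)
i = torch.tensor([0, 2])   # long indices for dim 0
j = torch.tensor([1, 3])   # long indices for dim 2
v = torch.ones(2, 4)       # one row of dim 1 per (i, j) pair

# An undefined ("null") index slot behaves like a full slice over that dim,
# which is why the code above keeps a placeholder tensor in that position.
x[i, :, j] = v

# accumulate=True adds into the existing values instead of overwriting them.
x.index_put_((i, torch.zeros(2, dtype=torch.long), j),
             torch.full((2,), 5.0), accumulate=True)

# A boolean mask is first converted to long indices (cf. expandIndicesTensors
# and nonZeroTensor above), matching x[mask] = value semantics.
mask = x[:, 0, 0] > 0
x[mask] = 0.0
```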