DeepLink-org
diff --git a/diopi_test/diopi_stub/codegen/gen.py b/diopi_test/diopi_stub/codegen/gen.py
Lines changed: 15 additions & 2 deletions
diff --git a/diopi_test/diopi_stub/codegen/op_template.py b/diopi_test/diopi_stub/codegen/op_template.py
Lines changed: 6 additions & 0 deletions
diff --git a/diopi_test/python/configs/diopi_configs.py b/diopi_test/python/configs/diopi_configs.py
Lines changed: 174 additions & 10 deletions
@@ -50,6 +50,7 @@ def get_func_info(content):
     paras_can_be_none = []
     ins_vector, outs_vector = {}, {}
     out_ptr = []
+    var_len_array_out = {}
     type_change = False
     row = content.replace('\n', '').replace('(', ',').replace(')', '')
     arg_define = row.split(',')
@@ -73,6 +74,16 @@ def get_func_info(content):
                         out_ptr.append(arg_index)
                         arg_type = 'PtrWrapper<diopiTensor>'
                         break
+                    elif next_arg[0] == 'int64_t*':
+                        type_change = True
+                        next_arg_process = '(*static_cast<int64_t*>(' + next_arg[1] + '))'
+                        if arg_type == 'diopiTensorHandle_t*':
+                            outs_vector[arg_index] = next_arg_process
+                        else:
+                            ins_vector[arg_index] = next_arg_process
+                        arg_type = 'py::list&'
+                        var_len_array_out[arg_index] = ({"param": arg, "param_num": next_arg_process})
+                        break
                     elif next_arg[0] == 'int64_t':
                         type_change = True
                         if arg_type == 'diopiTensorHandle_t*':
@@ -98,7 +109,7 @@ def get_func_info(content):
             if arg_type in can_be_none:
                 paras_can_be_none.append(len(args) - 1)
             arg_index += 1
-    return type_change, args, attr_types, paras_can_be_none, ins_vector, outs_vector, out_ptr
+    return type_change, args, attr_types, paras_can_be_none, ins_vector, outs_vector, out_ptr, var_len_array_out
 
 
 def get_export(content, ft, exports):
@@ -116,7 +127,7 @@ def get_export(content, ft, exports):
                 idx2 = row1.find(")")
                 temp_content += row1.replace(';', '')
                 idx += 1
-            type_change, args, attr_types, paras_none, ins_vector, outs_vector, out_ptr = get_func_info(temp_content)
+            type_change, args, attr_types, paras_none, ins_vector, outs_vector, out_ptr, var_len_array_out = get_func_info(temp_content)
             call_args = copy.deepcopy(args)
             type_change = True
             if type_change:
@@ -142,6 +153,8 @@ def get_export(content, ft, exports):
                     out_copy += "if ({param}.get() != nullptr && {param}Handle != nullptr)\n \
     *{param} = *{param}Handle;\n".format(param=call_args[out])
                     call_args[out] = '&' + call_args[out] + 'Handle'
+                for out_array in var_len_array_out.values():
+                    out_copy += OT.var_len_array_out_template.substitute(param=out_array['param'], param_num=out_array['param_num'])
                 call_func = func_name + '(' + ', '.join(call_args) + ')'
                 exports.append(ft.substitute(env=dict(func_name=func_name, attrs=', '.join(attrs), convert=convert,
                                                       out_copy=out_copy, call_func=call_func)))
 
@@ -48,4 +48,10 @@ class OpTemplate(object):
 for (int i = 0; i < ${param_num}; ++i)
     ${param}V[i] = ${param}[i].cast<PtrWrapper<diopiTensor>>().get();
 auto ${param}DIOPI = ${param}V.data();
+""")
+
+    var_len_array_out_template = CodeTemplate("""\
+for (int i = 0; i < ${param_num}; ++i) {
+    ${param}[i] = ${param}DIOPI[i];
+}
 """)
@@ -8974,6 +8974,170 @@
         ),
     ),
 
+    'attention': dict(
+        name=['attention'],
+        interface=['CustomizedTest'],
+        dtype=[np.float16],
+        saved_args=dict(out=0),
+        atol_half=5e-2,
+        rtol_half=5e-2,
+        para=dict(
+            dropout_p=[0, 0, 0, 0,
+                       0, 0, 0, 0,
+                       0, 0, 0, 0,
+                       0, 0, 0, 0,
+                       0, 0, 0, 0,
+                       0, 0, 0, 0],
+            is_causal=[True, False, True, False,
+                       True, False, True, True,
+                       True, True, True, False,
+                       False, True, False, True,
+                       True, False, True, False,
+                       False, True, False, True,],
+            scale=[0.0883, None, 0.125, None,
+                   0.0883, None, 0.125, 0.0625,
+                   0.0883, 0.0221, None, 0.0625,
+                   None, None, None, None,
+                   None, None, None, None,
+                   None, None, None, None],
+        ),
+        tensor_para=dict(
+            gen_fn='Genfunc.rand',
+            args=[
+                {
+                    "ins": ['query'],
+                    "shape": ((1, 64, 64, 128), (1, 64, 32, 128), (1, 32, 64, 512), (8, 128, 32, 256),
+                              (2, 64, 128, 128), (4, 16, 256, 128), (6, 32, 32, 128), (8, 8, 256, 64),
+                              (2, 128, 64, 128), (4, 512, 128, 64), (6, 32, 128, 256), (8, 1024, 8, 64),
+                              (2, 64, 128, 128), (4, 16, 256, 128), (6, 128, 32, 128), (8, 8, 256, 64),
+                              (64, 8, 8, 16), (8, 32, 256, 512), (16, 8, 256, 128), (8, 16, 256, 64),
+                              (1, 64, 64, 128), (1, 256, 16, 128), (1, 64, 32, 128), (1, 16, 8, 64),),
+                    "requires_grad": [True],
+                },
+                {
+                    "ins": ['key'],
+                    "shape": ((1, 64, 64, 128), (1, 64, 32, 128), (1, 32, 64, 512), (8, 128, 32, 256),
+                              (2, 64, 128, 128), (4, 16, 256, 128), (6, 32, 32, 128), (8, 8, 256, 64),
+                              (2, 128, 64, 128), (4, 512, 128, 64), (6, 32, 128, 256), (8, 512, 8, 64),
+                              (2, 64, 128, 128), (4, 16, 256, 128), (6, 32, 32, 128), (8, 8, 256, 64),
+                              (64, 8, 8, 16), (8, 32, 256, 512), (16, 8, 256, 128), (8, 16, 256, 64),
+                              (1, 64, 64, 128), (1, 256, 16, 128), (1, 64, 32, 128), (1, 16, 8, 64),),
+                    "requires_grad": [True],
+                },
+                {
+                    "ins": ['value'],
+                    "shape": ((1, 64, 64, 128), (1, 64, 32, 128), (1, 32, 64, 512), (8, 128, 32, 256),
+                              (2, 64, 128, 128), (4, 16, 256, 128), (6, 32, 32, 128), (8, 8, 256, 64),
+                              (2, 128, 64, 128), (4, 512, 128, 64), (6, 32, 128, 256), (8, 512, 8, 64),
+                              (2, 64, 128, 128), (4, 16, 256, 128), (6, 32, 32, 128), (8, 8, 256, 64),
+                              (64, 8, 8, 16), (8, 32, 256, 512), (16, 8, 256, 128), (8, 16, 256, 64),
+                              (1, 64, 64, 128), (1, 256, 16, 128), (1, 64, 32, 128), (1, 16, 8, 64),),
+                    "requires_grad": [True],
+                },
+                {
+                    "ins": ['attn_bias'],
+                    "shape": (None, None, None, (8, 32, 128, 128),
+                              None, None, None, None,
+                               None, None, None, None,
+                              None, None, None, None,
+                              None, None, None, None,
+                               None, None, None, None,),
+                    "requires_grad": [False],
+                },
+            ],
+        ),
+    ),
+
+    'attention_varlen': dict(
+        name=['attention_varlen'],
+        interface=['CustomizedTest'],
+        dtype=[np.float16],
+        saved_args=dict(out=0),
+        atol=1e-3,
+        rtol=1e-4,
+        para=dict(
+            dropout_p=[0, 0, 0, 0,
+                       0, 0, 0, 0,
+                       0, 0, 0, 0,
+                       0, 0, 0, 0,
+                       0, 0, 0, 0],
+            is_causal=[False, True, False, True,
+                       True, False, True, False,
+                       True, False, True, True,
+                       True, True, False, True,
+                       False, True, False, False],
+            scale=[None, 0.0883, None, 0.125,
+                   None, None, None, None,
+                   None, None, None, None,
+                   None, None, None, None,
+                   None, None, None, None],
+            max_seqlen_q=[32, 32, 128, 64,
+                          32, 32, 128, 64,
+                          384, 384, 64, 53,
+                          400, 200, 64, 131,
+                          1024, 1024, 256, 72],
+            max_seqlen_kv=[32, 32, 128, 64,
+                           32, 32, 128, 64,
+                           384, 384, 64, 53,
+                           400, 200, 64, 131,
+                           1024, 1024, 256, 72],
+        ),
+        tensor_para=dict(
+            gen_fn='Genfunc.randn',
+            args=[
+                {
+                    "ins": ['query'],
+                    "shape": ((32, 32, 128), (64, 64, 128), (256, 16, 128), (128, 8, 64),
+                              (32, 32, 128), (64, 64, 128), (256, 16, 128), (128, 8, 64),
+                              (1098, 64, 256), (128, 64, 128), (128, 16, 128), (128, 8, 32),
+                              (2048, 32, 128), (2048, 32, 8), (256, 256, 128), (512, 256, 128),
+                              (4096, 128, 64), (4096, 128, 64), (512, 128, 8), (256, 128, 128),),
+                    "requires_grad": [True],
+                },
+                {
+                    "ins": ['key'],
+                    "shape": ((32, 32, 128), (64, 64, 128), (256, 16, 128), (128, 8, 64),
+                              (32, 32, 128), (64, 64, 128), (256, 16, 128), (128, 8, 64),
+                              (1098, 64, 256), (128, 64, 128), (128, 16, 128), (128, 8, 32),
+                              (2048, 32, 128), (2048, 32, 8), (256, 256, 128), (512, 256, 128),
+                              (4096, 128, 64), (4096, 128, 64), (512, 128, 8), (256, 128, 128),),
+                    "requires_grad": [True],
+                },
+                {
+                    "ins": ['value'],
+                    "shape": ((32, 32, 128), (64, 64, 128), (256, 16, 128), (128, 8, 64),
+                              (32, 32, 128), (64, 64, 128), (256, 16, 128), (128, 8, 64),
+                              (1098, 64, 256), (128, 64, 128), (128, 16, 128), (128, 8, 32),
+                              (2048, 32, 128), (2048, 32, 8), (256, 256, 128), (512, 256, 128),
+                              (4096, 128, 64), (4096, 128, 64), (512, 128, 8), (256, 128, 128),),
+                    "requires_grad": [True],
+                },
+                {
+                    "ins": ['cu_seqlens_q'],
+                    "value": ([0, 32], [0, 16, 48, 64], [0, 32, 64, 128, 256], [0, 16, 48, 64, 128],
+                              [0, 32], [0, 16, 48, 64], [0, 32, 64, 128, 256], [0, 16, 48, 64, 128],
+                              [0, 200, 352, 600, 616, 1000, 1098], [0, 16, 48, 64, 128], [0, 16, 48, 64, 128], [0, 16, 48, 64, 75, 128],
+                              [0, 100, 300, 600, 1000, 1250, 1490, 1800, 1900, 2048], [0, 100, 150, 300, 500, 600, 800, 1000, 1150, 1250, 1300, 1490, 1600, 1800, 1900, 2048], [0, 32, 64, 96, 128, 160, 192, 256], [0, 2, 7, 19, 32, 64, 96, 128, 256, 387, 512],
+                              [0, 1024, 2048, 3072, 4000, 4096], [0, 1024, 2048, 3072, 4096], [0, 26, 52, 79, 112, 128, 256, 512], [0, 11, 32, 90, 128, 200, 256],),
+                    "gen_policy": "gen_tensor_by_value",
+                    "dtype": [np.int64],
+                    "requires_grad": [False],
+                },
+                {
+                    "ins": ['cu_seqlens_kv'],
+                    "value": ([0, 32], [0, 16, 48, 64], [0, 32, 64, 128, 256], [0, 16, 48, 64, 128],
+                              [0, 32], [0, 16, 48, 64], [0, 32, 64, 128, 256], [0, 16, 48, 64, 128],
+                              [0, 200, 352, 600, 616, 1000, 1098], [0, 16, 48, 64, 128], [0, 16, 48, 64, 128], [0, 16, 48, 64, 75, 128],
+                              [0, 100, 300, 600, 1000, 1250, 1490, 1800, 1900, 2048], [0, 100, 150, 300, 500, 600, 800, 1000, 1150, 1250, 1300, 1490, 1600, 1800, 1900, 2048], [0, 32, 64, 96, 128, 160, 192, 256], [0, 2, 7, 19, 32, 64, 96, 128, 256, 387, 512],
+                              [0, 1024, 2048, 3072, 4000, 4096], [0, 1024, 2048, 3072, 4096], [0, 26, 52, 79, 112, 128, 256, 512], [0, 11, 32, 90, 128, 200, 256],),
+                    "dtype": [np.int64],
+                    "gen_policy": "gen_tensor_by_value",
+                    "requires_grad": [False],
+                },
+            ],
+        ),
+    ),
+
     'flash_attention_varlen': dict(
         name=['flash_attention_varlen'],
         interface=['CustomizedTest'],
@@ -8982,28 +9146,28 @@
         atol=1e-3,
         rtol=1e-4,
         para=dict(
-            p_dropout=[0, 0, 0, 0],
-            is_causal=[True, True, False, True],
-            softmax_scale=[None, 0.0883, None, 0.125],
-            max_seqlen_q=[32, 32, 128, 64],
-            max_seqlen_kv=[32, 32, 128, 64],
-            cu_seqlens_q=[[0, 32], [0, 16, 48, 64], [0, 32, 64, 128, 256], [0, 16, 48, 64, 128]],
-            cu_seqlens_kv=[[0, 32], [0, 16, 48, 64], [0, 32, 64, 128, 256], [0, 16, 48, 64, 128]],
+            p_dropout=[0, 0, 0, 0, 0],
+            is_causal=[True, True, False, True, False],
+            softmax_scale=[None, 0.0883, None, 0.125, None],
+            max_seqlen_q=[32, 32, 128, 64, 256],
+            max_seqlen_kv=[32, 32, 128, 64, 256],
+            cu_seqlens_q=[[0, 32], [0, 16, 48, 64], [0, 32, 64, 128, 256], [0, 16, 48, 64, 128], [0, 2, 7, 19, 32, 64, 96, 128, 256, 512]],
+            cu_seqlens_kv=[[0, 32], [0, 16, 48, 64], [0, 32, 64, 128, 256], [0, 16, 48, 64, 128], [0, 2, 7, 19, 32, 64, 96, 128, 256, 512]],
         ),
         tensor_para=dict(
             gen_fn='Genfunc.randn',
             args=[
                 {
                     "ins": ['q'],
-                    "shape": ((32, 32, 128), (64, 64, 128), (256, 16, 128), (128, 8, 64)),
+                    "shape": ((32, 32, 128), (64, 64, 128), (256, 16, 128), (128, 8, 64), (512, 8, 64)),
                 },
                 {
                     "ins": ['k'],
-                    "shape": ((32, 32, 128), (64, 64, 128), (256, 16, 128), (128, 8, 64)),
+                    "shape": ((32, 32, 128), (64, 64, 128), (256, 16, 128), (128, 8, 64), (512, 8, 64)),
                 },
                 {
                     "ins": ['v'],
-                    "shape": ((32, 32, 128), (64, 64, 128), (256, 16, 128), (128, 8, 64)),
+                    "shape": ((32, 32, 128), (64, 64, 128), (256, 16, 128), (128, 8, 64), (512, 8, 64)),
                 },
             ],
         ),