1 file changed: +4 -4 lines changed
tritonbench/operators/gdpa (1 file changed: +4 -4 lines changed)
Columns: original file line number | diff line number | diff line change
@@ -289,7 +289,7 @@ def gdpa_kernel_tma_ws_blackwell(
289289
290290 with tlx .async_tasks ():
291291 # activation calculation
292- with tlx .async_task ("default" ):
292+ with tlx .async_task ("default" , registers = 192 ):
293293 accum_cnt = 0
294294 accum_cnt_outer = 0
295295 for _ in range (0 , tiles_per_sm ):
@@ -387,7 +387,7 @@ def gdpa_kernel_tma_ws_blackwell(
387387 accum_cnt_outer += 1
388388 tile_idx += num_progs
389389
390- with tlx .async_task (num_warps = 4 ):
390+ with tlx .async_task (num_warps = 4 , registers = 192 ):
391391 accum_cnt = 0
392392 accum_cnt_outer = 0
393393 for _ in range (0 , tiles_per_sm ):
@@ -477,7 +477,7 @@ def gdpa_kernel_tma_ws_blackwell(
477477 accum_cnt_outer += 1
478478 tile_idx += num_progs
479479
480- with tlx .async_task (num_warps = 1 ): # gemm
480+ with tlx .async_task (num_warps = 1 , registers = 24 ): # gemm
481481 accum_cnt_q = 0
482482 accum_cnt_kv = 0
483483 accum_cnt_o = 0
@@ -832,7 +832,7 @@ def gdpa_kernel_tma_ws_blackwell(
832832 # to wait for the completion
833833 tile_idx += num_progs
834834
835- with tlx .async_task (num_warps = 1 ): # load
835+ with tlx .async_task (num_warps = 1 , registers = 24 ): # load
836836 accum_count_q = 0
837837 accum_cnt_kv = 0
838838 for _ in range (0 , tiles_per_sm ):
You can’t perform that action at this time.
0 commit comments