Skip to content

Commit d7950cd

Browse files
committed
add registers
Summary: Test Plan: Reviewers: Subscribers: Tasks: Tags:
1 parent d9a0b4c commit d7950cd

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

tritonbench/operators/gdpa/gdpa_blackwell_tlx.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -289,7 +289,7 @@ def gdpa_kernel_tma_ws_blackwell(
289 289

290 290
with tlx.async_tasks():
291 291
# activation calculation
292 -
with tlx.async_task("default"):
292 +
with tlx.async_task("default", registers=192):
293 293
accum_cnt = 0
294 294
accum_cnt_outer = 0
295 295
for _ in range(0, tiles_per_sm):
@@ -387,7 +387,7 @@ def gdpa_kernel_tma_ws_blackwell(
387 387
accum_cnt_outer += 1
388 388
tile_idx += num_progs
389 389

390 -
with tlx.async_task(num_warps=4):
390 +
with tlx.async_task(num_warps=4, registers=192):
391 391
accum_cnt = 0
392 392
accum_cnt_outer = 0
393 393
for _ in range(0, tiles_per_sm):
@@ -477,7 +477,7 @@ def gdpa_kernel_tma_ws_blackwell(
477 477
accum_cnt_outer += 1
478 478
tile_idx += num_progs
479 479

480 -
with tlx.async_task(num_warps=1): # gemm
480 +
with tlx.async_task(num_warps=1, registers=24): # gemm
481 481
accum_cnt_q = 0
482 482
accum_cnt_kv = 0
483 483
accum_cnt_o = 0
@@ -832,7 +832,7 @@ def gdpa_kernel_tma_ws_blackwell(
832 832
# to wait for the completion
833 833
tile_idx += num_progs
834 834

835 -
with tlx.async_task(num_warps=1): # load
835 +
with tlx.async_task(num_warps=1, registers=24): # load
836 836
accum_count_q = 0
837 837
accum_cnt_kv = 0
838 838
for _ in range(0, tiles_per_sm):

0 commit comments

Comments (0)