File tree Expand file tree Collapse file tree 1 file changed +3
-3
lines changed
tritonbench/operators/grouped_gemm/cutedsl Expand file tree Collapse file tree 1 file changed +3
-3
lines changed Original file line number Diff line number Diff line change @@ -573,7 +573,7 @@ def kernel(
573
573
574
574
# init barrier for loading A, B with TMA
575
575
if warp_idx == self .epilog_warp_id [0 ]:
576
- for k_stage in range (self .num_ab_stage ):
576
+ for k_stage in range_constexpr (self .num_ab_stage ): # noqa: F821
577
577
num_tma_producer = self .num_mcast_ctas_a + self .num_mcast_ctas_b - 1
578
578
with cute .arch .elect_one ():
579
579
cute .arch .mbarrier_init (ab_full_mbar_ptr + k_stage , 1 )
@@ -582,7 +582,7 @@ def kernel(
582
582
)
583
583
# Accumulator barrier init
584
584
if warp_idx == self .mma_warp_id :
585
- for acc_stage in range (self .num_acc_stage ):
585
+ for acc_stage in range_constexpr (self .num_acc_stage ): # noqa: F821
586
586
with cute .arch .elect_one ():
587
587
cute .arch .mbarrier_init (acc_full_mbar_ptr + acc_stage , 1 )
588
588
cute .arch .mbarrier_init (
@@ -1287,7 +1287,7 @@ def kernel(
1287
1287
#
1288
1288
subtile_cnt = cute .size (tTR_tAcc .shape , mode = [3 ])
1289
1289
num_prev_subtiles = tile_sched .num_tiles_executed * subtile_cnt
1290
- for subtile_idx in range (subtile_cnt ):
1290
+ for subtile_idx in range_constexpr (subtile_cnt ): # noqa: F821
1291
1291
#
1292
1292
# Load accumulator from tensor memory buffer to register
1293
1293
#
You can’t perform that action at this time.
0 commit comments