|
| 1 | +## Check Bank Conflicts via NCU |
| 2 | + |
| 3 | +- 检查device支持的metrics |
| 4 | +```bash |
| 5 | +# ncu check bank conflicts |
| 6 | +# 先查看当前device支持的metrics有哪些 |
| 7 | +ncu --query-metrics | grep data | grep bank | grep l1tex |
| 8 | +``` |
| 9 | +metrics: |
| 10 | +```bash |
| 11 | +ncu --query-metrics | grep data | grep bank | grep l1tex |
| 12 | +l1tex__data_bank_conflicts_pipe_lsu Counter # of data bank conflicts generated by LSU pipe |
| 13 | +l1tex__data_bank_conflicts_pipe_lsu_cmd_read Counter # of data bank conflicts generated by LSU reads |
| 14 | +l1tex__data_bank_conflicts_pipe_lsu_cmd_write Counter # of data bank conflicts generated by LSU writes |
| 15 | +l1tex__data_bank_conflicts_pipe_lsu_mem_global Counter # of data bank conflicts generated by global ops |
| 16 | +l1tex__data_bank_conflicts_pipe_lsu_mem_global_op_atom Counter # of data bank conflicts generated by global atomics |
| 17 | +l1tex__data_bank_conflicts_pipe_lsu_mem_global_op_ld Counter # of data bank conflicts generated by global loads |
| 18 | +l1tex__data_bank_conflicts_pipe_lsu_mem_global_op_red Counter # of data bank conflicts generated by global reductions |
| 19 | +l1tex__data_bank_conflicts_pipe_lsu_mem_global_op_st Counter # of data bank conflicts generated by global stores |
| 20 | +l1tex__data_bank_conflicts_pipe_lsu_mem_shared Counter # of shared memory data bank conflicts generated by LDS, LD, 3D |
| 21 | +l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_atom Counter # of shared memory data bank conflicts generated by ATOMS, ATOM |
| 22 | +l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld Counter # of shared memory data bank conflicts generated by LDS, LD, 3D |
| 23 | +l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ldgsts Counter # of data bank conflicts generated by shared ldgsts ops |
| 24 | +l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st Counter # of shared memory data bank conflicts generated by STS, ST, 3D |
| 25 | +l1tex__data_bank_reads Counter # of data bank reads |
| 26 | +l1tex__data_bank_writes Counter # of data bank writes |
| 27 | +sm__sass_l1tex_data_bank_conflicts_pipe_lsu_mem_shared_op_ldgsts Counter # of shared memory data bank conflicts generated by LDGSTS |
| 28 | +sm__sass_l1tex_data_bank_conflicts_pipe_lsu_mem_shared_op_ldgsts_cache_access Counter # of shared memory data bank conflicts generated by LDGSTS.ACCESS |
| 29 | +sm__sass_l1tex_data_bank_conflicts_pipe_lsu_mem_shared_op_ldgsts_cache_bypass Counter # of shared memory data bank conflicts generated by LDGSTS.BYPASS |
| 30 | +sm__sass_l1tex_data_bank_conflicts_pipe_lsu_mem_shared_op_ldsm Counter # of shared memory data bank conflicts generated by LDSM |
| 31 | +sm__sass_l1tex_data_bank_conflicts_pipe_lsu_mem_shared_op_st Counter # of shared memory data bank conflicts generated by STS, ST |
| 32 | +sm__sass_l1tex_data_bank_writes_pipe_lsu_mem_shared_op_ldgsts_cache_access Counter # of LDGSTS.ACCESS shared data bank writes |
| 33 | +smsp__sass_l1tex_data_bank_conflicts_pipe_lsu_mem_shared_op_ldgsts Counter # of shared memory data bank conflicts generated by LDGSTS |
| 34 | +smsp__sass_l1tex_data_bank_conflicts_pipe_lsu_mem_shared_op_ldgsts_cache_access Counter # of shared memory data bank conflicts generated by LDGSTS.ACCESS |
| 35 | +smsp__sass_l1tex_data_bank_conflicts_pipe_lsu_mem_shared_op_ldgsts_cache_bypass Counter # of shared memory data bank conflicts generated by LDGSTS.BYPASS |
| 36 | +smsp__sass_l1tex_data_bank_conflicts_pipe_lsu_mem_shared_op_ldsm Counter # of shared memory data bank conflicts generated by LDSM |
| 37 | +smsp__sass_l1tex_data_bank_conflicts_pipe_lsu_mem_shared_op_st Counter # of shared memory data bank conflicts generated by STS, ST |
| 38 | +smsp__sass_l1tex_data_bank_writes_pipe_lsu_mem_shared_op_ldgsts_cache_access Counter # of LDGSTS.ACCESS shared data bank writes |
| 39 | +``` |
| 40 | + |
| 41 | +- 由LDS、LD指令产生的shared memory bank conflicts |
| 42 | +```bash |
| 43 | +# profile l1tex smem data bank conflicts |
| 44 | +# 由LDS, LD指令产生的bank conflicts |
| 45 | +ncu --metrics l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum hgemm_mma_stage.89.bin |
| 46 | +ncu --metrics l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum hgemm_cute.89.debug.bin |
| 47 | +ncu --metrics l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld \ |
| 48 | + python3 flash_attn_mma.py --B 1 --H 1 --D 64 --N 4096 --w 0 --i 1 |
| 49 | +``` |
| 50 | +log: |
| 51 | +```text |
| 52 | +void flash_fwd_splitkv_combine_kernel<Flash_fwd_kernel_traits<64, 64, 256, 4, 0, 0, cutlass::half_t, Flash_kernel_traits<64, 64, 256, 4, cutlass::half_t>>, 8, 3, 1>(Flash_fwd_params) (512, 1, 1)x(128, 1, 1), Context 1, Stream 7, Device 0, CC 8.9 |
| 53 | + Section: Command line profiler metrics |
| 54 | + -------------------------------------------------------- ----------- ------------ |
| 55 | + Metric Name Metric Unit Metric Value |
| 56 | + -------------------------------------------------------- ----------- ------------ |
| 57 | + l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.avg 11.18 |
| 58 | + l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.max 13 |
| 59 | + l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.min 10 |
| 60 | + l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum 1029 |
| 61 | + -------------------------------------------------------- ----------- ------------ |
| 62 | +``` |
| 63 | + |
| 64 | +- 由LDSM(ldmatrix)指令产生的shared memory bank conflicts |
| 65 | + |
| 66 | +```bash |
| 67 | +# 由LDSM(ldmatrix)指令产生的bank conflicts |
| 68 | +ncu --metrics sm__sass_l1tex_data_bank_conflicts_pipe_lsu_mem_shared_op_ldsm \ |
| 69 | + python3 flash_attn_mma.py --B 1 --H 1 --D 64 --N 4096 --w 0 --i 1 |
| 70 | +ncu --metrics smsp__sass_l1tex_data_bank_conflicts_pipe_lsu_mem_shared_op_ldsm \ |
| 71 | + python3 flash_attn_mma.py --B 1 --H 1 --D 64 --N 4096 --w 0 --i 1 |
| 72 | +``` |
| 73 | +log: |
| 74 | +```text |
| 75 | +void flash_fwd_splitkv_combine_kernel<Flash_fwd_kernel_traits<64, 64, 256, 4, 0, 0, cutlass::half_t, Flash_kernel_traits<64, 64, 256, 4, cutlass::half_t>>, 8, 3, 1>(Flash_fwd_params) (512, 1, 1)x(128, 1, 1), Context 1, Stream 7, Device 0, CC 8.9 |
| 76 | + Section: Command line profiler metrics |
| 77 | + ------------------------------------------------------------------ ----------- ------------ |
| 78 | + Metric Name Metric Unit Metric Value |
| 79 | + ------------------------------------------------------------------ ----------- ------------ |
| 80 | + sm__sass_l1tex_data_bank_conflicts_pipe_lsu_mem_shared_op_ldsm.avg 0 |
| 81 | + sm__sass_l1tex_data_bank_conflicts_pipe_lsu_mem_shared_op_ldsm.max 0 |
| 82 | + sm__sass_l1tex_data_bank_conflicts_pipe_lsu_mem_shared_op_ldsm.min 0 |
| 83 | + sm__sass_l1tex_data_bank_conflicts_pipe_lsu_mem_shared_op_ldsm.sum 0 |
| 84 | + ------------------------------------------------------------------ ----------- ------------ |
| 85 | +``` |
0 commit comments