Skip to content

Commit d373543

Browse files
committed
Remove multi-block mode for the precompiled XQA kernel and for the spec-dec path that does not use the QGMMA kernel
Signed-off-by: Jhao-Ting Chen <[email protected]>
1 parent bec4406 commit d373543

File tree

2 files changed

+2
-8
lines changed

2 files changed

+2
-8
lines changed

cpp/tensorrt_llm/kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/decoderXQAImplJIT.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -446,7 +446,7 @@ void DecoderXQAImplJIT::runImpl(XQAParams const& xqaParams, KVCacheBuffer const&
446446
multi_block = computeMultiBlockCountSpecDecGMMA(
447447
xqaParams, xqaParams.batch_size, multiprocessor_count, specDecBlocks);
448448
}
449-
else
449+
else if (!isSpecDec)
450450
{
451451
multi_block = computeMultiBlockCount(xqaParams, xqaParams.batch_size, multiprocessor_count);
452452
}

cpp/tensorrt_llm/kernels/decoderMaskedMultiheadAttention/decoderXQAImplPrecompiled.cpp

Lines changed: 1 addition & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -287,14 +287,8 @@ class XQAKernelList
287287
void* kernelParams[] = {&maxQSeqLen, &launchParams.num_k_heads, &headGrpSize, &cuQSeqLens,
288288
&launchParams.output, &xqa_q_input_ptr, &maskPtr, &launchParams.kvCacheParams, &launchParams.batch_size,
289289
&launchParams.kv_scale_quant_orig, &launchParams.scratch};
290+
// precompiled XQA Spec-dec kernel does not support multi-block mode
290291
int multi_block = 1;
291-
if (xqaParams.multi_block_mode)
292-
{
293-
multi_block = computeMultiBlockCount(xqaParams, xqaParams.batch_size, multiprocessor_count);
294-
check_cuda_error(cudaMemsetAsync(xqaParams.workspaces, 0,
295-
sizeof(int) * xqaParams.batch_size * qSeqLen * xqaParams.num_kv_heads, stream));
296-
sync_check_cuda_error(stream);
297-
}
298292
TLLM_CU_CHECK(mDriver->cuLaunchKernel(func, multi_block, xqaParams.num_kv_heads * nbTokenBlocksPerGrp,
299293
xqaParams.batch_size, 128, 1, 2, shared_mem_bytes, stream, kernelParams, nullptr));
300294
}

0 commit comments

Comments
 (0)