Skip to content

Commit 9c19647

Browse files
committed
Cleanup for PR
Signed-off-by: Dom Brown <[email protected]>
1 parent acd7a23 commit 9c19647

File tree

4 files changed: 3 additions and 17 deletions.

cpp/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,7 @@ endif()
358358

359359
setup_sanitizers()
360360

361-
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda -lineinfo")
361+
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda")
362362
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr")
363363
if(FAST_MATH)
364364
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --use_fast_math")

cpp/tensorrt_llm/common/attentionOp.cpp

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
#include "tensorrt_llm/runtime/utils/mpiUtils.h"
3131
#include <algorithm>
3232
#include <cstdint>
33-
#include <cstdlib>
3433
#include <type_traits>
3534

3635
using namespace tensorrt_llm::kernels;
@@ -286,12 +285,8 @@ bool AttentionOp::convertMMHAParamsToXQAParams(tensorrt_llm::kernels::XQAParams&
286285
xqaParams.fp4_out_sf_scale = generationsParams.attention_output_sf_scale;
287286
xqaParams.start_token_idx_sf = generationsParams.start_token_idx_sf;
288287

289-
xqaParams.num_tokens = generationsParams.num_tokens;
290288
// Cross attention parameters.
291289
xqaParams.encoder_input_lengths = generationsParams.encoder_input_lengths;
292-
// xqaParams.cross_kv = generationsParams.cross_kv;
293-
// xqaParams.cross_kv_length = generationsParams.cross_kv_length;
294-
// xqaParams.num_encoder_tokens = generationsParams.num_encoder_tokens;
295290

296291
return true;
297292
}
@@ -2239,7 +2234,7 @@ int AttentionOp::enqueueGeneration(EnqueueGenerationParams<T> const& params, cud
22392234
}
22402235
else
22412236
{
2242-
TLLM_LOG_DEBUG("XQA kernels are not selected in the generation phase. mEnableXQA: %d", mEnableXQA);
2237+
TLLM_LOG_DEBUG("XQA kernels are not selected in the generation phase.");
22432238
}
22442239
}
22452240

cpp/tensorrt_llm/kernels/decoderMaskedMultiheadAttention/xqaParams.h

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -106,12 +106,8 @@ struct XQAParams
106106

107107
void* quant_q_buffer_ptr = nullptr;
108108

109-
int num_tokens;
110109
// for cross attention
111110
int32_t const* encoder_input_lengths = nullptr;
112-
// void const* cross_kv = nullptr;
113-
// int32_t cross_kv_length = 0;
114-
// int32_t num_encoder_tokens = 0;
115111

116112
cudaStream_t stream = 0;
117113

@@ -182,11 +178,7 @@ struct XQAParams
182178
<< "total_num_input_tokens :" << total_num_input_tokens << std ::endl
183179
<< "is_fp8_output :" << (is_fp8_output ? "true" : "false") << std ::endl
184180
<< "fp8_out_scale :" << fp8_out_scale << std ::endl
185-
<< "encoder_input_lengths: " << encoder_input_lengths
186-
<< std::endl
187-
//<< "cross_kv: " << cross_kv << std::endl
188-
//<< "cross_kv_length: " << cross_kv_length << std::endl
189-
//<< "num_encoder_tokens: " << num_encoder_tokens << std::endl
181+
<< "encoder_input_lengths: " << encoder_input_lengths << std::endl
190182
<< "stream :" << stream;
191183

192184
return ss.str();

cpp/tensorrt_llm/plugins/gptAttentionPlugin/gptAttentionPlugin.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1041,7 +1041,6 @@ int GPTAttentionPlugin::enqueueSome(int32_t seqIdxBeg, int32_t localNbSeq, int32
10411041

10421042
common_enqueue_params.input_seq_length = max_context_q_len;
10431043
common_enqueue_params.max_past_kv_length = max_context_kv_len;
1044-
10451044
EnqueueContextParams<T> enqueue_params{common_enqueue_params};
10461045
enqueue_params.attention_packed_mask = attention_packed_mask;
10471046
enqueue_params.host_block_offsets = host_block_offsets;

0 commit comments

Comments (0)