Skip to content

Commit b8719fe

Browse files
authored
[nvbug/5374773] chore: Update nanobind with fail_fast_on_attention_window_too_large changes (#6491)
Signed-off-by: Michal Guzek <[email protected]>
1 parent 6d5da9f commit b8719fe

File tree

1 file changed

+10 -5 lines changed

1 file changed

+10 -5 lines changed

cpp/tensorrt_llm/nanobind/executor/executorConfig.cpp

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -477,7 +477,7 @@ void initConfigBindings(nb::module_& m)
477477
c.getExtendedRuntimePerfKnobConfig(), c.getDebugConfig(), c.getRecvPollPeriodMs(),
478478
c.getMaxSeqIdleMicroseconds(), c.getSpecDecConfig(), c.getGuidedDecodingConfig(),
479479
c.getAdditionalModelOutputs(), c.getCacheTransceiverConfig(), c.getGatherGenerationLogits(),
480-
c.getPromptTableOffloading(), c.getEnableTrtOverlap());
480+
c.getPromptTableOffloading(), c.getEnableTrtOverlap(), c.getFailFastOnAttentionWindowTooLarge());
481481
auto pickle_tuple = nb::make_tuple(cpp_states, nb::getattr(self, "__dict__"));
482482
return pickle_tuple;
483483
};
@@ -490,7 +490,7 @@ void initConfigBindings(nb::module_& m)
490490
}
491491

492492
auto cpp_states = nb::cast<nb::tuple>(state[0]);
493-
if (cpp_states.size() != 28)
493+
if (cpp_states.size() != 29)
494494
{
495495
throw std::runtime_error("Invalid cpp_states!");
496496
}
@@ -525,7 +525,8 @@ void initConfigBindings(nb::module_& m)
525525
nb::cast<std::optional<tle::CacheTransceiverConfig>>(cpp_states[24]), // CacheTransceiverConfig
526526
nb::cast<bool>(cpp_states[25]), // GatherGenerationLogits
527527
nb::cast<bool>(cpp_states[26]), // PromptTableOffloading
528-
nb::cast<bool>(cpp_states[27]) // EnableTrtOverlap
528+
nb::cast<bool>(cpp_states[27]), // EnableTrtOverlap
529+
nb::cast<bool>(cpp_states[28]) // FailFastOnAttentionWindowTooLarge
529530
);
530531

531532
// Restore Python data
@@ -564,7 +565,8 @@ void initConfigBindings(nb::module_& m)
564565
std::optional<tle::CacheTransceiverConfig>, // CacheTransceiverConfig
565566
bool, // GatherGenerationLogits
566567
bool, // PromptTableOffloading
567-
bool // EnableTrtOverlap
568+
bool, // EnableTrtOverlap
569+
bool // FailFastOnAttentionWindowTooLarge
568570
>(),
569571
nb::arg("max_beam_width") = 1, nb::arg("scheduler_config") = tle::SchedulerConfig(),
570572
nb::arg("kv_cache_config") = tle::KvCacheConfig(), nb::arg("enable_chunked_context") = false,
@@ -582,7 +584,7 @@ void initConfigBindings(nb::module_& m)
582584
nb::arg("spec_dec_config") = nb::none(), nb::arg("guided_decoding_config") = nb::none(),
583585
nb::arg("additional_model_outputs") = nb::none(), nb::arg("cache_transceiver_config") = nb::none(),
584586
nb::arg("gather_generation_logits") = false, nb::arg("mm_embedding_offloading") = false,
585-
nb::arg("enable_trt_overlap") = false)
587+
nb::arg("enable_trt_overlap") = false, nb::arg("fail_fast_on_attention_window_too_large") = false)
586588
.def_prop_rw("max_beam_width", &tle::ExecutorConfig::getMaxBeamWidth, &tle::ExecutorConfig::setMaxBeamWidth)
587589
.def_prop_rw("max_batch_size", &tle::ExecutorConfig::getMaxBatchSize, &tle::ExecutorConfig::setMaxBatchSize)
588590
.def_prop_rw("max_num_tokens", &tle::ExecutorConfig::getMaxNumTokens, &tle::ExecutorConfig::setMaxNumTokens)
@@ -632,6 +634,9 @@ void initConfigBindings(nb::module_& m)
632634
&tle::ExecutorConfig::setPromptTableOffloading)
633635
.def_prop_rw(
634636
"enable_trt_overlap", &tle::ExecutorConfig::getEnableTrtOverlap, &tle::ExecutorConfig::setEnableTrtOverlap)
637+
.def_prop_rw("fail_fast_on_attention_window_too_large",
638+
&tle::ExecutorConfig::getFailFastOnAttentionWindowTooLarge,
639+
&tle::ExecutorConfig::setFailFastOnAttentionWindowTooLarge)
635640
.def("__getstate__", executorConfigGetState)
636641
.def("__setstate__", executorConfigSetState);
637642
}

0 commit comments

Comments
 (0)