Commit 2a96ae1

[TRTLLM-5252][fix] Propagate mapping to intermediate layers

Signed-off-by: William Zhang <[email protected]>
1 parent 6135f75

File tree: 3 files changed (+6, -1)


examples/llm-api/quickstart_advanced.py (3 additions, 1 deletion)

@@ -150,7 +150,9 @@ def parse_arguments():
 def setup_llm(args, **kwargs):
     kv_cache_config = KvCacheConfig(
         enable_block_reuse=not args.disable_kv_cache_reuse,
-        free_gpu_memory_fraction=args.kv_cache_fraction,
+        # free_gpu_memory_fraction=args.kv_cache_fraction,
+        free_gpu_memory_fraction=0.5,
+        max_tokens=10_000,
         dtype=args.kv_cache_dtype,
     )

tensorrt_llm/_torch/models/modeling_mistral.py (2 additions, 0 deletions)

@@ -475,6 +475,7 @@ def __init__(self, model_config: ModelConfig[Mistral3Config]):
             out_features=hidden_size,
             bias=False,
             dtype=config.torch_dtype,
+            mapping=model_config.mapping,
         )

     @torch.inference_mode()
@@ -546,6 +547,7 @@ def __init__(self, model_config: ModelConfig[Mistral3Config]):
             out_features=config.text_config.hidden_size,
             bias=config.multimodal_projector_bias,
             dtype=dtype,
+            mapping=model_config.mapping,
         )

     @torch.inference_mode()

tests/integration/test_lists/test-db/l0_dgx_h100.yml (1 addition, 0 deletions)

@@ -51,6 +51,7 @@ l0_dgx_h100:
   - accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ctx_pp_gen_tp_asymmetric[MMLU-gen_tp=1-ctx_pp=2]
   - accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ctx_pp_gen_tp_asymmetric[MMLU-gen_tp=2-ctx_pp=2]
   - test_e2e.py::test_ptp_quickstart_advanced_bs1
+  - unittest/_torch/modeling/test_modeling_pixtral.py::test_tensor_parallelism
   - condition:
       ranges:
         system_gpu_count:

0 commit comments