
Commit 64fca5d

reviewer feedback

Signed-off-by: Lucas Liebenwein <[email protected]>
1 parent: c3465d0

4 files changed: +16 -8 lines changed

tensorrt_llm/_torch/auto_deploy/models/hf.py

Lines changed: 5 additions & 5 deletions

@@ -525,12 +525,12 @@ def post_process(self, sub_mod: nn.Module, sub_gm: GraphModule):
         )
 
     def _init_dynamic_shape_lookup(self) -> Dict[str, DynamicShape]:
-        batch_size_dyn = Dim.DYNAMIC
-        seq_len_dyn = Dim.DYNAMIC
+        batch_size_dynamic = Dim.DYNAMIC
+        seq_len_dynamic = Dim.DYNAMIC
         return {
-            "input_ids": {0: batch_size_dyn, 1: seq_len_dyn},
-            "inputs_embeds": {0: batch_size_dyn, 1: seq_len_dyn},
-            "position_ids": {0: batch_size_dyn, 1: seq_len_dyn},
+            "input_ids": {0: batch_size_dynamic, 1: seq_len_dynamic},
+            "inputs_embeds": {0: batch_size_dynamic, 1: seq_len_dynamic},
+            "position_ids": {0: batch_size_dynamic, 1: seq_len_dynamic},
         }
 
     @classmethod
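For context, `Dim.DYNAMIC` is `torch.export`'s marker for a dimension whose size should not be specialized. A minimal sketch of how a lookup like the one above is typically consumed, assuming PyTorch 2.5+ (where `Dim.DYNAMIC` is available); the `ToyEmbedder` model is hypothetical, not part of this commit:

```python
import torch
import torch.nn as nn
from torch.export import Dim, export


class ToyEmbedder(nn.Module):
    def __init__(self):
        super().__init__()
        self.emb = nn.Embedding(1000, 32)

    def forward(self, input_ids: torch.Tensor) -> torch.Tensor:
        return self.emb(input_ids)


# dim 0 (batch) and dim 1 (sequence) are marked dynamic, mirroring the
# {0: batch_size_dynamic, 1: seq_len_dynamic} entries in the diff above
dynamic_shapes = {"input_ids": {0: Dim.DYNAMIC, 1: Dim.DYNAMIC}}

ep = export(
    ToyEmbedder(),
    (torch.zeros(2, 8, dtype=torch.long),),
    dynamic_shapes=dynamic_shapes,
)
print(ep.graph_module.graph)  # batch and sequence dims appear as symbolic sizes
```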

tensorrt_llm/_torch/auto_deploy/transform/library/export_to_gm.py

Lines changed: 7 additions & 2 deletions

@@ -88,7 +88,7 @@ def set_exact_signature(mod: nn.Module, kwargs: Dict[str, Any]):
 
     reset_signature = False
     if hasattr(forward_func, "__signature__"):
-        signature_attribute = mod.forward.__signature__
+        signature_attribute = forward_func.__signature__
         reset_signature = True
 
     # construct signature object from kwargs
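The fix reads `__signature__` from the same object the preceding `hasattr` check inspected, rather than from `mod.forward`, which may be a different object (e.g., a bound method wrapping a patched function). A toy illustration, independent of this repo, of why `__signature__` matters: it is the hook `inspect.signature` consults, so it must be saved from and restored on the object that actually carries it.

```python
import inspect


def forward(x, y=1):
    return x + y


# override the reported signature on the function object itself
forward.__signature__ = inspect.Signature(
    [inspect.Parameter("x", inspect.Parameter.POSITIONAL_OR_KEYWORD)]
)

print(inspect.signature(forward))  # (x) -- the override, not (x, y=1)
```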
@@ -139,8 +139,13 @@ def _apply_to_full_model(
     # independent, which would conflict with graph capture logic, i.e., you cannot graph-capture
     # "model" and "model.text_model" for example. However, you can export "model.text_model" and
     # "model.vision_model" separately.
+    def _is_child(child: str, parent: str) -> bool:
+        """Check whether ``child`` is a child of ``parent``."""
+        # covers "a.b" being a parent of "a.b.c", or parent being "", i.e., root (a parent of all!)
+        return parent == "" or child.startswith(f"{parent}.")
+
     sub_keys = [info.submodule_name for info in export_infos]
-    assert all(not k1.startswith(k2) for k1 in sub_keys for k2 in sub_keys if k1 != k2), (
+    assert all(not _is_child(k1, k2) for k1 in sub_keys for k2 in sub_keys if k1 != k2), (
         f"Cannot export submodules of already exported submodules, {sub_keys=}"
     )
 
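The helper fixes a false positive in the old check: a bare `startswith` flags sibling submodules whose names merely share a prefix. A standalone demonstration (the module names below are illustrative):

```python
def _is_child(child: str, parent: str) -> bool:
    # "" is the root module and hence a parent of everything;
    # otherwise require a dotted-path prefix, not just a string prefix
    return parent == "" or child.startswith(f"{parent}.")


# false positive under plain startswith: siblings sharing a name prefix
assert "model.text_model_2".startswith("model.text_model")      # wrong verdict
assert not _is_child("model.text_model_2", "model.text_model")  # correct

# true child relationships are still caught
assert _is_child("model.text_model.embed", "model.text_model")
assert _is_child("model.vision_model", "")  # root is a parent of all
```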

tensorrt_llm/_torch/auto_deploy/transform/library/kvcache_transformers.py

Lines changed: 1 addition & 1 deletion

@@ -113,7 +113,7 @@ def _apply_to_full_model(
         cm: CachedSequenceInterface,
         factory: ModelFactory,
         shared_config: SharedConfig,
-    ) -> Tuple[GraphModule, TransformInfo]:
+    ) -> Tuple[nn.Module, TransformInfo]:
         # Register profiler attn operator
         ALL_ATTENTION_FUNCTIONS.register("ad_profile_mha", fake_profiler_mha)
 
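Widening the return annotation is safe with respect to existing callers: `torch.fx.GraphModule` subclasses `nn.Module`, so every value the transform previously returned still satisfies the new annotation, while plain (un-exported) modules become valid too. A quick standalone check:

```python
import torch.nn as nn
from torch.fx import GraphModule

# GraphModule is-a nn.Module, so Tuple[nn.Module, ...] is a strict widening
assert issubclass(GraphModule, nn.Module)
```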

tensorrt_llm/_torch/auto_deploy/transformations/_graph.py

Lines changed: 3 additions & 0 deletions

@@ -137,6 +137,9 @@ def move_to_device(mod: nn.Module, device: DeviceLikeType) -> None:
     # get device
     device = torch.device(device)
 
+    # move the model to the device
+    mod.to(device)
+
     for _, subgm in reversed(list(named_graphmodules(mod))):
         # recompile graph to update self generated codes in subgraph
         _move_single_gm_to_device(subgm, device)
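The added `mod.to(device)` moves all registered parameters and buffers up front; the per-subgraph pass then handles what `nn.Module.to` cannot see, such as device references baked into FX-generated forward code. A minimal sketch of this two-step pattern under simplified assumptions (the repo's `named_graphmodules` and `_move_single_gm_to_device` do more than the `recompile()` shown here):

```python
import torch
import torch.nn as nn
from torch.fx import GraphModule, symbolic_trace


def move_to_device(mod: nn.Module, device: torch.device) -> None:
    # step 1: recursively move parameters/buffers of all submodules
    mod.to(device)
    # step 2: fix up each FX subgraph after the move
    for _, sub in mod.named_modules():
        if isinstance(sub, GraphModule):
            sub.recompile()  # regenerate forward() code from the graph


gm = symbolic_trace(nn.Sequential(nn.Linear(4, 4), nn.ReLU()))
move_to_device(gm, torch.device("cpu"))
print(next(gm.parameters()).device)  # cpu
```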
