
Commit c56d926

refactor: simplify test_trtllm_sampler by removing unused test_case fixture
- Removed the test_case fixture and its associated JSON loading, simplifying the test setup.
- Hardcoded sampling parameters for clarity and consistency in the test execution.

Signed-off-by: Robin Kobus <[email protected]>
1 parent ae9261c commit c56d926

File tree

1 file changed: +5 -17 lines changed


tests/unittest/_torch/sampler/test_trtllm_sampler.py

Lines changed: 5 additions & 17 deletions
@@ -1,6 +1,3 @@
-import json
-from pathlib import Path
-
 import pytest
 from utils.llm_data import llm_models_root
 from utils.util import similar
@@ -10,15 +7,6 @@
 from tensorrt_llm.llmapi import KvCacheConfig as TRT_KvCacheConfig
 
 
-# A test case of mmlu_llama from lm_eval
-@pytest.fixture(scope="module")
-def test_case():
-    with open(
-            Path(__file__).parent.parent / "executor" /
-            "test_overlap_scheduler_input.json") as f:
-        return json.load(f)
-
-
 @pytest.fixture(scope="module")
 def model_path():
     return llm_models_root() / "llama-models-v2/TinyLlama-1.1B-Chat-v1.0"
@@ -41,7 +29,7 @@ def create_llm(model_dir):
 
 
 @pytest.mark.high_cuda_memory
-def test_trtllm_sampler(model_path, test_case):
+def test_trtllm_sampler(model_path):
     prompts = [
         "Magellan and Elcano lead the first",
         "The capital of France is",
@@ -52,10 +40,10 @@ def test_trtllm_sampler(model_path, test_case):
                 ["La Paz"]]
 
     # Test configuration
-    max_new_tokens = test_case["max_new_tokens"]
-    temperature = test_case["temperature"]
-    top_p = test_case["top_p"]
-    stop_words = test_case["stop_words"]
+    max_new_tokens = 10
+    temperature = 1.0
+    top_p = None
+    stop_words = ["."]
 
     sampling_config = SamplingParams(max_tokens=max_new_tokens,
                                      n=1,
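
For reference, below is a minimal sketch of the hardcoded sampling setup this commit leaves behind. The literal values and the max_tokens/n arguments come straight from the diff above; the SamplingParams import path and the remaining keyword names (temperature, top_p, stop) are assumptions about the unchanged parts of the file, which the diff truncates.

# Sketch of the simplified sampling configuration after this commit.
# Values mirror the diff; keyword names beyond max_tokens/n are assumed.
from tensorrt_llm import SamplingParams  # assumed import location

max_new_tokens = 10   # previously test_case["max_new_tokens"]
temperature = 1.0     # previously test_case["temperature"]
top_p = None          # previously test_case["top_p"]
stop_words = ["."]    # previously test_case["stop_words"]

sampling_config = SamplingParams(max_tokens=max_new_tokens,
                                 n=1,
                                 temperature=temperature,
                                 top_p=top_p,
                                 stop=stop_words)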
