Skip to content

Commit f167b1f

Browse files
authored
[https://nvbugs/5453727][fix] Fix bug of how GPT-OSS setup the parameters in CI (#7151)
Signed-off-by: bhsueh <[email protected]>
1 parent e08c7cf commit f167b1f

File tree

1 file changed: +18 additions, -16 deletions

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2699,22 +2699,21 @@ def test_auto_dtype_long_rope(self):
26992699

27002700
class TestGPTOSS(LlmapiAccuracyTestHarness):
27012701
kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.5)
2702+
extra_evaluator_kwargs = {
2703+
"fewshot_as_multiturn": True,
2704+
"apply_chat_template": True,
2705+
"scores_filter": "exact_match,flexible-extract",
2706+
"MAX_OUTPUT_LEN": 8192
2707+
}
27022708

27032709
MODEL_PATH = f"{llm_models_root()}/gpt_oss/gpt-oss-120b"
27042710

2705-
def update_task_kwargs(self, task):
2706-
task.EVALUATOR_KWARGS["fewshot_as_multiturn"] = True
2707-
task.EVALUATOR_KWARGS["apply_chat_template"] = True
2708-
task.EVALUATE_KWARGS["scores_filter"] = "exact_match,flexible-extract"
2709-
task.MAX_OUTPUT_LEN = 8192
2710-
return task
2711-
27122711
@pytest.mark.parametrize("moe_backend", ["CUTLASS", "TRTLLM", "TRITON"],
27132712
ids=["cutlass", "trtllm", "triton"])
27142713
@pytest.mark.parametrize("cuda_graph,overlap_scheduler", [
27152714
(True, True),
27162715
])
2717-
def test_w4_1gpu(self, moe_backend, cuda_graph, overlap_scheduler):
2716+
def test_w4_1gpu(self, moe_backend, cuda_graph, overlap_scheduler, mocker):
27182717
if moe_backend == "TRITON" and not IS_TRITON_KERNELS_AVAILABLE:
27192718
pytest.skip("Triton kernels are not available")
27202719

@@ -2732,9 +2731,10 @@ def test_w4_1gpu(self, moe_backend, cuda_graph, overlap_scheduler):
27322731

27332732
with llm:
27342733
model_name = "GPT-OSS/MXFP4"
2734+
mocker.patch.object(GSM8K, {"MAX_OUTPUT_LEN": 8192})
27352735
task = GSM8K(model_name)
2736-
task = self.update_task_kwargs(task)
2737-
task.evaluate(llm)
2736+
task.evaluate(llm,
2737+
extra_evaluator_kwargs=self.extra_evaluator_kwargs)
27382738

27392739
@pytest.mark.skip_less_device(4)
27402740
@pytest.mark.parametrize("moe_backend", ["CUTLASS", "TRTLLM", "TRITON"])
@@ -2746,7 +2746,7 @@ def test_w4_1gpu(self, moe_backend, cuda_graph, overlap_scheduler):
27462746
],
27472747
ids=["tp4", "ep4", "dp4"])
27482748
def test_w4_4gpus(self, moe_backend, tp_size, pp_size, ep_size,
2749-
attention_dp, cuda_graph, overlap_scheduler):
2749+
attention_dp, cuda_graph, overlap_scheduler, mocker):
27502750
if moe_backend == "TRITON":
27512751
if not IS_TRITON_KERNELS_AVAILABLE:
27522752
pytest.skip("Triton kernels are not available")
@@ -2767,8 +2767,9 @@ def test_w4_4gpus(self, moe_backend, tp_size, pp_size, ep_size,
27672767
with llm:
27682768
model_name = "GPT-OSS/MXFP4"
27692769
task = GSM8K(model_name)
2770-
task = self.update_task_kwargs(task)
2771-
task.evaluate(llm)
2770+
mocker.patch.object(GSM8K, {"MAX_OUTPUT_LEN": 8192})
2771+
task.evaluate(llm,
2772+
extra_evaluator_kwargs=self.extra_evaluator_kwargs)
27722773

27732774
@pytest.mark.skip_less_device(4)
27742775
@pytest.mark.parametrize(
@@ -2777,7 +2778,7 @@ def test_w4_4gpus(self, moe_backend, tp_size, pp_size, ep_size,
27772778
],
27782779
ids=["dp4"])
27792780
def test_w4a16(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,
2780-
overlap_scheduler, monkeypatch):
2781+
overlap_scheduler, monkeypatch, mocker):
27812782
if not IS_TRITON_KERNELS_AVAILABLE:
27822783
pytest.skip("Triton kernels are not available")
27832784
monkeypatch.setenv("OVERRIDE_QUANT_ALGO", "W4A16_MXFP4")
@@ -2797,8 +2798,9 @@ def test_w4a16(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,
27972798
with llm:
27982799
model_name = "GPT-OSS/BF16"
27992800
task = GSM8K(model_name)
2800-
task = self.update_task_kwargs(task)
2801-
task.evaluate(llm)
2801+
mocker.patch.object(GSM8K, {"MAX_OUTPUT_LEN": 8192})
2802+
task.evaluate(llm,
2803+
extra_evaluator_kwargs=self.extra_evaluator_kwargs)
28022804

28032805

28042806
class TestEXAONE4(LlmapiAccuracyTestHarness):

0 commit comments