Skip to content

Commit e358239

Browse files
committed
fix minor issues
Signed-off-by: bhsueh <[email protected]>
1 parent e5be549 commit e358239

File tree

2 files changed

+10
-10
lines changed

2 files changed

+10
-10
lines changed

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 9 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -2718,6 +2718,9 @@ class TestGPTOSS(LlmapiAccuracyTestHarness):
27182718
])
27192719
def test_w4_1gpu(self, moe_backend, cuda_graph, overlap_scheduler, mocker):
27202720
pytest.skip("https://nvbugs/5481087")
2721+
mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
2722+
mocker.patch.dict(GSM8K.EVALUATE_KWARGS,
2723+
{"scores_filter": "exact_match,flexible-extract"})
27212724
if moe_backend == "TRITON" and not IS_TRITON_KERNELS_AVAILABLE:
27222725
pytest.skip("Triton kernels are not available")
27232726

@@ -2735,9 +2738,6 @@ def test_w4_1gpu(self, moe_backend, cuda_graph, overlap_scheduler, mocker):
27352738

27362739
with llm:
27372740
model_name = "GPT-OSS/MXFP4"
2738-
mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
2739-
mocker.patch.dict(GSM8K.EVALUATE_KWARGS,
2740-
{"scores_filter": "exact_match,flexible-extract"})
27412741
task = GSM8K(model_name)
27422742
task.evaluate(llm,
27432743
extra_evaluator_kwargs=self.extra_evaluator_kwargs)
@@ -2757,7 +2757,9 @@ def test_w4_1gpu(self, moe_backend, cuda_graph, overlap_scheduler, mocker):
27572757
ids=["tp4", "ep4", "dp4"])
27582758
def test_w4_4gpus(self, moe_backend, tp_size, pp_size, ep_size,
27592759
attention_dp, cuda_graph, overlap_scheduler, mocker):
2760-
pytest.skip("https://nvbugs/5481087")
2760+
mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
2761+
mocker.patch.dict(GSM8K.EVALUATE_KWARGS,
2762+
{"scores_filter": "exact_match,flexible-extract"})
27612763
if moe_backend == "TRITON":
27622764
if not IS_TRITON_KERNELS_AVAILABLE:
27632765
pytest.skip("Triton kernels are not available")
@@ -2778,9 +2780,6 @@ def test_w4_4gpus(self, moe_backend, tp_size, pp_size, ep_size,
27782780
with llm:
27792781
model_name = "GPT-OSS/MXFP4"
27802782
task = GSM8K(model_name)
2781-
mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
2782-
mocker.patch.dict(GSM8K.EVALUATE_KWARGS,
2783-
{"scores_filter": "exact_match,flexible-extract"})
27842783
task.evaluate(llm,
27852784
extra_evaluator_kwargs=self.extra_evaluator_kwargs)
27862785

@@ -2792,6 +2791,9 @@ def test_w4_4gpus(self, moe_backend, tp_size, pp_size, ep_size,
27922791
ids=["dp4"])
27932792
def test_w4a16(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,
27942793
overlap_scheduler, monkeypatch, mocker):
2794+
mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
2795+
mocker.patch.dict(GSM8K.EVALUATE_KWARGS,
2796+
{"scores_filter": "exact_match,flexible-extract"})
27952797
if not IS_TRITON_KERNELS_AVAILABLE:
27962798
pytest.skip("Triton kernels are not available")
27972799
monkeypatch.setenv("OVERRIDE_QUANT_ALGO", "W4A16_MXFP4")
@@ -2811,9 +2813,6 @@ def test_w4a16(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,
28112813
with llm:
28122814
model_name = "GPT-OSS/BF16"
28132815
task = GSM8K(model_name)
2814-
mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
2815-
mocker.patch.dict(GSM8K.EVALUATE_KWARGS,
2816-
{"scores_filter": "exact_match,flexible-extract"})
28172816
task.evaluate(llm,
28182817
extra_evaluator_kwargs=self.extra_evaluator_kwargs)
28192818

tests/integration/test_lists/waives.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -326,6 +326,7 @@ accuracy/test_cli_flow.py::TestLongAlpaca7B::test_auto_dtype SKIP (https://nvbug
326326
accuracy/test_llm_api.py::TestPhi4MiniInstruct::test_fp8 SKIP (https://nvbugs/5465143)
327327
accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[throughput] SKIP (https://nvbugs/5471106)
328328
accuracy/test_llm_api_pytorch.py::TestEXAONE4::test_auto_dtype SKIP (https://nvbugs/5481090)
329+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-cutlass] SKIP (https://nvbugs/5481080)
329330
test_e2e.py::test_ptp_quickstart_advanced_8gpus_chunked_prefill_sq_22k[Llama-4-Maverick-17B-128E-Instruct-FP8-llama4-models/nvidia/Llama-4-Maverick-17B-128E-Instruct-FP8-False] SKIP (https://nvbugs/5481094)
330331
test_e2e.py::test_ptp_quickstart_advanced_8gpus_chunked_prefill_sq_22k[Llama-4-Maverick-17B-128E-Instruct-FP8-llama4-models/nvidia/Llama-4-Maverick-17B-128E-Instruct-FP8-True] SKIP (https://nvbugs/5481094)
331332
test_e2e.py::test_ptp_quickstart_advanced_8gpus_chunked_prefill_sq_22k[Llama-4-Scout-17B-16E-Instruct-FP8-llama4-models/Llama-4-Scout-17B-16E-Instruct-FP8-True] SKIP (https://nvbugs/5481094)

0 commit comments

Comments
 (0)