Skip to content

Commit 06b84ee

Browse files
committed
fix minor issues
Signed-off-by: bhsueh <[email protected]>
1 parent fd167ec commit 06b84ee

File tree

2 files changed

+10 -10 lines changed

2 files changed

+10 -10 lines changed

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 9 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -2793,6 +2793,9 @@ class TestGPTOSS(LlmapiAccuracyTestHarness):
27932793
])
27942794
def test_w4_1gpu(self, moe_backend, cuda_graph, overlap_scheduler, mocker):
27952795
pytest.skip("https://nvbugs/5481087")
2796+
mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
2797+
mocker.patch.dict(GSM8K.EVALUATE_KWARGS,
2798+
{"scores_filter": "exact_match,flexible-extract"})
27962799
if moe_backend == "TRITON" and not IS_TRITON_KERNELS_AVAILABLE:
27972800
pytest.skip("Triton kernels are not available")
27982801

@@ -2810,9 +2813,6 @@ def test_w4_1gpu(self, moe_backend, cuda_graph, overlap_scheduler, mocker):
28102813

28112814
with llm:
28122815
model_name = "GPT-OSS/MXFP4"
2813-
mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
2814-
mocker.patch.dict(GSM8K.EVALUATE_KWARGS,
2815-
{"scores_filter": "exact_match,flexible-extract"})
28162816
task = GSM8K(model_name)
28172817
task.evaluate(llm,
28182818
extra_evaluator_kwargs=self.extra_evaluator_kwargs)
@@ -2832,7 +2832,9 @@ def test_w4_1gpu(self, moe_backend, cuda_graph, overlap_scheduler, mocker):
28322832
ids=["tp4", "ep4", "dp4"])
28332833
def test_w4_4gpus(self, moe_backend, tp_size, pp_size, ep_size,
28342834
attention_dp, cuda_graph, overlap_scheduler, mocker):
2835-
pytest.skip("https://nvbugs/5481087")
2835+
mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
2836+
mocker.patch.dict(GSM8K.EVALUATE_KWARGS,
2837+
{"scores_filter": "exact_match,flexible-extract"})
28362838
if moe_backend == "TRITON":
28372839
if not IS_TRITON_KERNELS_AVAILABLE:
28382840
pytest.skip("Triton kernels are not available")
@@ -2853,9 +2855,6 @@ def test_w4_4gpus(self, moe_backend, tp_size, pp_size, ep_size,
28532855
with llm:
28542856
model_name = "GPT-OSS/MXFP4"
28552857
task = GSM8K(model_name)
2856-
mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
2857-
mocker.patch.dict(GSM8K.EVALUATE_KWARGS,
2858-
{"scores_filter": "exact_match,flexible-extract"})
28592858
task.evaluate(llm,
28602859
extra_evaluator_kwargs=self.extra_evaluator_kwargs)
28612860

@@ -2867,6 +2866,9 @@ def test_w4_4gpus(self, moe_backend, tp_size, pp_size, ep_size,
28672866
ids=["dp4"])
28682867
def test_w4a16(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,
28692868
overlap_scheduler, monkeypatch, mocker):
2869+
mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
2870+
mocker.patch.dict(GSM8K.EVALUATE_KWARGS,
2871+
{"scores_filter": "exact_match,flexible-extract"})
28702872
if not IS_TRITON_KERNELS_AVAILABLE:
28712873
pytest.skip("Triton kernels are not available")
28722874
monkeypatch.setenv("OVERRIDE_QUANT_ALGO", "W4A16_MXFP4")
@@ -2886,9 +2888,6 @@ def test_w4a16(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,
28862888
with llm:
28872889
model_name = "GPT-OSS/BF16"
28882890
task = GSM8K(model_name)
2889-
mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
2890-
mocker.patch.dict(GSM8K.EVALUATE_KWARGS,
2891-
{"scores_filter": "exact_match,flexible-extract"})
28922891
task.evaluate(llm,
28932892
extra_evaluator_kwargs=self.extra_evaluator_kwargs)
28942893

tests/integration/test_lists/waives.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -328,6 +328,7 @@ accuracy/test_cli_flow.py::TestLongAlpaca7B::test_auto_dtype SKIP (https://nvbug
328328
accuracy/test_llm_api.py::TestPhi4MiniInstruct::test_fp8 SKIP (https://nvbugs/5465143)
329329
accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[throughput] SKIP (https://nvbugs/5471106)
330330
accuracy/test_llm_api_pytorch.py::TestEXAONE4::test_auto_dtype SKIP (https://nvbugs/5481090)
331+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-cutlass] SKIP (https://nvbugs/5481080)
331332
test_e2e.py::test_ptp_quickstart_advanced_8gpus_chunked_prefill_sq_22k[Llama-4-Maverick-17B-128E-Instruct-FP8-llama4-models/nvidia/Llama-4-Maverick-17B-128E-Instruct-FP8-False] SKIP (https://nvbugs/5481094)
332333
test_e2e.py::test_ptp_quickstart_advanced_8gpus_chunked_prefill_sq_22k[Llama-4-Maverick-17B-128E-Instruct-FP8-llama4-models/nvidia/Llama-4-Maverick-17B-128E-Instruct-FP8-True] SKIP (https://nvbugs/5481094)
333334
test_e2e.py::test_ptp_quickstart_advanced_8gpus_chunked_prefill_sq_22k[Llama-4-Scout-17B-16E-Instruct-FP8-llama4-models/Llama-4-Scout-17B-16E-Instruct-FP8-True] SKIP (https://nvbugs/5481094)

0 commit comments

Comments (0)