
Commit 16e9d11

[https://nvbugs/5481087][fix] fix bug of ci when we use mocker (#7332)
Signed-off-by: bhsueh <[email protected]>
1 parent 2b286ae commit 16e9d11

File tree: 2 files changed (+10, -11 lines)


tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 9 additions & 7 deletions
@@ -2822,8 +2822,6 @@ class TestGPTOSS(LlmapiAccuracyTestHarness):
     extra_evaluator_kwargs = {
         "fewshot_as_multiturn": True,
         "apply_chat_template": True,
-        "scores_filter": "exact_match,flexible-extract",
-        "MAX_OUTPUT_LEN": 8192
     }

     MODEL_PATH = f"{llm_models_root()}/gpt_oss/gpt-oss-120b"
@@ -2837,7 +2835,9 @@ class TestGPTOSS(LlmapiAccuracyTestHarness):
         (True, True),
     ])
     def test_w4_1gpu(self, moe_backend, cuda_graph, overlap_scheduler, mocker):
-        pytest.skip("https://nvbugs/5481087")
+        mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
+        mocker.patch.dict(GSM8K.EVALUATE_KWARGS,
+                          {"scores_filter": "exact_match,flexible-extract"})
         if moe_backend == "TRITON" and not IS_TRITON_KERNELS_AVAILABLE:
             pytest.skip("Triton kernels are not available")

@@ -2855,7 +2855,6 @@ def test_w4_1gpu(self, moe_backend, cuda_graph, overlap_scheduler, mocker):

         with llm:
             model_name = "GPT-OSS/MXFP4"
-            mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
             task = GSM8K(model_name)
             task.evaluate(llm,
                           extra_evaluator_kwargs=self.extra_evaluator_kwargs)
@@ -2875,7 +2874,9 @@ def test_w4_1gpu(self, moe_backend, cuda_graph, overlap_scheduler, mocker):
                              ids=["tp4", "ep4", "dp4"])
     def test_w4_4gpus(self, moe_backend, tp_size, pp_size, ep_size,
                       attention_dp, cuda_graph, overlap_scheduler, mocker):
-        pytest.skip("https://nvbugs/5481087")
+        mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
+        mocker.patch.dict(GSM8K.EVALUATE_KWARGS,
+                          {"scores_filter": "exact_match,flexible-extract"})
         if moe_backend == "TRITON":
             if not IS_TRITON_KERNELS_AVAILABLE:
                 pytest.skip("Triton kernels are not available")
@@ -2896,7 +2897,6 @@ def test_w4_4gpus(self, moe_backend, tp_size, pp_size, ep_size,
         with llm:
             model_name = "GPT-OSS/MXFP4"
             task = GSM8K(model_name)
-            mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
             task.evaluate(llm,
                           extra_evaluator_kwargs=self.extra_evaluator_kwargs)

@@ -2908,6 +2908,9 @@ def test_w4_4gpus(self, moe_backend, tp_size, pp_size, ep_size,
                              ids=["dp4"])
     def test_w4a16(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,
                    overlap_scheduler, monkeypatch, mocker):
+        mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
+        mocker.patch.dict(GSM8K.EVALUATE_KWARGS,
+                          {"scores_filter": "exact_match,flexible-extract"})
         if not IS_TRITON_KERNELS_AVAILABLE:
             pytest.skip("Triton kernels are not available")
         monkeypatch.setenv("OVERRIDE_QUANT_ALGO", "W4A16_MXFP4")
@@ -2927,7 +2930,6 @@ def test_w4a16(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,
         with llm:
             model_name = "GPT-OSS/BF16"
             task = GSM8K(model_name)
-            mocker.patch.object(GSM8K, {"MAX_OUTPUT_LEN": 8192})
             task.evaluate(llm,
                           extra_evaluator_kwargs=self.extra_evaluator_kwargs)

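The removed lines suggest the CI failure: test_w4a16 called mocker.patch.object(GSM8K, {"MAX_OUTPUT_LEN": 8192}), passing a dict where patch.object expects the attribute name as a string, and the other tests applied their patch inside the "with llm:" block rather than before setup. For reference, a minimal sketch of the pattern the fix adopts, runnable with pytest and pytest-mock; the GSM8K class below is an illustrative stand-in, not the real evaluator from the accuracy harness:

class GSM8K:
    # Stand-in defaults; the real class defines its own values.
    MAX_OUTPUT_LEN = 4096
    EVALUATE_KWARGS = {"apply_chat_template": True}


def test_patch_pattern(mocker):
    # Correct patch.object signature: the attribute *name* is a string.
    # Passing a dict instead (as the removed test_w4a16 line did) raises
    # a TypeError when the patch is started.
    mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
    # patch.dict merges the given keys into the dict for the test's duration.
    mocker.patch.dict(GSM8K.EVALUATE_KWARGS,
                      {"scores_filter": "exact_match,flexible-extract"})

    assert GSM8K.MAX_OUTPUT_LEN == 8192
    assert GSM8K.EVALUATE_KWARGS["scores_filter"] == "exact_match,flexible-extract"
    # pytest-mock undoes both patches automatically at test teardown.

Patching at the top of the test also guarantees the overrides are in place before any code reads GSM8K's class attributes during LLM setup, which is presumably why the fix moves the calls out of the "with llm:" blocks.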

tests/integration/test_lists/waives.txt

Lines changed: 1 addition & 4 deletions
@@ -331,11 +331,8 @@ accuracy/test_cli_flow.py::TestPhi4MiniInstruct::test_tp2 SKIP (https://nvbugs/5
 accuracy/test_cli_flow.py::TestLongAlpaca7B::test_auto_dtype SKIP (https://nvbugs/5481075)
 accuracy/test_llm_api.py::TestPhi4MiniInstruct::test_fp8 SKIP (https://nvbugs/5465143)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[throughput] SKIP (https://nvbugs/5471106)
-accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-cutlass] SKIP (https://nvbugs/5481080)
-accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-cutlass] SKIP (https://nvbugs/5481080)
-accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[ep4-cutlass] SKIP (https://nvbugs/5481080)
-accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-cutlass] SKIP (https://nvbugs/5481080)
 accuracy/test_llm_api_pytorch.py::TestEXAONE4::test_auto_dtype SKIP (https://nvbugs/5481090)
+accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-cutlass] SKIP (https://nvbugs/5481080)
 test_e2e.py::test_ptp_quickstart_advanced_8gpus_chunked_prefill_sq_22k[Llama-4-Maverick-17B-128E-Instruct-FP8-llama4-models/nvidia/Llama-4-Maverick-17B-128E-Instruct-FP8-False] SKIP (https://nvbugs/5481094)
 test_e2e.py::test_ptp_quickstart_advanced_8gpus_chunked_prefill_sq_22k[Llama-4-Maverick-17B-128E-Instruct-FP8-llama4-models/nvidia/Llama-4-Maverick-17B-128E-Instruct-FP8-True] SKIP (https://nvbugs/5481094)
 test_e2e.py::test_ptp_quickstart_advanced_8gpus_chunked_prefill_sq_22k[Llama-4-Scout-17B-16E-Instruct-FP8-llama4-models/Llama-4-Scout-17B-16E-Instruct-FP8-True] SKIP (https://nvbugs/5481094)
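Each waives.txt entry pairs a full pytest node ID with the SKIP action and the tracking bug in parentheses. As an illustration of that format only, a hedged sketch of a parser; parse_waive is a hypothetical helper, not part of the repo:

import re

# Hypothetical helper: parses one waives.txt entry of the form
# "<pytest-node-id> SKIP (<bug-url>)".
WAIVE_RE = re.compile(r"^(?P<test_id>\S+)\s+SKIP\s+\((?P<bug>[^)]+)\)$")

def parse_waive(line):
    match = WAIVE_RE.match(line.strip())
    if match is None:
        return None
    return match.group("test_id"), match.group("bug")

print(parse_waive(
    "accuracy/test_llm_api_pytorch.py::TestGPTOSS::"
    "test_w4_1gpu[True-True-cutlass] SKIP (https://nvbugs/5481080)"))
# -> ('accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-cutlass]',
#     'https://nvbugs/5481080')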
