
Commit d211459

byshiue authored and dongfengy committed
[https://nvbugs/5481087][fix] fix bug of ci when we use mocker (NVIDIA#7332)
Signed-off-by: bhsueh <[email protected]>
1 parent 2d34733 commit d211459

2 files changed: +10 −11 lines changed

tests/integration/defs/accuracy/test_llm_api_pytorch.py (9 additions, 7 deletions)

@@ -2858,8 +2858,6 @@ class TestGPTOSS(LlmapiAccuracyTestHarness):
     extra_evaluator_kwargs = {
         "fewshot_as_multiturn": True,
         "apply_chat_template": True,
-        "scores_filter": "exact_match,flexible-extract",
-        "MAX_OUTPUT_LEN": 8192
     }
 
     MODEL_PATH = f"{llm_models_root()}/gpt_oss/gpt-oss-120b"
@@ -2873,7 +2871,9 @@ class TestGPTOSS(LlmapiAccuracyTestHarness):
         (True, True),
     ])
     def test_w4_1gpu(self, moe_backend, cuda_graph, overlap_scheduler, mocker):
-        pytest.skip("https://nvbugs/5481087")
+        mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
+        mocker.patch.dict(GSM8K.EVALUATE_KWARGS,
+                          {"scores_filter": "exact_match,flexible-extract"})
         if moe_backend == "TRITON" and not IS_TRITON_KERNELS_AVAILABLE:
             pytest.skip("Triton kernels are not available")
 
@@ -2891,7 +2891,6 @@ def test_w4_1gpu(self, moe_backend, cuda_graph, overlap_scheduler, mocker):
 
         with llm:
             model_name = "GPT-OSS/MXFP4"
-            mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
             task = GSM8K(model_name)
             task.evaluate(llm,
                           extra_evaluator_kwargs=self.extra_evaluator_kwargs)
@@ -2911,7 +2910,9 @@ def test_w4_1gpu(self, moe_backend, cuda_graph, overlap_scheduler, mocker):
                              ids=["tp4", "ep4", "dp4"])
     def test_w4_4gpus(self, moe_backend, tp_size, pp_size, ep_size,
                       attention_dp, cuda_graph, overlap_scheduler, mocker):
-        pytest.skip("https://nvbugs/5481087")
+        mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
+        mocker.patch.dict(GSM8K.EVALUATE_KWARGS,
+                          {"scores_filter": "exact_match,flexible-extract"})
         if moe_backend == "TRITON":
             if not IS_TRITON_KERNELS_AVAILABLE:
                 pytest.skip("Triton kernels are not available")
@@ -2932,7 +2933,6 @@ def test_w4_4gpus(self, moe_backend, tp_size, pp_size, ep_size,
         with llm:
             model_name = "GPT-OSS/MXFP4"
             task = GSM8K(model_name)
-            mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
             task.evaluate(llm,
                           extra_evaluator_kwargs=self.extra_evaluator_kwargs)
 
@@ -2944,6 +2944,9 @@ def test_w4_4gpus(self, moe_backend, tp_size, pp_size, ep_size,
                              ids=["dp4"])
     def test_w4a16(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,
                    overlap_scheduler, monkeypatch, mocker):
+        mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
+        mocker.patch.dict(GSM8K.EVALUATE_KWARGS,
+                          {"scores_filter": "exact_match,flexible-extract"})
         if not IS_TRITON_KERNELS_AVAILABLE:
             pytest.skip("Triton kernels are not available")
         monkeypatch.setenv("OVERRIDE_QUANT_ALGO", "W4A16_MXFP4")
@@ -2963,7 +2966,6 @@ def test_w4a16(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,
         with llm:
             model_name = "GPT-OSS/BF16"
             task = GSM8K(model_name)
-            mocker.patch.object(GSM8K, {"MAX_OUTPUT_LEN": 8192})
             task.evaluate(llm,
                           extra_evaluator_kwargs=self.extra_evaluator_kwargs)
 
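Taken together, these changes move the GSM8K overrides to the top of each test and out of the shared extra_evaluator_kwargs dict, so they are applied via pytest-mock (and automatically reverted at teardown) instead of being passed to the evaluator as keyword arguments. Below is a minimal, self-contained sketch of that pattern, using a hypothetical stand-in for the harness's GSM8K class with made-up defaults:

# Sketch of the patch-at-test-start pattern adopted above. This GSM8K is a
# hypothetical stand-in, not the accuracy-harness class; run with pytest and
# pytest-mock installed.
class GSM8K:
    MAX_OUTPUT_LEN = 2048                             # assumed default
    EVALUATE_KWARGS = {"apply_chat_template": False}  # assumed default


def test_patch_before_use(mocker):
    # Override the class attribute and merge a key into the class-level dict
    # before anything reads them; pytest-mock undoes both when the test ends.
    mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
    mocker.patch.dict(GSM8K.EVALUATE_KWARGS,
                      {"scores_filter": "exact_match,flexible-extract"})

    assert GSM8K.MAX_OUTPUT_LEN == 8192
    assert GSM8K.EVALUATE_KWARGS["scores_filter"] == \
        "exact_match,flexible-extract"

Note that the line removed from test_w4a16 also had a broken signature: mocker.patch.object(GSM8K, {"MAX_OUTPUT_LEN": 8192}) passes a dict where patch.object expects the attribute name as a string, so it would raise a TypeError rather than apply any patch; it never ran in CI only because the earlier pytest.skip calls short-circuited these tests.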

tests/integration/test_lists/waives.txt (1 addition, 4 deletions)

@@ -331,11 +331,8 @@ accuracy/test_cli_flow.py::TestPhi4MiniInstruct::test_tp2 SKIP (https://nvbugs/5
 accuracy/test_cli_flow.py::TestLongAlpaca7B::test_auto_dtype SKIP (https://nvbugs/5481075)
 accuracy/test_llm_api.py::TestPhi4MiniInstruct::test_fp8 SKIP (https://nvbugs/5465143)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[throughput] SKIP (https://nvbugs/5471106)
-accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-cutlass] SKIP (https://nvbugs/5481080)
-accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-cutlass] SKIP (https://nvbugs/5481080)
-accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[ep4-cutlass] SKIP (https://nvbugs/5481080)
-accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-cutlass] SKIP (https://nvbugs/5481080)
 accuracy/test_llm_api_pytorch.py::TestEXAONE4::test_auto_dtype SKIP (https://nvbugs/5481090)
+accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-cutlass] SKIP (https://nvbugs/5481080)
 test_e2e.py::test_ptp_quickstart_advanced_8gpus_chunked_prefill_sq_22k[Llama-4-Maverick-17B-128E-Instruct-FP8-llama4-models/nvidia/Llama-4-Maverick-17B-128E-Instruct-FP8-False] SKIP (https://nvbugs/5481094)
 test_e2e.py::test_ptp_quickstart_advanced_8gpus_chunked_prefill_sq_22k[Llama-4-Maverick-17B-128E-Instruct-FP8-llama4-models/nvidia/Llama-4-Maverick-17B-128E-Instruct-FP8-True] SKIP (https://nvbugs/5481094)
 test_e2e.py::test_ptp_quickstart_advanced_8gpus_chunked_prefill_sq_22k[Llama-4-Scout-17B-16E-Instruct-FP8-llama4-models/Llama-4-Scout-17B-16E-Instruct-FP8-True] SKIP (https://nvbugs/5481094)
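The waives.txt change un-waives the three 4-GPU cutlass variants and keeps only the 1-GPU cutlass case skipped under https://nvbugs/5481080. Each entry pairs a fully parameterized pytest node ID with a SKIP marker and a tracking-bug URL; the following few lines are a hypothetical parser illustrating that line format, not the harness's actual parsing code:

import re

# One waives.txt entry: "<pytest node ID> SKIP (<reason URL>)".
WAIVE_RE = re.compile(r"^(?P<node_id>\S+)\s+SKIP\s+\((?P<reason>[^)]+)\)$")


def parse_waive(line: str):
    """Return (pytest node ID, skip reason) for one waives.txt line."""
    m = WAIVE_RE.match(line.strip())
    return (m.group("node_id"), m.group("reason")) if m else None


print(parse_waive(
    "accuracy/test_llm_api_pytorch.py::TestGPTOSS::"
    "test_w4_1gpu[True-True-cutlass] SKIP (https://nvbugs/5481080)"))
# -> ('accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-cutlass]',
#     'https://nvbugs/5481080')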
