
Commit 64132ed

bhsueh committed: fix bugs in GPTOSS CIs and unwaive them
Signed-off-by: bhsueh <[email protected]>
1 parent: 8e9ccd9

2 files changed: 6 additions (+6), 9 deletions (−9)

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 6 additions & 5 deletions
@@ -2704,8 +2704,6 @@ class TestGPTOSS(LlmapiAccuracyTestHarness):
     extra_evaluator_kwargs = {
         "fewshot_as_multiturn": True,
         "apply_chat_template": True,
-        "scores_filter": "exact_match,flexible-extract",
-        "MAX_OUTPUT_LEN": 8192
     }
 
     MODEL_PATH = f"{llm_models_root()}/gpt_oss/gpt-oss-120b"
@@ -2736,7 +2734,8 @@ def test_w4_1gpu(self, moe_backend, cuda_graph, overlap_scheduler, mocker):
 
         with llm:
             model_name = "GPT-OSS/MXFP4"
-            mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN": 8192)
+            mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
+            mocker.patch.dict(GSM8K.EVALUATE_KWARGS, {"scores_filter": "exact_match,flexible-extract"})
             task = GSM8K(model_name)
             task.evaluate(llm,
                           extra_evaluator_kwargs=self.extra_evaluator_kwargs)
@@ -2776,7 +2775,8 @@ def test_w4_4gpus(self, moe_backend, tp_size, pp_size, ep_size,
         with llm:
             model_name = "GPT-OSS/MXFP4"
             task = GSM8K(model_name)
-            mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN": 8192)
+            mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
+            mocker.patch.dict(GSM8K.EVALUATE_KWARGS, {"scores_filter": "exact_match,flexible-extract"})
             task.evaluate(llm,
                           extra_evaluator_kwargs=self.extra_evaluator_kwargs)

@@ -2807,7 +2807,8 @@ def test_w4a16(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,
         with llm:
             model_name = "GPT-OSS/BF16"
             task = GSM8K(model_name)
-            mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN": 8192)
+            mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
+            mocker.patch.dict(GSM8K.EVALUATE_KWARGS, {"scores_filter": "exact_match,flexible-extract"})
             task.evaluate(llm,
                           extra_evaluator_kwargs=self.extra_evaluator_kwargs)

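Background on the pattern used in the hunks above: mocker is the pytest-mock fixture. mocker.patch.object(cls, name, value) replaces a class attribute for the duration of the test, and mocker.patch.dict(d, extra) merges extra entries into an existing dict; both are reverted automatically at test teardown. The removed lines had a ':' where patch.object expects a ',' between the attribute name and the new value, which is the bug this commit fixes. Below is a minimal, self-contained sketch of the same pattern, using a hypothetical Evaluator stand-in rather than the real GSM8K class and assuming pytest plus pytest-mock are installed.

# sketch only: Evaluator is a stand-in, not the repository's GSM8K task
class Evaluator:
    MAX_OUTPUT_LEN = 256
    EVALUATE_KWARGS = {"apply_chat_template": True}


def test_patching_pattern(mocker):
    # Replace the class attribute for this test only.
    # Signature is (target, attribute_name, new_value) -- note the comma,
    # not a colon, between the last two arguments.
    mocker.patch.object(Evaluator, "MAX_OUTPUT_LEN", 8192)

    # Merge extra entries into the existing dict for this test only.
    mocker.patch.dict(Evaluator.EVALUATE_KWARGS,
                      {"scores_filter": "exact_match,flexible-extract"})

    assert Evaluator.MAX_OUTPUT_LEN == 8192
    assert Evaluator.EVALUATE_KWARGS["scores_filter"] == "exact_match,flexible-extract"
    # Both patches are undone when the test ends, so other tests still see
    # MAX_OUTPUT_LEN == 256 and the original EVALUATE_KWARGS.
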
tests/integration/test_lists/waives.txt

Lines changed: 0 additions & 4 deletions
@@ -325,10 +325,6 @@ accuracy/test_cli_flow.py::TestPhi4MiniInstruct::test_tp2 SKIP (https://nvbugs/5
 accuracy/test_cli_flow.py::TestLongAlpaca7B::test_auto_dtype SKIP (https://nvbugs/5481075)
 accuracy/test_llm_api.py::TestPhi4MiniInstruct::test_fp8 SKIP (https://nvbugs/5465143)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[throughput] SKIP (https://nvbugs/5471106)
-accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-cutlass] SKIP (https://nvbugs/5481080)
-accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-cutlass] SKIP (https://nvbugs/5481080)
-accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[ep4-cutlass] SKIP (https://nvbugs/5481080)
-accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-cutlass] SKIP (https://nvbugs/5481080)
 accuracy/test_llm_api_pytorch.py::TestEXAONE4::test_auto_dtype SKIP (https://nvbugs/5481090)
 test_e2e.py::test_ptp_quickstart_advanced_8gpus_chunked_prefill_sq_22k[Llama-4-Maverick-17B-128E-Instruct-FP8-llama4-models/nvidia/Llama-4-Maverick-17B-128E-Instruct-FP8-False] SKIP (https://nvbugs/5481094)
 test_e2e.py::test_ptp_quickstart_advanced_8gpus_chunked_prefill_sq_22k[Llama-4-Maverick-17B-128E-Instruct-FP8-llama4-models/nvidia/Llama-4-Maverick-17B-128E-Instruct-FP8-True] SKIP (https://nvbugs/5481094)
