@@ -2719,6 +2719,7 @@ class TestGPTOSS(LlmapiAccuracyTestHarness):
2719
2719
(True , True ),
2720
2720
])
2721
2721
def test_w4_1gpu (self , moe_backend , cuda_graph , overlap_scheduler , mocker ):
2722
+        pytest.skip("https://nvbugs/5481087")
2722
2723
if moe_backend == "TRITON" and not IS_TRITON_KERNELS_AVAILABLE :
2723
2724
pytest .skip ("Triton kernels are not available" )
2724
2725
@@ -2736,7 +2737,7 @@ def test_w4_1gpu(self, moe_backend, cuda_graph, overlap_scheduler, mocker):
2736
2737
2737
2738
with llm :
2738
2739
model_name = "GPT-OSS/MXFP4"
2739
-            mocker.patch.object(GSM8K, {"MAX_OUTPUT_LEN": 8192})
2740
+            mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
2740
2741
task = GSM8K (model_name )
2741
2742
task .evaluate (llm ,
2742
2743
extra_evaluator_kwargs = self .extra_evaluator_kwargs )
@@ -2756,6 +2757,7 @@ def test_w4_1gpu(self, moe_backend, cuda_graph, overlap_scheduler, mocker):
2756
2757
ids = ["tp4" , "ep4" , "dp4" ])
2757
2758
def test_w4_4gpus (self , moe_backend , tp_size , pp_size , ep_size ,
2758
2759
attention_dp , cuda_graph , overlap_scheduler , mocker ):
2760
+        pytest.skip("https://nvbugs/5481087")
2759
2761
if moe_backend == "TRITON" :
2760
2762
if not IS_TRITON_KERNELS_AVAILABLE :
2761
2763
pytest .skip ("Triton kernels are not available" )
@@ -2776,7 +2778,7 @@ def test_w4_4gpus(self, moe_backend, tp_size, pp_size, ep_size,
2776
2778
with llm :
2777
2779
model_name = "GPT-OSS/MXFP4"
2778
2780
task = GSM8K (model_name )
2779
-            mocker.patch.object(GSM8K, {"MAX_OUTPUT_LEN": 8192})
2781
+            mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
2780
2782
task .evaluate (llm ,
2781
2783
extra_evaluator_kwargs = self .extra_evaluator_kwargs )
2782
2784
0 commit comments