@@ -345,6 +345,7 @@ class TestLlama3_1_8BInstruct(LlmapiAccuracyTestHarness):
345
345
MODEL_PATH = f"{ llm_models_root ()} /llama-3.1-model/Llama-3.1-8B-Instruct"
346
346
347
347
@pytest .mark .skip_less_device_memory (32000 )
348
+ @pytest .mark .skip_less_device (2 )
348
349
@pytest .mark .parametrize ("disable_overlap_scheduler" , [False , True ])
349
350
def test_auto_dtype (self , disable_overlap_scheduler ):
350
351
ctx_server_config = {"disable_overlap_scheduler" : True }
@@ -374,6 +375,8 @@ def test_auto_dtype(self, disable_overlap_scheduler):
374
375
task = GSM8K (self .MODEL_NAME )
375
376
task .evaluate (llm )
376
377
378
+ @pytest .mark .skip_less_device (2 )
379
+ @skip_pre_hopper
377
380
def test_ngram (self ):
378
381
speculative_decoding_config = {
379
382
"decoding_type" : "NGram" ,
@@ -424,6 +427,7 @@ def test_ngram(self):
424
427
@skip_pre_hopper
425
428
@parametrize_with_ids ("overlap_scheduler" , [True , False ])
426
429
@parametrize_with_ids ("eagle3_one_model" , [True , False ])
430
+ @pytest .mark .skip_less_device (2 )
427
431
def test_eagle3 (self , overlap_scheduler , eagle3_one_model ):
428
432
speculative_decoding_config = {
429
433
"decoding_type" : "Eagle" ,
@@ -578,7 +582,6 @@ def test_tp_pp_symmetric(self, tp, pp, testset):
578
582
return run_parallel_test (self .MODEL_NAME , self .MODEL_PATH , pp , tp , pp ,
579
583
tp , 1 , 1 , [get_accuracy_task (testset )])
580
584
581
- @pytest .mark .skip_less_device (4 )
582
585
@parametrize_with_ids ("ctx_pp" , [2 , 4 ])
583
586
@parametrize_with_ids ("gen_tp" , [1 , 2 ])
584
587
@pytest .mark .parametrize ("testset" , ["GSM8K" , "MMLU" ])
@@ -589,20 +592,18 @@ def test_ctx_pp_gen_tp_asymmetric(self, ctx_pp, gen_tp, testset):
589
592
return run_parallel_test (self .MODEL_NAME , self .MODEL_PATH , ctx_pp , 1 , 1 ,
590
593
gen_tp , 1 , 1 , [get_accuracy_task (testset )])
591
594
592
- @pytest .mark .skip_less_device (4 )
593
595
@pytest .mark .parametrize ("testset" , ["GSM8K" , "MMLU" ])
594
596
def test_multi_instance (self , testset ):
595
597
return run_parallel_test (self .MODEL_NAME , self .MODEL_PATH , 1 , 1 , 1 , 1 ,
596
598
2 , 2 , [get_accuracy_task (testset )])
597
599
598
600
599
- @pytest .mark .skip_less_device_memory (140000 )
600
- @pytest .mark .timeout (3600 )
601
- @pytest .mark .skip_less_device (4 )
602
601
class TestLlama4ScoutInstruct (LlmapiAccuracyTestHarness ):
603
602
MODEL_NAME = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
604
603
MODEL_PATH = f"{ llm_models_root ()} /llama4-models/Llama-4-Scout-17B-16E-Instruct"
605
604
605
+ @pytest .mark .skip_less_device_memory (140000 )
606
+ @pytest .mark .timeout (3600 )
606
607
@pytest .mark .skip_less_device (8 )
607
608
@pytest .mark .parametrize ("overlap_scheduler" , [False , True ])
608
609
def test_auto_dtype (self , overlap_scheduler ):
@@ -683,7 +684,7 @@ def test_nixl_backend(self):
683
684
@parametrize_with_ids ("overlap_scheduler" , [True , False ])
684
685
@parametrize_with_ids ("mtp_nextn" ,
685
686
[0 , pytest .param (2 , marks = skip_pre_hopper )])
686
- @pytest .mark .skip_less_device (4 )
687
+ @pytest .mark .skip_less_device (8 )
687
688
def test_auto_dtype (self , overlap_scheduler , mtp_nextn ):
688
689
ctx_server_config = {"disable_overlap_scheduler" : True }
689
690
gen_server_config = {"disable_overlap_scheduler" : not overlap_scheduler }
@@ -727,6 +728,7 @@ class TestGemma3_1BInstruct(LlmapiAccuracyTestHarness):
727
728
MODEL_NAME = "google/gemma-3-1b-it"
728
729
MODEL_PATH = f"{ llm_models_root ()} /gemma/gemma-3-1b-it/"
729
730
731
+ @pytest .mark .skip_less_device (2 )
730
732
@pytest .mark .parametrize ("overlap_scheduler" , [False , True ])
731
733
def test_auto_dtype (self , overlap_scheduler ):
732
734
pytest .skip (
@@ -816,8 +818,9 @@ def test_nixl_backend(self):
816
818
task = GSM8K (self .MODEL_NAME )
817
819
task .evaluate (llm )
818
820
819
- @pytest .mark .parametrize ("overlap_scheduler" , [False , True ])
820
821
@skip_pre_hopper
822
+ @pytest .mark .skip_less_device (2 )
823
+ @pytest .mark .parametrize ("overlap_scheduler" , [False , True ])
821
824
def test_auto_dtype (self , overlap_scheduler ):
822
825
ctx_server_config = {
823
826
"disable_overlap_scheduler" : True ,
0 commit comments