@@ -345,6 +345,7 @@ class TestLlama3_1_8BInstruct(LlmapiAccuracyTestHarness):
345345 MODEL_PATH = f"{ llm_models_root ()} /llama-3.1-model/Llama-3.1-8B-Instruct"
346346
347347 @pytest .mark .skip_less_device_memory (32000 )
348+ @pytest .mark .skip_less_device (2 )
348349 @pytest .mark .parametrize ("disable_overlap_scheduler" , [False , True ])
349350 def test_auto_dtype (self , disable_overlap_scheduler ):
350351 ctx_server_config = {"disable_overlap_scheduler" : True }
@@ -374,6 +375,8 @@ def test_auto_dtype(self, disable_overlap_scheduler):
374375 task = GSM8K (self .MODEL_NAME )
375376 task .evaluate (llm )
376377
378+ @pytest .mark .skip_less_device (2 )
379+ @skip_pre_hopper
377380 def test_ngram (self ):
378381 speculative_decoding_config = {
379382 "decoding_type" : "NGram" ,
@@ -424,6 +427,7 @@ def test_ngram(self):
424427 @skip_pre_hopper
425428 @parametrize_with_ids ("overlap_scheduler" , [True , False ])
426429 @parametrize_with_ids ("eagle3_one_model" , [True , False ])
430+ @pytest .mark .skip_less_device (2 )
427431 def test_eagle3 (self , overlap_scheduler , eagle3_one_model ):
428432 speculative_decoding_config = {
429433 "decoding_type" : "Eagle" ,
@@ -578,7 +582,6 @@ def test_tp_pp_symmetric(self, tp, pp, testset):
578582 return run_parallel_test (self .MODEL_NAME , self .MODEL_PATH , pp , tp , pp ,
579583 tp , 1 , 1 , [get_accuracy_task (testset )])
580584
581- @pytest .mark .skip_less_device (4 )
582585 @parametrize_with_ids ("ctx_pp" , [2 , 4 ])
583586 @parametrize_with_ids ("gen_tp" , [1 , 2 ])
584587 @pytest .mark .parametrize ("testset" , ["GSM8K" , "MMLU" ])
@@ -589,20 +592,18 @@ def test_ctx_pp_gen_tp_asymmetric(self, ctx_pp, gen_tp, testset):
589592 return run_parallel_test (self .MODEL_NAME , self .MODEL_PATH , ctx_pp , 1 , 1 ,
590593 gen_tp , 1 , 1 , [get_accuracy_task (testset )])
591594
592- @pytest .mark .skip_less_device (4 )
593595 @pytest .mark .parametrize ("testset" , ["GSM8K" , "MMLU" ])
594596 def test_multi_instance (self , testset ):
595597 return run_parallel_test (self .MODEL_NAME , self .MODEL_PATH , 1 , 1 , 1 , 1 ,
596598 2 , 2 , [get_accuracy_task (testset )])
597599
598600
599- @pytest .mark .skip_less_device_memory (140000 )
600- @pytest .mark .timeout (3600 )
601- @pytest .mark .skip_less_device (4 )
602601class TestLlama4ScoutInstruct (LlmapiAccuracyTestHarness ):
603602 MODEL_NAME = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
604603 MODEL_PATH = f"{ llm_models_root ()} /llama4-models/Llama-4-Scout-17B-16E-Instruct"
605604
605+ @pytest .mark .skip_less_device_memory (140000 )
606+ @pytest .mark .timeout (3600 )
606607 @pytest .mark .skip_less_device (8 )
607608 @pytest .mark .parametrize ("overlap_scheduler" , [False , True ])
608609 def test_auto_dtype (self , overlap_scheduler ):
@@ -683,7 +684,7 @@ def test_nixl_backend(self):
683684 @parametrize_with_ids ("overlap_scheduler" , [True , False ])
684685 @parametrize_with_ids ("mtp_nextn" ,
685686 [0 , pytest .param (2 , marks = skip_pre_hopper )])
686- @pytest .mark .skip_less_device (4 )
687+ @pytest .mark .skip_less_device (8 )
687688 def test_auto_dtype (self , overlap_scheduler , mtp_nextn ):
688689 ctx_server_config = {"disable_overlap_scheduler" : True }
689690 gen_server_config = {"disable_overlap_scheduler" : not overlap_scheduler }
@@ -727,6 +728,7 @@ class TestGemma3_1BInstruct(LlmapiAccuracyTestHarness):
727728 MODEL_NAME = "google/gemma-3-1b-it"
728729 MODEL_PATH = f"{ llm_models_root ()} /gemma/gemma-3-1b-it/"
729730
731+ @pytest .mark .skip_less_device (2 )
730732 @pytest .mark .parametrize ("overlap_scheduler" , [False , True ])
731733 def test_auto_dtype (self , overlap_scheduler ):
732734 pytest .skip (
@@ -816,8 +818,9 @@ def test_nixl_backend(self):
816818 task = GSM8K (self .MODEL_NAME )
817819 task .evaluate (llm )
818820
819- @pytest .mark .parametrize ("overlap_scheduler" , [False , True ])
820821 @skip_pre_hopper
822+ @pytest .mark .skip_less_device (2 )
823+ @pytest .mark .parametrize ("overlap_scheduler" , [False , True ])
821824 def test_auto_dtype (self , overlap_scheduler ):
822825 ctx_server_config = {
823826 "disable_overlap_scheduler" : True ,
0 commit comments