Skip to content

Commit 61783c7

Browse files
committed
add gpt-oss 20b tests
Signed-off-by: Xin He (SW-GPU) <[email protected]>
1 parent 05c9e8a commit 61783c7

File tree

9 files changed

+97
-191
lines changed

9 files changed

+97
-191
lines changed

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 17 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -2835,8 +2835,8 @@ def test_auto_dtype_long_rope(self):
28352835

28362836

28372837
@skip_pre_hopper
2838-
@pytest.mark.skip_less_device_memory(100000)
2839-
class TestGPTOSS_120B(LlmapiAccuracyTestHarness):
2838+
@pytest.mark.skip_less_device_memory(80000)
2839+
class TestGPTOSS(LlmapiAccuracyTestHarness):
28402840
kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.5)
28412841
extra_evaluator_kwargs = {
28422842
"fewshot_as_multiturn": True,
@@ -2845,16 +2845,16 @@ class TestGPTOSS_120B(LlmapiAccuracyTestHarness):
28452845

28462846
MODEL_PATH = f"{llm_models_root()}/gpt_oss/gpt-oss-120b"
28472847

2848-
@pytest.mark.parametrize("moe_backend", [
2849-
"CUTLASS",
2850-
pytest.param("TRTLLM", marks=skip_pre_blackwell),
2851-
pytest.param("TRITON", marks=pytest.mark.install_triton)
2852-
],
2853-
ids=["cutlass", "trtllm", "triton"])
2848+
@pytest.mark.parametrize(
2849+
"moe_backend",
2850+
["CUTLASS",
2851+
pytest.param("TRTLLM", marks=skip_pre_blackwell), "TRITON"],
2852+
ids=["cutlass", "trtllm", "triton"])
28542853
@pytest.mark.parametrize("cuda_graph,overlap_scheduler", [
28552854
(True, True),
28562855
])
28572856
def test_w4_1gpu(self, moe_backend, cuda_graph, overlap_scheduler, mocker):
2857+
MODEL_PATH = f"{llm_models_root()}/gpt_oss/gpt-oss-20b"
28582858
mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
28592859
mocker.patch.dict(GSM8K.EVALUATE_KWARGS,
28602860
{"scores_filter": "exact_match,flexible-extract"})
@@ -2865,7 +2865,7 @@ def test_w4_1gpu(self, moe_backend, cuda_graph, overlap_scheduler, mocker):
28652865
disable_overlap_scheduler=not overlap_scheduler,
28662866
cuda_graph_config=CudaGraphConfig() if cuda_graph else None)
28672867

2868-
llm = LLM(self.MODEL_PATH,
2868+
llm = LLM(MODEL_PATH,
28692869
tensor_parallel_size=1,
28702870
pipeline_parallel_size=1,
28712871
moe_expert_parallel_size=1,
@@ -2880,12 +2880,11 @@ def test_w4_1gpu(self, moe_backend, cuda_graph, overlap_scheduler, mocker):
28802880
extra_evaluator_kwargs=self.extra_evaluator_kwargs)
28812881

28822882
@pytest.mark.skip_less_device(4)
2883-
@pytest.mark.parametrize("moe_backend", [
2884-
"CUTLASS",
2885-
pytest.param("TRTLLM", marks=skip_pre_blackwell),
2886-
pytest.param("TRITON", marks=pytest.mark.install_triton)
2887-
],
2888-
ids=["cutlass", "trtllm", "triton"])
2883+
@pytest.mark.parametrize(
2884+
"moe_backend",
2885+
["CUTLASS",
2886+
pytest.param("TRTLLM", marks=skip_pre_blackwell), "TRITON"],
2887+
ids=["cutlass", "trtllm", "triton"])
28892888
@pytest.mark.parametrize(
28902889
"tp_size,pp_size,ep_size,attention_dp,cuda_graph,overlap_scheduler", [
28912890
(4, 1, 1, False, True, True),
@@ -2954,52 +2953,6 @@ def test_w4a16(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,
29542953
task.evaluate(llm,
29552954
extra_evaluator_kwargs=self.extra_evaluator_kwargs)
29562955

2957-
2958-
@skip_pre_hopper
2959-
@pytest.mark.skip_less_device_memory(100000)
2960-
class TestGPTOSS_20B(LlmapiAccuracyTestHarness):
2961-
kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.5)
2962-
extra_evaluator_kwargs = {
2963-
"fewshot_as_multiturn": True,
2964-
"apply_chat_template": True,
2965-
"scores_filter": "exact_match,flexible-extract",
2966-
"MAX_OUTPUT_LEN": 8192
2967-
}
2968-
2969-
MODEL_PATH = f"{llm_models_root()}/gpt_oss/gpt-oss-20b"
2970-
2971-
@pytest.mark.parametrize(
2972-
"moe_backend",
2973-
["CUTLASS",
2974-
pytest.param("TRTLLM", marks=skip_pre_blackwell), "TRITON"],
2975-
ids=["cutlass", "trtllm", "triton"])
2976-
@pytest.mark.parametrize("cuda_graph,overlap_scheduler", [
2977-
(True, True),
2978-
])
2979-
def test_w4_1gpu(self, moe_backend, cuda_graph, overlap_scheduler, mocker):
2980-
if moe_backend == "TRITON" and not IS_TRITON_KERNELS_AVAILABLE:
2981-
pytest.skip("Triton kernels are not available")
2982-
2983-
pytorch_config = dict(
2984-
disable_overlap_scheduler=not overlap_scheduler,
2985-
cuda_graph_config=CudaGraphConfig() if cuda_graph else None)
2986-
2987-
llm = LLM(self.MODEL_PATH,
2988-
tensor_parallel_size=1,
2989-
pipeline_parallel_size=1,
2990-
moe_expert_parallel_size=1,
2991-
kv_cache_config=self.kv_cache_config,
2992-
max_seq_len=8192,
2993-
**pytorch_config,
2994-
moe_config=MoeConfig(backend=moe_backend))
2995-
2996-
with llm:
2997-
model_name = "GPT-OSS/MXFP4"
2998-
mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
2999-
task = GSM8K(model_name)
3000-
task.evaluate(llm,
3001-
extra_evaluator_kwargs=self.extra_evaluator_kwargs)
3002-
30032956
@pytest.mark.skip_less_device(2)
30042957
@pytest.mark.parametrize(
30052958
"moe_backend",
@@ -3013,8 +2966,9 @@ def test_w4_1gpu(self, moe_backend, cuda_graph, overlap_scheduler, mocker):
30132966
(2, 1, 2, True, True, True),
30142967
],
30152968
ids=["tp2", "ep2", "dp2"])
3016-
def test_w4_4gpus(self, moe_backend, tp_size, pp_size, ep_size,
2969+
def test_w4_2gpus(self, moe_backend, tp_size, pp_size, ep_size,
30172970
attention_dp, cuda_graph, overlap_scheduler, mocker):
2971+
MODEL_PATH = f"{llm_models_root()}/gpt_oss/gpt-oss-20b"
30182972
if moe_backend == "TRITON":
30192973
if not IS_TRITON_KERNELS_AVAILABLE:
30202974
pytest.skip("Triton kernels are not available")
@@ -3023,7 +2977,7 @@ def test_w4_4gpus(self, moe_backend, tp_size, pp_size, ep_size,
30232977
disable_overlap_scheduler=not overlap_scheduler,
30242978
cuda_graph_config=CudaGraphConfig() if cuda_graph else None)
30252979

3026-
llm = LLM(self.MODEL_PATH,
2980+
llm = LLM(MODEL_PATH,
30272981
tensor_parallel_size=tp_size,
30282982
pipeline_parallel_size=pp_size,
30292983
moe_expert_parallel_size=ep_size,

tests/integration/defs/conftest.py

Lines changed: 0 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -2131,10 +2131,6 @@ def pytest_configure(config):
21312131
# avoid thread leak of tqdm's TMonitor
21322132
tqdm.tqdm.monitor_interval = 0
21332133

2134-
# Register custom marks
2135-
config.addinivalue_line(
2136-
"markers", "install_triton: mark test to install triton from source")
2137-
21382134

21392135
def deselect_by_regex(regexp, items, test_prefix, config):
21402136
"""Filter out tests based on the patterns specified in the given list of regular expressions.
@@ -2399,36 +2395,3 @@ def torch_empty_cache() -> None:
23992395
"""
24002396
if torch.cuda.is_available():
24012397
torch.cuda.empty_cache()
2402-
2403-
2404-
@pytest.fixture
2405-
def install_triton(trt_llm_root, llm_venv):
2406-
"""
2407-
Install triton from source before each test.
2408-
"""
2409-
2410-
triton_root = f"{trt_llm_root}/triton"
2411-
2412-
if not os.path.exists(triton_root):
2413-
raise FileNotFoundError(f"Triton root {triton_root} does not exist")
2414-
2415-
llm_venv.run_cmd(["-m", "pip", "install", f"{triton_root}/dist/*.whl"])
2416-
os.environ["TRITON_ROOT"] = triton_root
2417-
2418-
yield
2419-
2420-
llm_venv.run_cmd(["-m", "pip", "uninstall", "-y", "triton"])
2421-
llm_venv.run_cmd(
2422-
["-m", "pip", "install", f"{trt_llm_root}/requirements.txt"])
2423-
os.environ.pop("TRITON_ROOT")
2424-
2425-
2426-
def pytest_collection_modifyitems(config, items):
2427-
"""
2428-
Automatically apply the install_triton fixture to tests marked with install_triton.
2429-
"""
2430-
for item in items:
2431-
if item.get_closest_marker("install_triton"):
2432-
# Add the install_triton fixture to the test's fixturenames
2433-
if "install_triton" not in item.fixturenames:
2434-
item.fixturenames.append("install_triton")

tests/integration/test_lists/qa/llm_function_full.txt

Lines changed: 22 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -548,31 +548,28 @@ accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_w4a8_mxfp4[fp8-latency
548548
accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_w4a8_mxfp4[mxfp8-latency-TRTLLM]
549549
accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_w4a8_mxfp4[mxfp8-latency-CUTLASS]
550550
accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_w4a16_mxfp4[latency-TRTLLM]
551-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_120B::test_w4_1gpu[True-True-cutlass]
552-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_120B::test_w4_1gpu[True-True-trtllm]
553-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_120B::test_w4_1gpu[True-True-triton]
554-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_120B::test_w4_4gpus[tp4-cutlass]
555-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_120B::test_w4_4gpus[tp4-trtllm]
556-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_120B::test_w4_4gpus[tp4-triton]
557-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_120B::test_w4_4gpus[ep4-cutlass]
558-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_120B::test_w4_4gpus[ep4-trtllm]
559-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_120B::test_w4_4gpus[ep4-triton]
560-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_120B::test_w4_4gpus[dp4-cutlass]
561-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_120B::test_w4_4gpus[dp4-trtllm]
562-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_120B::test_w4_4gpus[dp4-triton]
563-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_120B::test_w4a16[dp4]
564-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_20B::test_w4_1gpu[True-True-cutlass]
565-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_20B::test_w4_1gpu[True-True-trtllm]
566-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_20B::test_w4_1gpu[True-True-triton]
567-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_20B::test_w4_2gpus[tp2-cutlass]
568-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_20B::test_w4_2gpus[tp2-trtllm]
569-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_20B::test_w4_2gpus[tp2-triton]
570-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_20B::test_w4_2gpus[ep2-cutlass]
571-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_20B::test_w4_2gpus[ep2-trtllm]
572-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_20B::test_w4_2gpus[ep2-triton]
573-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_20B::test_w4_2gpus[dp2-cutlass]
574-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_20B::test_w4_2gpus[dp2-trtllm]
575-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_20B::test_w4_2gpus[dp2-triton]
551+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-cutlass]
552+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-triton]
553+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-trtllm]
554+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[tp2-cutlass]
555+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[tp2-trtllm]
556+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[tp2-triton]
557+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[ep2-cutlass]
558+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[ep2-trtllm]
559+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[ep2-triton]
560+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[dp2-cutlass]
561+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[dp2-trtllm]
562+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[dp2-triton]
563+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-cutlass]
564+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-triton]
565+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-trtllm]
566+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[ep4-cutlass]
567+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[ep4-triton]
568+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[ep4-trtllm]
569+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-cutlass]
570+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-triton]
571+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-trtllm]
572+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4a16[dp4]
576573
accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[False]
577574
accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[True]
578575
accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ngram

tests/integration/test_lists/qa/llm_function_nim.txt

Lines changed: 22 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -10,31 +10,28 @@ accuracy/test_cli_flow.py::TestLlama3_1NemotronNano8Bv1::test_fp8_prequantized
1010
accuracy/test_cli_flow.py::TestNemotronMini4BInstruct::test_fp8_prequantized
1111
accuracy/test_cli_flow.py::TestNemotronUltra::test_auto_dtype[tp8-cuda_graph=True] TIMEOUT (240)
1212
accuracy/test_cli_flow.py::TestNemotronUltra::test_fp8_prequantized[tp8-cuda_graph=True]
13-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_120B::test_w4_1gpu[True-True-cutlass]
14-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_120B::test_w4_1gpu[True-True-triton]
15-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_120B::test_w4_1gpu[True-True-trtllm]
16-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_120B::test_w4_4gpus[dp4-cutlass]
17-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_120B::test_w4_4gpus[dp4-triton]
18-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_120B::test_w4_4gpus[dp4-trtllm]
19-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_120B::test_w4_4gpus[ep4-cutlass]
20-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_120B::test_w4_4gpus[ep4-triton]
21-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_120B::test_w4_4gpus[ep4-trtllm]
22-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_120B::test_w4_4gpus[tp4-cutlass]
23-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_120B::test_w4_4gpus[tp4-triton]
24-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_120B::test_w4_4gpus[tp4-trtllm]
25-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_120B::test_w4a16[dp4]
26-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_20B::test_w4_1gpu[True-True-cutlass]
27-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_20B::test_w4_1gpu[True-True-trtllm]
28-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_20B::test_w4_1gpu[True-True-triton]
29-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_20B::test_w4_2gpus[tp2-cutlass]
30-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_20B::test_w4_2gpus[tp2-trtllm]
31-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_20B::test_w4_2gpus[tp2-triton]
32-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_20B::test_w4_2gpus[ep2-cutlass]
33-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_20B::test_w4_2gpus[ep2-trtllm]
34-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_20B::test_w4_2gpus[ep2-triton]
35-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_20B::test_w4_2gpus[dp2-cutlass]
36-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_20B::test_w4_2gpus[dp2-trtllm]
37-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_20B::test_w4_2gpus[dp2-triton]
13+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-cutlass]
14+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-triton]
15+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-trtllm]
16+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[tp2-cutlass]
17+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[tp2-trtllm]
18+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[tp2-triton]
19+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[ep2-cutlass]
20+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[ep2-trtllm]
21+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[ep2-triton]
22+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[dp2-cutlass]
23+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[dp2-trtllm]
24+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[dp2-triton]
25+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-cutlass]
26+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-triton]
27+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-trtllm]
28+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[ep4-cutlass]
29+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[ep4-triton]
30+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[ep4-trtllm]
31+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-cutlass]
32+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-triton]
33+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-trtllm]
34+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4a16[dp4]
3835
accuracy/test_llm_api_pytorch.py::TestLlama3_3NemotronSuper49Bv1::test_fp8_prequantized_tp2
3936
accuracy/test_llm_api_pytorch.py::TestLlama3_1NemotronNano8Bv1::test_auto_dtype
4037
accuracy/test_llm_api_pytorch.py::TestLlama3_1NemotronNano8Bv1::test_fp8_prequantized

tests/integration/test_lists/qa/llm_function_sanity.txt

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,18 @@ accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus_online_ep
4545
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False]
4646
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=2-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False]
4747
accuracy/test_llm_api_pytorch.py::TestGemma3_1BInstruct::test_auto_dtype
48-
<<<<<<< HEAD
4948
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-cutlass]
5049
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-triton]
5150
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-trtllm]
51+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[tp2-cutlass]
52+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[tp2-trtllm]
53+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[tp2-triton]
54+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[ep2-cutlass]
55+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[ep2-trtllm]
56+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[ep2-triton]
57+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[dp2-cutlass]
58+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[dp2-trtllm]
59+
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[dp2-triton]
5260
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-cutlass]
5361
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-triton]
5462
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-trtllm]
@@ -60,21 +68,6 @@ accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-triton]
6068
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-trtllm]
6169
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4a16[dp4]
6270
accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype
63-
=======
64-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_120B::test_w4a16[dp4]
65-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_20B::test_w4_1gpu[True-True-cutlass]
66-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_20B::test_w4_1gpu[True-True-trtllm]
67-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_20B::test_w4_1gpu[True-True-triton]
68-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_20B::test_w4_2gpus[tp2-cutlass]
69-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_20B::test_w4_2gpus[tp2-trtllm]
70-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_20B::test_w4_2gpus[tp2-triton]
71-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_20B::test_w4_2gpus[ep2-cutlass]
72-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_20B::test_w4_2gpus[ep2-trtllm]
73-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_20B::test_w4_2gpus[ep2-triton]
74-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_20B::test_w4_2gpus[dp2-cutlass]
75-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_20B::test_w4_2gpus[dp2-trtllm]
76-
accuracy/test_llm_api_pytorch.py::TestGPTOSS_20B::test_w4_2gpus[dp2-triton]
77-
>>>>>>> 7856a2591 (add gptoss 20g tests)
7871
accuracy/test_llm_api_pytorch.py::TestKanana_Instruct::test_auto_dtype
7972
accuracy/test_llm_api_pytorch.py::TestKimiK2::test_fp8_blockscale[latency]
8073
accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_nvfp4

0 commit comments

Comments
 (0)