@@ -2835,8 +2835,8 @@ def test_auto_dtype_long_rope(self):
 
 
 @skip_pre_hopper
-@pytest.mark.skip_less_device_memory(100000)
-class TestGPTOSS_120B(LlmapiAccuracyTestHarness):
+@pytest.mark.skip_less_device_memory(80000)
+class TestGPTOSS(LlmapiAccuracyTestHarness):
     kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.5)
     extra_evaluator_kwargs = {
         "fewshot_as_multiturn": True,
@@ -2845,16 +2845,16 @@ class TestGPTOSS_120B(LlmapiAccuracyTestHarness):
 
     MODEL_PATH = f"{llm_models_root()}/gpt_oss/gpt-oss-120b"
 
-    @pytest.mark.parametrize("moe_backend", [
-        "CUTLASS",
-        pytest.param("TRTLLM", marks=skip_pre_blackwell),
-        pytest.param("TRITON", marks=pytest.mark.install_triton)
-    ],
-                             ids=["cutlass", "trtllm", "triton"])
+    @pytest.mark.parametrize(
+        "moe_backend",
+        ["CUTLASS",
+         pytest.param("TRTLLM", marks=skip_pre_blackwell), "TRITON"],
+        ids=["cutlass", "trtllm", "triton"])
     @pytest.mark.parametrize("cuda_graph,overlap_scheduler", [
         (True, True),
     ])
     def test_w4_1gpu(self, moe_backend, cuda_graph, overlap_scheduler, mocker):
+        MODEL_PATH = f"{llm_models_root()}/gpt_oss/gpt-oss-20b"
         mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
         mocker.patch.dict(GSM8K.EVALUATE_KWARGS,
                           {"scores_filter": "exact_match,flexible-extract"})
@@ -2865,7 +2865,7 @@ def test_w4_1gpu(self, moe_backend, cuda_graph, overlap_scheduler, mocker):
             disable_overlap_scheduler=not overlap_scheduler,
             cuda_graph_config=CudaGraphConfig() if cuda_graph else None)
 
-        llm = LLM(self.MODEL_PATH,
+        llm = LLM(MODEL_PATH,
                   tensor_parallel_size=1,
                   pipeline_parallel_size=1,
                   moe_expert_parallel_size=1,
@@ -2880,12 +2880,11 @@ def test_w4_1gpu(self, moe_backend, cuda_graph, overlap_scheduler, mocker):
                           extra_evaluator_kwargs=self.extra_evaluator_kwargs)
 
     @pytest.mark.skip_less_device(4)
-    @pytest.mark.parametrize("moe_backend", [
-        "CUTLASS",
-        pytest.param("TRTLLM", marks=skip_pre_blackwell),
-        pytest.param("TRITON", marks=pytest.mark.install_triton)
-    ],
-                             ids=["cutlass", "trtllm", "triton"])
+    @pytest.mark.parametrize(
+        "moe_backend",
+        ["CUTLASS",
+         pytest.param("TRTLLM", marks=skip_pre_blackwell), "TRITON"],
+        ids=["cutlass", "trtllm", "triton"])
     @pytest.mark.parametrize(
         "tp_size,pp_size,ep_size,attention_dp,cuda_graph,overlap_scheduler", [
             (4, 1, 1, False, True, True),
@@ -2954,52 +2953,6 @@ def test_w4a16(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,
             task.evaluate(llm,
                           extra_evaluator_kwargs=self.extra_evaluator_kwargs)
 
-
-@skip_pre_hopper
-@pytest.mark.skip_less_device_memory(100000)
-class TestGPTOSS_20B(LlmapiAccuracyTestHarness):
-    kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.5)
-    extra_evaluator_kwargs = {
-        "fewshot_as_multiturn": True,
-        "apply_chat_template": True,
-        "scores_filter": "exact_match,flexible-extract",
-        "MAX_OUTPUT_LEN": 8192
-    }
-
-    MODEL_PATH = f"{llm_models_root()}/gpt_oss/gpt-oss-20b"
-
-    @pytest.mark.parametrize(
-        "moe_backend",
-        ["CUTLASS",
-         pytest.param("TRTLLM", marks=skip_pre_blackwell), "TRITON"],
-        ids=["cutlass", "trtllm", "triton"])
-    @pytest.mark.parametrize("cuda_graph,overlap_scheduler", [
-        (True, True),
-    ])
-    def test_w4_1gpu(self, moe_backend, cuda_graph, overlap_scheduler, mocker):
-        if moe_backend == "TRITON" and not IS_TRITON_KERNELS_AVAILABLE:
-            pytest.skip("Triton kernels are not available")
-
-        pytorch_config = dict(
-            disable_overlap_scheduler=not overlap_scheduler,
-            cuda_graph_config=CudaGraphConfig() if cuda_graph else None)
-
-        llm = LLM(self.MODEL_PATH,
-                  tensor_parallel_size=1,
-                  pipeline_parallel_size=1,
-                  moe_expert_parallel_size=1,
-                  kv_cache_config=self.kv_cache_config,
-                  max_seq_len=8192,
-                  **pytorch_config,
-                  moe_config=MoeConfig(backend=moe_backend))
-
-        with llm:
-            model_name = "GPT-OSS/MXFP4"
-            mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
-            task = GSM8K(model_name)
-            task.evaluate(llm,
-                          extra_evaluator_kwargs=self.extra_evaluator_kwargs)
-
 
     @pytest.mark.skip_less_device(2)
     @pytest.mark.parametrize(
         "moe_backend",
@@ -3013,8 +2966,9 @@ def test_w4_1gpu(self, moe_backend, cuda_graph, overlap_scheduler, mocker):
             (2, 1, 2, True, True, True),
         ],
         ids=["tp2", "ep2", "dp2"])
-    def test_w4_4gpus(self, moe_backend, tp_size, pp_size, ep_size,
+    def test_w4_2gpus(self, moe_backend, tp_size, pp_size, ep_size,
                       attention_dp, cuda_graph, overlap_scheduler, mocker):
+        MODEL_PATH = f"{llm_models_root()}/gpt_oss/gpt-oss-20b"
         if moe_backend == "TRITON":
             if not IS_TRITON_KERNELS_AVAILABLE:
                 pytest.skip("Triton kernels are not available")
@@ -3023,7 +2977,7 @@ def test_w4_4gpus(self, moe_backend, tp_size, pp_size, ep_size,
             disable_overlap_scheduler=not overlap_scheduler,
             cuda_graph_config=CudaGraphConfig() if cuda_graph else None)
 
-        llm = LLM(self.MODEL_PATH,
+        llm = LLM(MODEL_PATH,
                   tensor_parallel_size=tp_size,
                   pipeline_parallel_size=pp_size,
                   moe_expert_parallel_size=ep_size,