Commit ea2f804

Merge branch 'main' into 2-model-perf

2 parents bbad89a + 15ec2b8
File tree

5 files changed: +11, -1 lines

tests/integration/test_lists/qa/llm_function_l20.txt

Lines changed: 0 additions & 1 deletion
@@ -19,7 +19,6 @@ accuracy/test_llm_api.py::TestMistralNemo12B::test_fp8
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_chunked_prefill[attn_backend=FLASHINFER]
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_chunked_prefill[attn_backend=TRTLLM]
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_llm_sampler
-accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_beam_search
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_eagle3[eagle3_one_model=True-overlap_scheduler=True]
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_eagle3[eagle3_one_model=False-overlap_scheduler=False]
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_ngram
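
These QA lists are plain text, one pytest node ID per line; the hunk above removes test_fp8_beam_search from the L20 function list. As a minimal sketch of how such a list can be fed to pytest (an assumed stand-alone runner, not the repo's actual test driver):

import sys

import pytest


def run_test_list(list_path: str) -> int:
    """Run every pytest node ID listed in list_path, one per line."""
    with open(list_path) as f:
        node_ids = [line.strip() for line in f if line.strip()]
    # pytest.main accepts node IDs exactly as they would appear on the CLI.
    return pytest.main(node_ids)


if __name__ == "__main__":
    sys.exit(run_test_list(sys.argv[1]))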

tests/integration/test_lists/waives.txt

Lines changed: 3 additions & 0 deletions
@@ -346,3 +346,6 @@ accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8[fp8kv=False-
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8[fp8kv=True-attn_backend=FLASHINFER-torch_compile=True] SKIP (https://nvbugs/5485102)
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp4-attn_backend=FLASHINFER-torch_compile=True] SKIP (https://nvbugs/5485109)
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_4gpus[tp4-fp8kv=False-attn_backend=FLASHINFER-torch_compile=True] SKIP (https://nvbugs/5485116)
+accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus_online_eplb[mtp_nextn=2] SKIP (https://nvbugs/5444687)
+accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus_online_eplb[fp8kv=True] SKIP (https://nvbugs/5444687)
+accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_4gpus[tp4-fp8kv=True-attn_backend=FLASHINFER-torch_compile=True] SKIP (https://nvbugs/5488580)
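
Each waive entry pairs a pytest node ID with a SKIP marker and the tracking bug URL. A minimal parsing sketch (format inferred from the entries above; node IDs contain no whitespace):

import re

WAIVE_RE = re.compile(r"^(?P<test>\S+)\s+SKIP\s+\((?P<bug>[^)]+)\)$")


def parse_waive(line: str):
    """Return (test_id, bug_url) for a waive entry, or None if it does not match."""
    match = WAIVE_RE.match(line.strip())
    return (match.group("test"), match.group("bug")) if match else None


print(parse_waive(
    "accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::"
    "test_nvfp4_4gpus_online_eplb[fp8kv=True] SKIP (https://nvbugs/5444687)"))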

tests/unittest/llmapi/test_executor.py

Lines changed: 2 additions & 0 deletions
@@ -78,6 +78,7 @@ def llama_7b_tp2_path(engine_path: Path) -> Path:
     return path


+@pytest.mark.skip(reason="https://nvbugs/5488280")
 @pytest.mark.skipif(WORLD_SIZE != 1, reason="Must run on single MPI rank")
 def test_generation_bs2(llama_7b_bs2_path: Path):
     tokenizer = TransformersTokenizer.from_pretrained(llama_7b_bs2_path)
@@ -99,6 +100,7 @@ def test_generation_bs2(llama_7b_bs2_path: Path):
                      'E F G H I K L M')


+@pytest.mark.skip(reason="https://nvbugs/5488280")
 @pytest.mark.skipif(WORLD_SIZE != 1, reason="Must run on single MPI rank")
 def test_sync_generation(llama_7b_path: Path):
     tokenizer = TransformersTokenizer.from_pretrained(llama_7b_path)
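
Both hunks stack an unconditional skip on top of the existing conditional one; pytest evaluates the markers independently, so the unconditional skip wins. A self-contained illustration (WORLD_SIZE is a stand-in value here, not the module's real constant):

import pytest

WORLD_SIZE = 1  # stand-in; the real module derives this from the MPI world size


@pytest.mark.skip(reason="https://nvbugs/5488280")  # always skips
@pytest.mark.skipif(WORLD_SIZE != 1, reason="Must run on single MPI rank")
def test_generation_stub():
    assert True  # never reached while the unconditional skip marker is present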

tests/unittest/trt/model_api/test_model_level_api.py

Lines changed: 3 additions & 0 deletions
@@ -3,6 +3,7 @@
 import tempfile
 from contextlib import contextmanager

+import pytest
 from profile_utils import profile
 from transformers import AutoTokenizer
 from utils.llm_data import llm_models_root
@@ -42,6 +43,7 @@ def workspace(suffix, prefix="./trtllm_workspace"):
 # 233s on ipp1-1197: loading weights 37s, network/engine 27s, save engine: 35s, load engine (14GB) about 100s
 @profile("save-and-load")
 @force_ampere
+@pytest.mark.skip(reason="https://nvbugs/5488280")
 def test_save_load():
     '''When the engine_dir parameter of to_trt and generate is not None
     to_trt() saves the engine to disk.
@@ -102,6 +104,7 @@ def test_high_level_fake_weights():


 @force_ampere
+@pytest.mark.skip(reason="https://nvbugs/5488280")
 def test_async_io():
     max_batch_size, max_isl, max_osl = 8, 256, 256
     hf_model_dir = str(llm_models_root() / "llama-models/llama-7b-hf")

tests/unittest/trt/model_api/test_model_quantization.py

Lines changed: 3 additions & 0 deletions
@@ -1,5 +1,6 @@
 import tempfile

+import pytest
 from transformers import AutoTokenizer
 from utils.llm_data import llm_models_root
 from utils.util import force_ampere, skip_no_modelopt, skip_pre_ada
@@ -20,6 +21,7 @@
 ]


+@pytest.mark.skip(reason="https://nvbugs/5488280")
 @force_ampere
 @skip_no_modelopt
 def test_int4_awq_quantization():
@@ -63,6 +65,7 @@ def test_int4_awq_quantization():
 # TODO: TRTLLM-185, check the score when the test infra is ready, hard coded value is not stable, cause flaky tests in L0


+@pytest.mark.skip(reason="https://nvbugs/5488280")
 @skip_pre_ada
 @skip_no_modelopt
 def test_fp8_quantization():
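
When every test in a module is waived under the same bug, a module-level mark is a more compact alternative to per-test decorators (standard pytest usage, shown as a sketch; not what this commit does):

import pytest

# Applying the mark once at module scope skips every test in the file.
pytestmark = pytest.mark.skip(reason="https://nvbugs/5488280")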
