Commit 7026803

Use backend fixture for llmapi
1 parent c84d215 commit 7026803

6 files changed: +47 −38 lines changed


python/openai/tests/conftest.py

Lines changed: 17 additions & 9 deletions
@@ -31,9 +31,6 @@
 from fastapi.testclient import TestClient
 from tests.utils import OpenAIServer, setup_fastapi_app, setup_server
 
-### TEST ENVIRONMENT SETUP ###
-LLMAPI_SETUP = os.environ.get("LLMAPI_SETUP", 0)
-
 
 def infer_test_environment():
     # Infer the test environment for simplicity in local dev/testing.
@@ -49,10 +46,14 @@ def infer_test_environment():
     try:
         import tensorrt_llm as _
 
-        backend = "tensorrtllm"
+        # TODO: Refactor away from environment variables
+        LLMAPI_SETUP = os.environ.get("LLMAPI_SETUP", 0)
+
         if LLMAPI_SETUP:
+            backend = "llmapi"
             model = "tensorrt_llm"
         else:
+            backend = "tensorrtllm"
             model = "tensorrt_llm_bls"
         return backend, model
     except ImportError:
@@ -62,10 +63,7 @@ def infer_test_environment():
 
 
 def infer_test_model_repository(backend):
-    if LLMAPI_SETUP:
-        model_repository = str(Path(__file__).parent / f"{backend}_llmapi_models")
-    else:
-        model_repository = str(Path(__file__).parent / f"{backend}_models")
+    model_repository = str(Path(__file__).parent / f"{backend}_models")
     return model_repository
 
 
@@ -92,13 +90,23 @@ def infer_test_model_repository(backend):
 # only once for all the tests below.
 @pytest.fixture(scope="module")
 def server():
+    # TODO: tensorrtllm and llmapi backends both use "tensorrtllm" as the backend flag for OpenAI server.
+    # In the future, if the backends are consolidated, this check can be updated or removed.
+    # key: the TEST_BACKEND value
+    # value: the corresponding backend flag for OpenAI server
+    backend_map = {
+        "tensorrtllm": "tensorrtllm",
+        "llmapi": "tensorrtllm",
+        "vllm": "vllm",
+    }
+
     args = [
         "--model-repository",
         TEST_MODEL_REPOSITORY,
         "--tokenizer",
         TEST_TOKENIZER,
         "--backend",
-        TEST_BACKEND,
+        backend_map[TEST_BACKEND],
     ]
     # TODO: Incorporate kserve frontend binding smoke tests to catch any
     # breakage with default values or slight cli arg variations
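For context, the test files below receive `backend` and `model` as pytest fixture arguments rather than reading `LLMAPI_SETUP` themselves (hence the commit title). The fixture bodies are not part of this diff, so the sketch below is only an assumption of how such fixtures could expose the inferred values, with `infer_test_environment` simplified to the environment-variable branch alone:

import os

import pytest


def infer_test_environment():
    # Simplified stand-in for the conftest.py helper in the diff above:
    # choose the backend/model pair from the LLMAPI_SETUP environment variable.
    if os.environ.get("LLMAPI_SETUP", 0):
        return "llmapi", "tensorrt_llm"
    return "tensorrtllm", "tensorrt_llm_bls"


TEST_BACKEND, TEST_MODEL = infer_test_environment()


@pytest.fixture(scope="module")
def backend() -> str:
    # Tests declare `backend: str` as a parameter and branch on this value,
    # e.g. skipping unless it is "tensorrtllm" or "llmapi".
    return TEST_BACKEND


@pytest.fixture(scope="module")
def model() -> str:
    return TEST_MODEL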

python/openai/tests/test_chat_completions.py

Lines changed: 2 additions & 2 deletions
@@ -311,7 +311,7 @@ def test_chat_completions_temperature_vllm(
     def test_chat_completions_temperature_tensorrtllm(
         self, client, backend: str, model: str, messages: List[dict]
     ):
-        if backend != "tensorrtllm":
+        if backend != "tensorrtllm" and backend != "llmapi":
             pytest.skip(
                 reason="Only used to test TRT-LLM-specific temperature behavior"
             )
@@ -371,7 +371,7 @@ def test_chat_completions_temperature_tensorrtllm(
 
     # TODO: Remove xfail for LLM API when it's verified.
     @pytest.mark.xfail(
-        condition=os.getenv("LLMAPI_SETUP") == "1",
+        condition=lambda backend: backend == "llmapi",
         reason="Seed parameter support to be verified for LLM API",
     )
     # Simple tests to verify random seed roughly behaves as expected

python/openai/tests/test_completions.py

Lines changed: 3 additions & 3 deletions
@@ -192,8 +192,8 @@ def test_completions_temperature_vllm(
     def test_completions_temperature_tensorrtllm(
         self, client, backend: str, model: str, prompt: str
     ):
-        if backend != "tensorrtllm":
-            pytest.skip(reason="Only used to test vLLM-specific temperature behavior")
+        if backend != "tensorrtllm" and backend != "llmapi":
+            pytest.skip(reason="Only used to test TRTLLM-specific temperature behavior")
 
         responses = []
         payload1 = {
@@ -241,7 +241,7 @@ def test_completions_temperature_tensorrtllm(
 
     # TODO: Remove xfail for LLM API when it's verified.
     @pytest.mark.xfail(
-        condition=os.getenv("LLMAPI_SETUP") == "1",
+        condition=lambda backend: backend == "llmapi",
        reason="Seed parameter support to be verified for LLM API",
     )
     # Simple tests to verify seed roughly behaves as expected

python/openai/tests/test_openai_client.py

Lines changed: 13 additions & 21 deletions
@@ -39,16 +39,12 @@ def test_openai_client_models(self, client: openai.OpenAI, backend: str):
         models = list(client.models.list())
         print(f"Models: {models}")
         if backend == "tensorrtllm":
-            import os
-
-            LLMAPI_SETUP = os.environ.get("LLMAPI_SETUP", 0)
-            if LLMAPI_SETUP:
-                # LLM API setup only has the tensorrt_llm model
-                assert len(models) == 1
-            else:
-                # tensorrt_llm_bls +
-                # preprocess -> tensorrt_llm -> postprocess
-                assert len(models) == 4
+            # tensorrt_llm_bls +
+            # preprocess -> tensorrt_llm -> postprocess
+            assert len(models) == 4
+        elif backend == "llmapi":
+            # Only has one tensorrt_llm model.
+            assert len(models) == 1
         elif backend == "vllm":
             assert len(models) == 1
         else:
@@ -82,7 +78,7 @@ def test_openai_client_chat_completion(
     def test_openai_client_completion_echo(
         self, client: openai.OpenAI, echo: bool, backend: str, model: str, prompt: str
     ):
-        if backend == "tensorrtllm":
+        if backend == "tensorrtllm" or backend == "llmapi":
             pytest.skip(
                 reason="TRT-LLM backend currently only supports setting this parameter at model load time",
             )
@@ -112,16 +108,12 @@ async def test_openai_client_models(self, client: openai.AsyncOpenAI, backend: str):
         models = [model async for model in async_models]
         print(f"Models: {models}")
         if backend == "tensorrtllm":
-            import os
-
-            LLMAPI_SETUP = os.environ.get("LLMAPI_SETUP", 0)
-            if LLMAPI_SETUP:
-                # LLM API setup only has the tensorrt_llm model
-                assert len(models) == 1
-            else:
-                # tensorrt_llm_bls +
-                # preprocess -> tensorrt_llm -> postprocess
-                assert len(models) == 4
+            # tensorrt_llm_bls +
+            # preprocess -> tensorrt_llm -> postprocess
+            assert len(models) == 4
+        elif backend == "llmapi":
+            # Only has one tensorrt_llm model.
+            assert len(models) == 1
         elif backend == "vllm":
             assert len(models) == 1
         else:

python/openai/tests/utils.py

Lines changed: 11 additions & 2 deletions
@@ -1,4 +1,4 @@
-# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -53,8 +53,17 @@ def setup_server(model_repository: str):
 
 
 def setup_fastapi_app(tokenizer: str, server: tritonserver.Server, backend: str):
+    # TODO: tensorrtllm and llmapi backends both use "tensorrtllm" as the backend flag for OpenAI server.
+    # In the future, if the backends are consolidated, this check can be updated or removed.
+    # key: the backend value
+    # value: the corresponding backend flag for OpenAI server
+    backend_map = {
+        "tensorrtllm": "tensorrtllm",
+        "llmapi": "tensorrtllm",
+        "vllm": "vllm",
+    }
     engine: TritonLLMEngine = TritonLLMEngine(
-        server=server, tokenizer=tokenizer, backend=backend
+        server=server, tokenizer=tokenizer, backend=backend_map[backend]
     )
     frontend: FastApiFrontend = FastApiFrontend(engine=engine)
     return frontend.app
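For illustration, a hypothetical usage sketch (not part of this commit) of the updated `setup_fastapi_app`: both the "tensorrtllm" and "llmapi" test values map onto the same "tensorrtllm" engine flag, while "vllm" passes through unchanged. The model repository path and tokenizer name are taken from the test.sh changes below and are example assumptions, not values fixed by `setup_fastapi_app` itself:

from fastapi.testclient import TestClient

from tests.utils import setup_fastapi_app, setup_server

# Hypothetical example values; see qa/L0_openai/test.sh for where they come from.
server = setup_server("tests/llmapi_models")
app = setup_fastapi_app(
    tokenizer="meta-llama/Meta-Llama-3.1-8B-Instruct",
    server=server,
    backend="llmapi",  # mapped internally to the "tensorrtllm" flag
)
client = TestClient(app)
print(client.get("/v1/models").json())  # assumes the standard OpenAI models route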

qa/L0_openai/test.sh

Lines changed: 1 addition & 1 deletion
@@ -87,7 +87,7 @@ function prepare_tensorrtllm() {
     python3 ${FILL_TEMPLATE} -i ${MODEL_REPO}/tensorrt_llm/config.pbtxt triton_backend:tensorrtllm,triton_max_batch_size:64,decoupled_mode:True,max_beam_width:1,engine_dir:${ENGINE_PATH},batching_strategy:inflight_fused_batching,max_queue_size:0,max_queue_delay_microseconds:1000,encoder_input_features_data_type:TYPE_FP16,logits_datatype:TYPE_FP32,exclude_input_in_output:True
 
     # Prepare LLM API setup
-    LLMAPI_MODEL_REPO="tests/tensorrtllm_llmapi_models"
+    LLMAPI_MODEL_REPO="tests/llmapi_models"
     mkdir -p ${LLMAPI_MODEL_REPO}
     cp /app/all_models/llmapi/* "${LLMAPI_MODEL_REPO}" -r
     sed -i 's#"model":"TinyLlama/TinyLlama-1.1B-Chat-v1.0"#"model":"meta-llama/Meta-Llama-3.1-8B-Instruct"#g' ${LLMAPI_MODEL_REPO}/tensorrt_llm/1/model.json
