Skip to content

Commit d640d0b

Browse files
committed
add missing config file
Signed-off-by: Bo Deng <[email protected]>
1 parent e1c04c2 commit d640d0b

File tree

1 file changed

+29
-0
lines changed

1 file changed

+29
-0
lines changed
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
# Configuration added by this commit: four top-level settings followed by a
# `context_servers` section and a `generation_servers` section.
# NOTE(review): the "N+" lines are diff-gutter markers from the commit view, and
# the original indentation is not visible here; the nesting described in these
# comments is inferred from key order — confirm against the real file.
1+
model: DeepSeek-V3-Lite/fp8
2+
hostname: localhost
3+
port: 8000
4+
backend: "pytorch"
# Context-server section: one instance, batch size 2, 384 tokens per batch,
# sequence length 320, tensor/pipeline parallel size 1, overlap scheduler
# disabled. Presumably every key down to the first `urls` entry belongs to
# this section — TODO confirm.
5+
context_servers:
6+
num_instances: 1
7+
max_batch_size: 2
8+
max_num_tokens: 384
9+
max_seq_len: 320
10+
tensor_parallel_size: 1
11+
pipeline_parallel_size: 1
12+
disable_overlap_scheduler: true
# Cache transceiver settings: `ucx` backend with a 512-token buffer, which is
# larger than max_num_tokens (384) above. Presumably `backend` and
# `max_tokens_in_buffer` are children of this key — TODO confirm.
13+
cache_transceiver_config:
14+
backend: ucx
15+
max_tokens_in_buffer: 512
16+
urls:
17+
- "localhost:8001"
# Generation-server section: same instance count, parallel sizes, batch/token/
# sequence limits and cache transceiver values as the context section, but with
# no `disable_overlap_scheduler` key and a different URL port (8002 vs 8001).
18+
generation_servers:
19+
num_instances: 1
20+
tensor_parallel_size: 1
21+
pipeline_parallel_size: 1
22+
max_batch_size: 2
23+
max_num_tokens: 384
24+
max_seq_len: 320
25+
cache_transceiver_config:
26+
backend: ucx
27+
max_tokens_in_buffer: 512
28+
urls:
29+
- "localhost:8002"

0 commit comments

Comments
 (0)