We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent e1c04c2 commit d640d0bCopy full SHA for d640d0b
tests/integration/defs/disaggregated/test_configs/disagg_config_for_benchmark.yaml
@@ -0,0 +1,29 @@
1
+model: DeepSeek-V3-Lite/fp8
2
+hostname: localhost
3
+port: 8000
4
+backend: "pytorch"
5
+context_servers:
6
+ num_instances: 1
7
+ max_batch_size: 2
8
+ max_num_tokens: 384
9
+ max_seq_len: 320
10
+ tensor_parallel_size: 1
11
+ pipeline_parallel_size: 1
12
+ disable_overlap_scheduler: true
13
+ cache_transceiver_config:
14
+ backend: ucx
15
+ max_tokens_in_buffer: 512
16
+ urls:
17
+ - "localhost:8001"
18
+generation_servers:
19
20
21
22
23
24
25
26
27
28
29
+ - "localhost:8002"
0 commit comments