# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

-from dataclasses import dataclass
from pathlib import Path
-from typing import Callable, Dict, List, Tuple
+from typing import Dict, List

from benchmarks.utils.genai import run_concurrency_sweep
from benchmarks.utils.plot import generate_plots
-from deploy.utils.dynamo_deployment import DynamoDeploymentClient
-
-
-@dataclass
-class DeploymentConfig:
-    """Configuration for a single deployment type"""
-
-    name: str  # Human-readable name (e.g., "aggregated")
-    manifest_path: str  # Path to deployment manifest
-    output_subdir: str  # Subdirectory name for results (e.g., "agg")
-    client_factory: Callable  # Function to create the client
-    deploy_func: Callable  # Function to deploy the client
-
-
-def create_dynamo_client(
-    namespace: str, deployment_name: str
-) -> DynamoDeploymentClient:
-    """Factory function for DynamoDeploymentClient"""
-    return DynamoDeploymentClient(namespace=namespace, deployment_name=deployment_name)
-
-
-async def deploy_dynamo_client(
-    client: DynamoDeploymentClient, manifest_path: str
-) -> None:
-    """Deploy a DynamoDeploymentClient"""
-    await client.create_deployment(manifest_path)
-    await client.wait_for_deployment_ready(timeout=1800)
-
-
-async def teardown(client) -> None:
-    """Clean up deployment and stop port forwarding"""
-    try:
-        if hasattr(client, "stop_port_forward"):
-            client.stop_port_forward()
-        await client.delete_deployment()
-    except Exception:
-        pass
-
-
-def print_deployment_start(config: DeploymentConfig, output_dir: str) -> None:
-    """Print deployment start messages"""
-    print(f"🚀 Starting {config.name} deployment benchmark...")
-    print(f"📄 Manifest: {config.manifest_path}")
-    print(f"📁 Results will be saved to: {Path(output_dir) / config.output_subdir}")


def print_concurrency_start(
-    deployment_name: str, model: str, isl: int, osl: int, std: int
+    label: str, model: str, isl: int, osl: int, std: int
) -> None:
    """Print concurrency sweep start messages"""
-    print(f"⚙️ Starting {deployment_name} concurrency sweep!", flush=True)
+    print(f"⚙️ Starting {label} concurrency sweep!", flush=True)
    print(
        "⏱️ This may take several minutes - running through multiple concurrency levels...",
        flush=True,
    )
    print(f"🎯 Model: {model} | ISL: {isl} | OSL: {osl} | StdDev: {std}")


-def print_deployment_complete(config: DeploymentConfig) -> None:
-    """Print deployment completion message"""
-    print(f"✅ {config.name.title()} deployment benchmark completed successfully!")
-
-
-def print_deployment_skip(deployment_type: str) -> None:
-    """Print deployment skip message"""
-    print(f"⏭️ Skipping {deployment_type} deployment (not specified)")
-
-
-async def run_single_deployment_benchmark(
-    config: DeploymentConfig,
-    namespace: str,
-    output_dir: str,
-    model: str,
-    isl: int,
-    osl: int,
-    std: int,
-) -> None:
-    """Run benchmark for a single deployment type"""
-    print_deployment_start(config, output_dir)
-
-    # Create and deploy client
-    client = config.client_factory(namespace, config.output_subdir)
-    await config.deploy_func(client, config.manifest_path)
-
-    try:
-        print_concurrency_start(config.name, model, isl, osl, std)
-
-        # Run concurrency sweep
-        (Path(output_dir) / config.output_subdir).mkdir(parents=True, exist_ok=True)
-        run_concurrency_sweep(
-            service_url=client.port_forward_frontend(quiet=True),
-            model_name=model,
-            isl=isl,
-            osl=osl,
-            stddev=std,
-            output_dir=Path(output_dir) / config.output_subdir,
-        )
-
-    finally:
-        await teardown(client)
-
-    print_deployment_complete(config)
-
-
-async def run_endpoint_benchmark(
+def run_endpoint_benchmark(
    label: str,
    endpoint: str,
    model: str,
    isl: int,
    osl: int,
    std: int,
-    output_dir: str,
+    output_dir: Path,
) -> None:
    """Run benchmark for an existing endpoint with custom label"""
    print(f"🚀 Starting benchmark of endpoint '{label}': {endpoint}")
-    print(f"📁 Results will be saved to: {Path(output_dir) / label}")
-    print_concurrency_start(f"endpoint ({label})", model, isl, osl, std)
+    print(f"📁 Results will be saved to: {output_dir / label}")
+    print_concurrency_start(label, model, isl, osl, std)
+
+    # Create output directory
+    (output_dir / label).mkdir(parents=True, exist_ok=True)

    run_concurrency_sweep(
        service_url=endpoint,
        model_name=model,
        isl=isl,
        osl=osl,
        stddev=std,
-        output_dir=Path(output_dir) / label,
+        output_dir=output_dir / label,
    )
    print("✅ Endpoint benchmark completed successfully!")


-def print_final_summary(output_dir: str, deployed_types: List[str]) -> None:
+def print_final_summary(output_dir: Path, labels: List[str]) -> None:
    """Print final benchmark summary"""
    print("📊 Generating performance plots...")
-    generate_plots(
-        base_output_dir=Path(output_dir), output_dir=Path(output_dir) / "plots"
-    )
-    print(f"📈 Plots saved to: {Path(output_dir) / 'plots'}")
-    print(f"📋 Summary saved to: {Path(output_dir) / 'SUMMARY.txt'}")
+    generate_plots(base_output_dir=output_dir, output_dir=output_dir / "plots")
+    print(f"📈 Plots saved to: {output_dir / 'plots'}")
+    print(f"📋 Summary saved to: {output_dir / 'plots' / 'SUMMARY.txt'}")

    print()
    print("🎉 Benchmark workflow completed successfully!")
    print(f"📁 All results available at: {output_dir}")

-    if deployed_types:
-        print(f"🚀 Benchmarked deployments: {', '.join(deployed_types)}")
+    if labels:
+        print(f"🚀 Benchmarked: {', '.join(labels)}")

-    print(f"📊 View plots at: {Path(output_dir) / 'plots'}")
+    print(f"📊 View plots at: {output_dir / 'plots'}")


-def categorize_inputs(inputs: Dict[str, str]) -> Tuple[Dict[str, str], Dict[str, str]]:
-    """Categorize inputs into endpoints and manifests"""
-    endpoints = {}
-    manifests = {}
-
-    for label, value in inputs.items():
-        # Validate reserved labels
-        if label.lower() == "plots":
-            raise ValueError(
-                "Label 'plots' is reserved and cannot be used. Please choose a different label."
-            )
-
-        if value.startswith(("http://", "https://")):
-            endpoints[label] = value
-        else:
-            # It should be a file path - validate it exists
-            if not Path(value).is_file():
-                raise FileNotFoundError(
-                    f"Manifest file not found for input '{label}': {value}"
-                )
-            manifests[label] = value
-
-    return endpoints, manifests
-
-
-def validate_dynamo_manifest(manifest_path: str) -> None:
-    """Validate that the manifest is a DynamoGraphDeployment"""
-    try:
-        with open(manifest_path, "r") as f:
-            content = f.read()
-
-        # Check for DynamoGraphDeployment
-        if "kind: DynamoGraphDeployment" not in content:
-            raise ValueError(
-                f"Manifest {manifest_path} is not a DynamoGraphDeployment. Only DynamoGraphDeployments are supported for deployment benchmarking."
-            )
-
-    except FileNotFoundError:
-        raise FileNotFoundError(f"Manifest file not found: {manifest_path}")
-    except Exception as e:
-        raise ValueError(f"Error reading manifest {manifest_path}: {e}")
-
-
-async def run_benchmark_workflow(
-    namespace: str,
+def run_benchmark_workflow(
    inputs: Dict[str, str],
-    isl: int = 200,
+    isl: int = 2000,
    std: int = 10,
-    osl: int = 200,
-    model: str = "nvidia/Llama-3.1-8B-Instruct-FP8",
+    osl: int = 256,
+    model: str = "Qwen/Qwen3-0.6B",
    output_dir: str = "benchmarks/results",
) -> None:
-    """Main benchmark workflow orchestrator with dynamic inputs"""
-    Path(output_dir).mkdir(parents=True, exist_ok=True)
-
-    # Categorize inputs into endpoints and manifests
-    endpoints, manifests = categorize_inputs(inputs)
+    """Main benchmark workflow orchestrator for HTTP endpoints only"""
+    output_dir_path = Path(output_dir)
+    output_dir_path.mkdir(parents=True, exist_ok=True)

    # Run endpoint benchmarks
-    for label, endpoint in endpoints.items():
-        await run_endpoint_benchmark(label, endpoint, model, isl, osl, std, output_dir)
-
-    # Create deployment configurations for manifests
-    deployment_configs = []
-
-    for label, manifest_path in manifests.items():
-        # Validate that it's a DynamoGraphDeployment
-        validate_dynamo_manifest(manifest_path)
-
-        config = DeploymentConfig(
-            name=label,
-            manifest_path=manifest_path,
-            output_subdir=label,
-            client_factory=create_dynamo_client,
-            deploy_func=deploy_dynamo_client,
-        )
-
-        deployment_configs.append(config)
-
-    # Run benchmarks for each deployment type
-    deployed_labels = list(endpoints.keys())
-    for config in deployment_configs:
-        await run_single_deployment_benchmark(
-            config=config,
-            namespace=namespace,
-            output_dir=output_dir,
-            model=model,
-            isl=isl,
-            osl=osl,
-            std=std,
-        )
-        deployed_labels.append(config.name)
+    benchmarked_labels = []
+    for label, endpoint in inputs.items():
+        run_endpoint_benchmark(label, endpoint, model, isl, osl, std, output_dir_path)
+        benchmarked_labels.append(label)

    # Generate final summary
-    print_final_summary(output_dir, deployed_labels)
+    print_final_summary(output_dir_path, benchmarked_labels)
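For reviewers, a minimal usage sketch of the refactored, now-synchronous, endpoint-only entry point. The import path below is a hypothetical placeholder; the function name, parameters, and defaults come from the diff above:

    # Hypothetical usage sketch; only run_benchmark_workflow's signature is taken from this change.
    from benchmarks.utils.workflow import run_benchmark_workflow  # import path assumed

    # Each key is a label used as the results subdirectory; each value is a live HTTP endpoint.
    run_benchmark_workflow(
        inputs={
            "agg": "http://localhost:8000",
            "disagg": "http://localhost:8001",
        },
        isl=2000,  # input sequence length (new default)
        osl=256,   # output sequence length (new default)
        std=10,    # stddev passed through to the concurrency sweep
        model="Qwen/Qwen3-0.6B",
        output_dir="benchmarks/results",
    )
    # Per-label results land under benchmarks/results/<label>/,
    # plots and SUMMARY.txt under benchmarks/results/plots/.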