
Commit 20b7a8a

feat: remove kubectl dependencies from benchmarking (#3098)
Signed-off-by: Hannah Zhang <[email protected]>
Signed-off-by: hhzhang16 <[email protected]>
Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>

1 parent 3b6dbef · commit 20b7a8a

File tree

4 files changed, +80 -285 lines changed


benchmarks/README.md

Lines changed: 2 additions & 2 deletions
@@ -24,10 +24,10 @@ First, deploy your DynamoGraphDeployment using the [deployment documentation](..
 
 ```bash
 # Port-forward your deployment to http://localhost:8000
-kubectl port-forward -n <namespace> svc/<frontend-service-name> 8000:8000 &
+kubectl port-forward -n <namespace> svc/<frontend-service-name> 8000:8000 > /dev/null 2>&1 &
 
 # Run benchmark
-python3 -m benchmarks.utils.benchmark --namespace <namespace> \
+python3 -m benchmarks.utils.benchmark \
   --input my-benchmark=http://localhost:8000 \
   --model "<your-model>"
 ```

benchmarks/utils/benchmark.py

Lines changed: 31 additions & 21 deletions
@@ -4,24 +4,39 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import argparse
-import asyncio
+import re
 import sys
-from typing import Tuple
+from typing import Dict, Tuple
 
-from benchmarks.utils.workflow import categorize_inputs, run_benchmark_workflow
+from benchmarks.utils.workflow import run_benchmark_workflow
+
+
+def validate_inputs(inputs: Dict[str, str]) -> None:
+    """Validate that all inputs are HTTP endpoints"""
+    for label, value in inputs.items():
+        if not value.lower().startswith(("http://", "https://")):
+            raise ValueError(
+                f"Input '{label}' must be an HTTP endpoint (starting with http:// or https://). Got: {value}"
+            )
+
+        # Validate reserved labels
+        if label.lower() == "plots":
+            raise ValueError(
+                "Label 'plots' is reserved and cannot be used. Please choose a different label."
+            )
 
 
 def parse_input(input_str: str) -> Tuple[str, str]:
     """Parse input string in format key=value with additional validation"""
     if "=" not in input_str:
         raise ValueError(
-            f"Invalid input format. Expected: <label>=<manifest_path_or_endpoint>, got: {input_str}"
+            f"Invalid input format. Expected: <label>=<endpoint>, got: {input_str}"
         )
 
     parts = input_str.split("=", 1)  # Split on first '=' only
     if len(parts) != 2:
         raise ValueError(
-            f"Invalid input format. Expected: <label>=<manifest_path_or_endpoint>, got: {input_str}"
+            f"Invalid input format. Expected: <label>=<endpoint>, got: {input_str}"
        )
 
     label, value = parts
@@ -35,8 +50,6 @@ def parse_input(input_str: str) -> Tuple[str, str]:
     value = value.strip()
 
     # Validate label characters
-    import re
-
     if not re.match(r"^[a-zA-Z0-9_-]+$", label):
         raise ValueError(
             f"Label must contain only letters, numbers, hyphens, and underscores. Invalid label: {label}"
@@ -51,9 +64,8 @@ def main() -> int:
         "--input",
         action="append",
         dest="inputs",
-        help="Input in format <label>=<manifest_path_or_endpoint>. Can be specified multiple times for comparisons.",
+        help="Input in format <label>=<endpoint>. Can be specified multiple times for comparisons.",
     )
-    parser.add_argument("--namespace", required=True, help="Kubernetes namespace")
     parser.add_argument("--isl", type=int, default=2000, help="Input sequence length")
     parser.add_argument(
         "--std",
@@ -102,23 +114,21 @@ def main() -> int:
         )
         print()
 
-        endpoints, manifests = categorize_inputs(parsed_inputs)
+        # Validate that all inputs are HTTP endpoints
+        validate_inputs(parsed_inputs)
 
-    except (ValueError, FileNotFoundError) as e:
+    except ValueError as e:
         print(f"ERROR: {e}")
         return 1
 
     # Run the benchmark workflow with the parsed inputs
-    asyncio.run(
-        run_benchmark_workflow(
-            namespace=args.namespace,
-            inputs=parsed_inputs,
-            isl=args.isl,
-            std=args.std,
-            osl=args.osl,
-            model=args.model,
-            output_dir=args.output_dir,
-        )
+    run_benchmark_workflow(
+        inputs=parsed_inputs,
+        isl=args.isl,
+        std=args.std,
+        osl=args.osl,
+        model=args.model,
+        output_dir=args.output_dir,
     )
     return 0
 
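The new `validate_inputs` helper above is what enforces the endpoint-only contract on the CLI side. A minimal sketch of how parsed `--input` values flow through it; the labels, URL, and manifest path below are illustrative placeholders, not taken from this commit:

```python
# Sketch only: exercising parse_input/validate_inputs as defined in the diff above.
# Assumes the repo root is on PYTHONPATH so benchmarks.utils.benchmark imports.
from benchmarks.utils.benchmark import parse_input, validate_inputs

# An HTTP endpoint parses and validates cleanly.
label, value = parse_input("my-benchmark=http://localhost:8000")
validate_inputs({label: value})

# A manifest path is no longer accepted -- validate_inputs raises ValueError.
try:
    validate_inputs({"agg": "deploy/agg.yaml"})  # placeholder manifest path
except ValueError as err:
    print(f"Rejected non-endpoint input: {err}")

# The label "plots" stays reserved for the plot output directory.
try:
    validate_inputs({"plots": "http://localhost:8000"})
except ValueError as err:
    print(f"Rejected reserved label: {err}")
```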

benchmarks/utils/workflow.py

Lines changed: 30 additions & 195 deletions
@@ -1,252 +1,87 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
-from dataclasses import dataclass
 from pathlib import Path
-from typing import Callable, Dict, List, Tuple
+from typing import Dict, List
 
 from benchmarks.utils.genai import run_concurrency_sweep
 from benchmarks.utils.plot import generate_plots
-from deploy.utils.dynamo_deployment import DynamoDeploymentClient
-
-
-@dataclass
-class DeploymentConfig:
-    """Configuration for a single deployment type"""
-
-    name: str  # Human-readable name (e.g., "aggregated")
-    manifest_path: str  # Path to deployment manifest
-    output_subdir: str  # Subdirectory name for results (e.g., "agg")
-    client_factory: Callable  # Function to create the client
-    deploy_func: Callable  # Function to deploy the client
-
-
-def create_dynamo_client(
-    namespace: str, deployment_name: str
-) -> DynamoDeploymentClient:
-    """Factory function for DynamoDeploymentClient"""
-    return DynamoDeploymentClient(namespace=namespace, deployment_name=deployment_name)
-
-
-async def deploy_dynamo_client(
-    client: DynamoDeploymentClient, manifest_path: str
-) -> None:
-    """Deploy a DynamoDeploymentClient"""
-    await client.create_deployment(manifest_path)
-    await client.wait_for_deployment_ready(timeout=1800)
-
-
-async def teardown(client) -> None:
-    """Clean up deployment and stop port forwarding"""
-    try:
-        if hasattr(client, "stop_port_forward"):
-            client.stop_port_forward()
-        await client.delete_deployment()
-    except Exception:
-        pass
-
-
-def print_deployment_start(config: DeploymentConfig, output_dir: str) -> None:
-    """Print deployment start messages"""
-    print(f"🚀 Starting {config.name} deployment benchmark...")
-    print(f"📄 Manifest: {config.manifest_path}")
-    print(f"📁 Results will be saved to: {Path(output_dir) / config.output_subdir}")
 
 
 def print_concurrency_start(
-    deployment_name: str, model: str, isl: int, osl: int, std: int
+    label: str, model: str, isl: int, osl: int, std: int
 ) -> None:
     """Print concurrency sweep start messages"""
-    print(f"⚙️ Starting {deployment_name} concurrency sweep!", flush=True)
+    print(f"⚙️ Starting {label} concurrency sweep!", flush=True)
     print(
         "⏱️ This may take several minutes - running through multiple concurrency levels...",
         flush=True,
     )
     print(f"🎯 Model: {model} | ISL: {isl} | OSL: {osl} | StdDev: {std}")
 
 
-def print_deployment_complete(config: DeploymentConfig) -> None:
-    """Print deployment completion message"""
-    print(f"✅ {config.name.title()} deployment benchmark completed successfully!")
-
-
-def print_deployment_skip(deployment_type: str) -> None:
-    print(f"⏭️ Skipping {deployment_type} deployment (not specified)")
-
-
-
-async def run_single_deployment_benchmark(
-    config: DeploymentConfig,
-    namespace: str,
-    output_dir: str,
-    model: str,
-    isl: int,
-    osl: int,
-    std: int,
-) -> None:
-    """Run benchmark for a single deployment type"""
-    print_deployment_start(config, output_dir)
-
-    # Create and deploy client
-    client = config.client_factory(namespace, config.output_subdir)
-    await config.deploy_func(client, config.manifest_path)
-
-    try:
-        print_concurrency_start(config.name, model, isl, osl, std)
-
-        # Run concurrency sweep
-        (Path(output_dir) / config.output_subdir).mkdir(parents=True, exist_ok=True)
-        run_concurrency_sweep(
-            service_url=client.port_forward_frontend(quiet=True),
-            model_name=model,
-            isl=isl,
-            osl=osl,
-            stddev=std,
-            output_dir=Path(output_dir) / config.output_subdir,
-        )
-
-    finally:
-        await teardown(client)
-
-    print_deployment_complete(config)
-
-
-async def run_endpoint_benchmark(
+def run_endpoint_benchmark(
     label: str,
     endpoint: str,
     model: str,
     isl: int,
     osl: int,
     std: int,
-    output_dir: str,
+    output_dir: Path,
 ) -> None:
     """Run benchmark for an existing endpoint with custom label"""
     print(f"🚀 Starting benchmark of endpoint '{label}': {endpoint}")
-    print(f"📁 Results will be saved to: {Path(output_dir) / label}")
-    print_concurrency_start(f"endpoint ({label})", model, isl, osl, std)
+    print(f"📁 Results will be saved to: {output_dir / label}")
+    print_concurrency_start(label, model, isl, osl, std)
+
+    # Create output directory
+    (output_dir / label).mkdir(parents=True, exist_ok=True)
 
     run_concurrency_sweep(
         service_url=endpoint,
         model_name=model,
        isl=isl,
         osl=osl,
         stddev=std,
-        output_dir=Path(output_dir) / label,
+        output_dir=output_dir / label,
     )
     print("✅ Endpoint benchmark completed successfully!")
 
 
-def print_final_summary(output_dir: str, deployed_types: List[str]) -> None:
+def print_final_summary(output_dir: Path, labels: List[str]) -> None:
     """Print final benchmark summary"""
     print("📊 Generating performance plots...")
-    generate_plots(
-        base_output_dir=Path(output_dir), output_dir=Path(output_dir) / "plots"
-    )
-    print(f"📈 Plots saved to: {Path(output_dir) / 'plots'}")
-    print(f"📋 Summary saved to: {Path(output_dir) / 'SUMMARY.txt'}")
+    generate_plots(base_output_dir=output_dir, output_dir=output_dir / "plots")
+    print(f"📈 Plots saved to: {output_dir / 'plots'}")
+    print(f"📋 Summary saved to: {output_dir / 'plots' / 'SUMMARY.txt'}")
 
     print()
     print("🎉 Benchmark workflow completed successfully!")
     print(f"📁 All results available at: {output_dir}")
 
-    if deployed_types:
-        print(f"🚀 Benchmarked deployments: {', '.join(deployed_types)}")
+    if labels:
+        print(f"🚀 Benchmarked: {', '.join(labels)}")
 
-    print(f"📊 View plots at: {Path(output_dir) / 'plots'}")
+    print(f"📊 View plots at: {output_dir / 'plots'}")
 
 
-def categorize_inputs(inputs: Dict[str, str]) -> Tuple[Dict[str, str], Dict[str, str]]:
-    """Categorize inputs into endpoints and manifests"""
-    endpoints = {}
-    manifests = {}
-
-    for label, value in inputs.items():
-        # Validate reserved labels
-        if label.lower() == "plots":
-            raise ValueError(
-                "Label 'plots' is reserved and cannot be used. Please choose a different label."
-            )
-
-        if value.startswith(("http://", "https://")):
-            endpoints[label] = value
-        else:
-            # It should be a file path - validate it exists
-            if not Path(value).is_file():
-                raise FileNotFoundError(
-                    f"Manifest file not found for input '{label}': {value}"
-                )
-            manifests[label] = value
-
-    return endpoints, manifests
-
-
-def validate_dynamo_manifest(manifest_path: str) -> None:
-    """Validate that the manifest is a DynamoGraphDeployment"""
-    try:
-        with open(manifest_path, "r") as f:
-            content = f.read()
-
-        # Check for DynamoGraphDeployment
-        if "kind: DynamoGraphDeployment" not in content:
-            raise ValueError(
-                f"Manifest {manifest_path} is not a DynamoGraphDeployment. Only DynamoGraphDeployments are supported for deployment benchmarking."
-            )
-
-    except FileNotFoundError:
-        raise FileNotFoundError(f"Manifest file not found: {manifest_path}")
-    except Exception as e:
-        raise ValueError(f"Error reading manifest {manifest_path}: {e}")
-
-
-async def run_benchmark_workflow(
-    namespace: str,
+def run_benchmark_workflow(
     inputs: Dict[str, str],
-    isl: int = 200,
+    isl: int = 2000,
     std: int = 10,
-    osl: int = 200,
-    model: str = "nvidia/Llama-3.1-8B-Instruct-FP8",
+    osl: int = 256,
+    model: str = "Qwen/Qwen3-0.6B",
     output_dir: str = "benchmarks/results",
 ) -> None:
-    """Main benchmark workflow orchestrator with dynamic inputs"""
-    Path(output_dir).mkdir(parents=True, exist_ok=True)
-
-    # Categorize inputs into endpoints and manifests
-    endpoints, manifests = categorize_inputs(inputs)
+    """Main benchmark workflow orchestrator for HTTP endpoints only"""
+    output_dir_path = Path(output_dir)
+    output_dir_path.mkdir(parents=True, exist_ok=True)
 
     # Run endpoint benchmarks
-    for label, endpoint in endpoints.items():
-        await run_endpoint_benchmark(label, endpoint, model, isl, osl, std, output_dir)
-
-    # Create deployment configurations for manifests
-    deployment_configs = []
-
-    for label, manifest_path in manifests.items():
-        # Validate that it's a DynamoGraphDeployment
-        validate_dynamo_manifest(manifest_path)
-
-        config = DeploymentConfig(
-            name=label,
-            manifest_path=manifest_path,
-            output_subdir=label,
-            client_factory=create_dynamo_client,
-            deploy_func=deploy_dynamo_client,
-        )
-
-        deployment_configs.append(config)
-
-    # Run benchmarks for each deployment type
-    deployed_labels = list(endpoints.keys())
-    for config in deployment_configs:
-        await run_single_deployment_benchmark(
-            config=config,
-            namespace=namespace,
-            output_dir=output_dir,
-            model=model,
-            isl=isl,
-            osl=osl,
-            std=std,
-        )
-        deployed_labels.append(config.name)
+    benchmarked_labels = []
+    for label, endpoint in inputs.items():
+        run_endpoint_benchmark(label, endpoint, model, isl, osl, std, output_dir_path)
+        benchmarked_labels.append(label)
 
     # Generate final summary
-    print_final_summary(output_dir, deployed_labels)
+    print_final_summary(output_dir_path, benchmarked_labels)
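
With the deployment and kubectl plumbing removed, `run_benchmark_workflow` above is now a plain synchronous function that accepts only labeled HTTP endpoints. A minimal sketch of calling it directly, assuming already port-forwarded frontends; the labels and URLs are placeholders:

```python
# Sketch only: driving the reworked workflow from Python with existing endpoints.
# The URLs below stand in for frontends you have already port-forwarded.
from benchmarks.utils.workflow import run_benchmark_workflow

run_benchmark_workflow(
    inputs={
        "agg": "http://localhost:8000",     # e.g. an aggregated deployment
        "disagg": "http://localhost:8001",  # e.g. a disaggregated deployment
    },
    isl=2000,                 # input sequence length (new default)
    std=10,                   # passed as stddev to the concurrency sweep
    osl=256,                  # output sequence length (new default)
    model="Qwen/Qwen3-0.6B",  # new default model
    output_dir="benchmarks/results",
)
# Per-label results land under benchmarks/results/<label>/, with plots and
# SUMMARY.txt under benchmarks/results/plots/.
```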
