Skip to content

Commit 6224574

Browse files
committed
add benchmarking
1 parent 3aee6d7 commit 6224574

File tree

27 files changed

+1203
-29
lines changed

27 files changed

+1203
-29
lines changed
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Composite action: runs aggregate.js over a directory of benchmark result
# files and appends the rendered Markdown report to the job's step summary.
name: 'Aggregate Benchmark Results'
description: 'Aggregates benchmark results from multiple matrix jobs and renders cross-comparison tables'
inputs:
  results-dir:
    description: 'Directory containing benchmark result files'
    # A default is provided, so callers are not required to pass this input.
    required: false
    default: '.'
runs:
  using: 'composite'
  steps:
    - name: Aggregate and render benchmark comparison
      shell: bash
      env:
        # Pass the input through the environment instead of interpolating it
        # into the script text, so its value can never be parsed as shell code.
        RESULTS_DIR: ${{ inputs.results-dir }}
      run: |
        # Quote every expansion so paths containing spaces keep working.
        node "$GITHUB_ACTION_PATH/aggregate.js" "$RESULTS_DIR" >> "$GITHUB_STEP_SUMMARY"
Lines changed: 365 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,365 @@
1+
#!/usr/bin/env node
2+
3+
const fs = require('fs');
4+
const path = require('path');
5+
6+
// Directory to scan for result files: the first CLI argument after the script
// path (process.argv[2]), falling back to '.' when it is not supplied.
const [, , resultsDir = '.'] = process.argv;
7+
8+
// Display metadata for each known backend id: the emoji and the
// human-readable label used when a backend is printed in the report.
const backendConfig = {
  local: {
    emoji: '💻',
    label: 'Local',
  },
  postgres: {
    emoji: '🐘',
    label: 'Postgres',
  },
  vercel: {
    emoji: '▲',
    label: 'Vercel',
  },
};
14+
15+
// Display metadata for each known framework (app) id: the human-readable
// label used when a framework is printed in the report.
const frameworkConfig = {
  'nextjs-turbopack': {
    label: 'Next.js (Turbopack)',
  },
  nitro: {
    label: 'Nitro',
  },
  express: {
    label: 'Express',
  },
};
21+
22+
/**
 * Convert a duration in milliseconds to a fixed-point string of seconds.
 *
 * @param {number} ms - Duration in milliseconds.
 * @param {number} [decimals=3] - Number of digits after the decimal point.
 * @returns {string} The duration in seconds, without a unit suffix.
 */
function formatSec(ms, decimals = 3) {
  const seconds = ms / 1000;
  return seconds.toFixed(decimals);
}
26+
27+
/**
 * Recursively collect the paths of all benchmark result files under `dir`.
 *
 * A file counts as a result file when its name starts with `bench-results-`
 * and ends with `.json`. A directory that cannot be read produces a warning
 * on stderr and contributes no files; it does not abort the scan.
 *
 * @param {string} dir - Directory to scan.
 * @returns {string[]} Paths of the matching files, each joined onto `dir`.
 */
function findBenchmarkFiles(dir) {
  let entries;
  try {
    entries = fs.readdirSync(dir, { withFileTypes: true });
  } catch (err) {
    console.error(`Warning: Could not read directory ${dir}: ${err.message}`);
    return [];
  }

  return entries.flatMap((entry) => {
    const fullPath = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      // Descend depth-first so nested results keep their position in the listing.
      return findBenchmarkFiles(fullPath);
    }
    const isResultFile =
      entry.name.startsWith('bench-results-') && entry.name.endsWith('.json');
    return isResultFile ? [fullPath] : [];
  });
}
48+
49+
/**
 * Extract the app and backend identifiers from a result file name.
 *
 * Expected format: `bench-results-{app}-{backend}.json`. The app part may
 * itself contain hyphens; the backend is the last hyphen-separated segment
 * and consists of word characters only.
 *
 * @param {string} filename - File name (not a full path) to parse.
 * @returns {{ app: string, backend: string } | null} The parsed parts, or
 *   `null` when the name does not follow the expected format.
 */
function parseFilename(filename) {
  const found = filename.match(/bench-results-(.+)-(\w+)\.json$/);
  if (found === null) {
    return null;
  }
  const [, app, backend] = found;
  return { app, backend };
}
56+
57+
/**
 * Load the timing file that sits next to a benchmark result file.
 *
 * The timing file lives in the same directory and has the same name with the
 * `bench-results-` prefix swapped for `bench-timings-`. Only the file name is
 * rewritten, so a directory whose name contains the prefix is left untouched.
 *
 * @param {string} benchmarkFile - Path to the benchmark result file.
 * @returns {*} The parsed JSON content, or `null` when the timing file does
 *   not exist or cannot be read/parsed (a warning is written to stderr in the
 *   latter case).
 */
function loadTimingData(benchmarkFile) {
  const directory = path.dirname(benchmarkFile);
  const resultName = path.basename(benchmarkFile);
  const timingFile = path.join(
    directory,
    resultName.replace('bench-results-', 'bench-timings-')
  );

  if (!fs.existsSync(timingFile)) {
    return null;
  }

  try {
    const raw = fs.readFileSync(timingFile, 'utf-8');
    return JSON.parse(raw);
  } catch (err) {
    console.error(
      `Warning: Could not parse timing file ${timingFile}: ${err.message}`
    );
    return null;
  }
}
75+
76+
/**
 * Read every benchmark result file and merge the measurements into a single
 * nested lookup keyed by benchmark name, then app, then backend.
 *
 * Resulting structure:
 *   { [benchmarkName]: { [app]: { [backend]:
 *       { wallTime, workflowTime, overhead, min, max, samples } } } }
 *
 * Files whose name does not match the expected pattern are skipped. A file
 * that cannot be read or parsed produces a warning on stderr and is skipped;
 * anything already collected from other files is kept.
 *
 * @param {string[]} resultFiles - Paths of the benchmark result files.
 * @returns {Object} The nested lookup described above.
 */
function collectBenchmarkData(resultFiles) {
  const data = {};

  for (const file of resultFiles) {
    const parsed = parseFilename(path.basename(file));
    if (!parsed) continue;

    const { app, backend } = parsed;

    try {
      const results = JSON.parse(fs.readFileSync(file, 'utf-8'));
      const timings = loadTimingData(file);

      for (const fileData of results.files || []) {
        for (const group of fileData.groups || []) {
          for (const bench of group.benchmarks || []) {
            // Skip benchmarks without valid timing data (failed or timed out)
            if (bench.mean == null) {
              continue;
            }

            const benchName = bench.name;

            if (!data[benchName]) {
              data[benchName] = {};
            }
            if (!data[benchName][app]) {
              data[benchName][app] = {};
            }

            // Workflow timing is optional. Normalise anything that is not a
            // finite number (missing summary entry, missing field, bad value)
            // to null so that downstream `!== null` / `??` checks behave and
            // the overhead never becomes NaN.
            const avgMs = timings?.summary?.[benchName]?.avgExecutionTimeMs;
            const workflowTimeMs =
              typeof avgMs === 'number' && Number.isFinite(avgMs) ? avgMs : null;

            data[benchName][app][backend] = {
              wallTime: bench.mean,
              workflowTime: workflowTimeMs,
              overhead:
                workflowTimeMs !== null ? bench.mean - workflowTimeMs : null,
              min: bench.min,
              max: bench.max,
              samples: bench.sampleCount,
            };
          }
        }
      }
    } catch (e) {
      console.error(
        `Warning: Could not parse benchmark file ${file}: ${e.message}`
      );
    }
  }

  return data;
}
135+
136+
/**
 * List every app and backend that appears anywhere in the collected data.
 *
 * @param {Object} data - Lookup of the form
 *   `{ [benchmarkName]: { [app]: { [backend]: metrics } } }`.
 * @returns {{ apps: string[], backends: string[] }} Apps sorted
 *   alphabetically; backends in the preferred order local, postgres, vercel,
 *   followed by any other backends in alphabetical order.
 */
function getAppsAndBackends(data) {
  const apps = new Set();
  const backends = new Set();

  for (const benchData of Object.values(data)) {
    for (const [app, byBackend] of Object.entries(benchData)) {
      apps.add(app);
      for (const backend of Object.keys(byBackend)) {
        backends.add(backend);
      }
    }
  }

  // Preferred display order for the known backends.
  const backendOrder = ['local', 'postgres', 'vercel'];

  // indexOf() yields -1 for a backend that is not in the list, which would
  // sort it *before* 'local'. Rank unknown backends after all known ones.
  const rankOf = (backend) => {
    const index = backendOrder.indexOf(backend);
    return index === -1 ? backendOrder.length : index;
  };

  const sortedBackends = [...backends].sort((a, b) => {
    const byRank = rankOf(a) - rankOf(b);
    if (byRank !== 0) return byRank;
    // Same rank means both are unknown: fall back to a stable alphabetical order.
    if (a < b) return -1;
    return a > b ? 1 : 0;
  });

  // Sort apps alphabetically
  const sortedApps = [...apps].sort();

  return { apps: sortedApps, backends: sortedBackends };
}
161+
162+
// Ranking metric for a data point: workflow time when it was recorded,
// otherwise the wall time.
function primaryTime(metrics) {
  return metrics.workflowTime ?? metrics.wallTime;
}

// Table cell for a duration in ms: "<seconds>s", or "-" when the value is
// missing. The unit is only appended to real values (never "-s").
function formatSecCell(ms) {
  return ms != null ? `${formatSec(ms)}s` : '-';
}

// Display text for a backend: "<emoji> <label>", or the raw id if unknown.
function backendDisplay(backend) {
  const info = backendConfig[backend] || { emoji: '', label: backend };
  return [info.emoji, info.label].filter(Boolean).join(' ');
}

// Display text for a framework/app: its configured label, or the raw id.
function frameworkDisplay(app) {
  return (frameworkConfig[app] || { label: app }).label;
}

// Among `candidates`, find the one with the lowest primary time averaged over
// all benchmarks that have data for it. `getMetrics(benchData, candidate)`
// returns the metrics for that candidate in one benchmark (or undefined).
// Returns { key, avgTime }, or null when no candidate has any data.
function pickFastestByAverage(data, candidates, getMetrics) {
  let best = null;
  for (const candidate of candidates) {
    let total = 0;
    let count = 0;
    for (const benchData of Object.values(data)) {
      const metrics = getMetrics(benchData, candidate);
      if (metrics) {
        total += primaryTime(metrics);
        count += 1;
      }
    }
    if (count === 0) continue;

    const avgTime = total / count;
    // Strict '<' keeps the first candidate on ties.
    if (avgTime < (best ? best.avgTime : Infinity)) {
      best = { key: candidate, avgTime };
    }
  }
  return best;
}

/**
 * Print the Markdown benchmark report to stdout.
 *
 * Output consists of one comparison table per benchmark (rows ordered from
 * fastest to slowest by workflow time, falling back to wall time), a
 * "fastest framework per backend" summary, a "fastest backend per framework"
 * summary, and a collapsible legend.
 *
 * @param {Object} data - Lookup of the form
 *   `{ [benchmarkName]: { [app]: { [backend]: metrics } } }`, where metrics
 *   carries `wallTime`, `workflowTime` and `overhead` in milliseconds
 *   (`workflowTime` / `overhead` may be null).
 * @returns {void}
 */
function renderComparison(data) {
  if (Object.keys(data).length === 0) {
    console.log('No benchmark data found.\n');
    return;
  }

  const { apps, backends } = getAppsAndBackends(data);

  console.log('# 📊 Benchmark Comparison\n');
  console.log(
    'Cross-matrix comparison of workflow performance across frameworks and backends.\n'
  );

  // For each benchmark, create a comparison table
  for (const [benchName, benchData] of Object.entries(data)) {
    console.log(`## ${benchName}\n`);

    // Collect every (app, backend) combination that has data for this benchmark
    const dataPoints = [];
    for (const app of apps) {
      for (const backend of backends) {
        const metrics = benchData[app]?.[backend];
        if (metrics) {
          dataPoints.push({ app, backend, metrics });
        }
      }
    }

    if (dataPoints.length === 0) {
      console.log('_No data available_\n');
      continue;
    }

    // Fastest first; the first row is the reference for the "vs Fastest" column
    dataPoints.sort((a, b) => primaryTime(a.metrics) - primaryTime(b.metrics));
    const fastest = dataPoints[0];
    const fastestTime = primaryTime(fastest.metrics);

    // Render table - Workflow Time is primary metric
    console.log(
      '| Backend | Framework | Workflow Time | Wall Time | Overhead | vs Fastest |'
    );
    console.log(
      '|:--------|:----------|--------------:|----------:|---------:|-----------:|'
    );

    for (const point of dataPoints) {
      const { app, backend, metrics } = point;
      const isFastest = point === fastest;
      const medal = isFastest ? '🥇 ' : '';

      let factor;
      if (isFastest) {
        factor = '1.00x';
      } else if (fastestTime > 0) {
        factor = `${(primaryTime(metrics) / fastestTime).toFixed(2)}x`;
      } else {
        // A zero reference time makes the ratio meaningless (Infinity/NaN).
        factor = '-';
      }

      console.log(
        `| ${backendDisplay(backend)} | ${medal}${frameworkDisplay(app)} | ${formatSecCell(metrics.workflowTime)} | ${formatSecCell(metrics.wallTime)} | ${formatSecCell(metrics.overhead)} | ${factor} |`
      );
    }
    console.log('');
  }

  // Summary: Best framework per backend (by Workflow Time)
  console.log('## Summary: Fastest Framework by Backend\n');
  console.log('| Backend | Fastest Framework | Workflow Time |');
  console.log('|:--------|:------------------|---------------:|');

  for (const backend of backends) {
    const best = pickFastestByAverage(
      data,
      apps,
      (benchData, app) => benchData[app]?.[backend]
    );
    if (best) {
      console.log(
        `| ${backendDisplay(backend)} | ${frameworkDisplay(best.key)} | ${formatSec(best.avgTime)}s (avg) |`
      );
    }
  }
  console.log('');

  // Summary: Best backend per framework (by Workflow Time)
  console.log('## Summary: Fastest Backend by Framework\n');
  console.log('| Framework | Fastest Backend | Workflow Time |');
  console.log('|:----------|:----------------|---------------:|');

  for (const app of apps) {
    const best = pickFastestByAverage(
      data,
      backends,
      (benchData, backend) => benchData[app]?.[backend]
    );
    if (best) {
      console.log(
        `| ${frameworkDisplay(app)} | ${backendDisplay(best.key)} | ${formatSec(best.avgTime)}s (avg) |`
      );
    }
  }
  console.log('');

  // Legend
  console.log('<details>');
  console.log('<summary>Column Definitions</summary>\n');
  console.log(
    '- **Workflow Time**: Runtime reported by workflow (completedAt - createdAt) - *primary metric*'
  );
  console.log(
    '- **Wall Time**: Total testbench time (trigger workflow + poll for result)'
  );
  console.log('- **Overhead**: Testbench overhead (Wall Time - Workflow Time)');
  console.log(
    '- **vs Fastest**: How much slower compared to the fastest configuration for this benchmark'
  );
  console.log('');
  console.log('**Backends:**');
  console.log('- 💻 Local: In-memory filesystem backend');
  console.log('- 🐘 Postgres: PostgreSQL database backend');
  console.log('- ▲ Vercel: Vercel production backend');
  console.log('</details>');
}
355+
356+
// Main: locate the result files, then aggregate them and print the report.
// When nothing is found, say so and exit successfully without rendering.
const resultFiles = findBenchmarkFiles(resultsDir);

if (resultFiles.length > 0) {
  renderComparison(collectBenchmarkData(resultFiles));
} else {
  console.log('No benchmark result files found in', resultsDir);
  process.exit(0);
}

0 commit comments

Comments
 (0)