diff --git a/.github/actions/render-benchmarks/action.yml b/.github/actions/render-benchmarks/action.yml
new file mode 100644
index 000000000..59983bc93
--- /dev/null
+++ b/.github/actions/render-benchmarks/action.yml
@@ -0,0 +1,19 @@
+name: 'Render Benchmark Results'
+description: 'Renders vitest benchmark results as markdown in GitHub step summary'
+inputs:
+  benchmark-file:
+    description: 'Path to the vitest benchmark JSON output file'
+    required: true
+  app-name:
+    description: 'Name of the app being benchmarked'
+    required: true
+  backend:
+    description: 'Backend type (local, postgres, vercel)'
+    required: true
+runs:
+  using: 'composite'
+  steps:
+    - name: Render benchmark results
+      shell: bash
+      run: |
+        node ${{ github.action_path }}/render.js "${{ inputs.benchmark-file }}" "${{ inputs.app-name }}" "${{ inputs.backend }}" >> $GITHUB_STEP_SUMMARY
diff --git a/.github/actions/render-benchmarks/render.js b/.github/actions/render-benchmarks/render.js
new file mode 100644
index 000000000..8861ad493
--- /dev/null
+++ b/.github/actions/render-benchmarks/render.js
@@ -0,0 +1,123 @@
+#!/usr/bin/env node
+
+const fs = require('fs');
+
+const [, , benchmarkFile, appName, backend] = process.argv;
+
+if (!benchmarkFile || !appName || !backend) {
+  console.error('Usage: render.js <benchmark-file> <app-name> <backend>');
+  process.exit(1);
+}
+
+const path = require('path');
+
+// Try to load workflow timing data
+let workflowTimings = null;
+// Only replace filename, not directory name
+const timingFilename = path
+  .basename(benchmarkFile)
+  .replace('bench-results-', 'bench-timings-');
+const timingFile = path.join(path.dirname(benchmarkFile), timingFilename);
+if (fs.existsSync(timingFile)) {
+  try {
+    workflowTimings = JSON.parse(fs.readFileSync(timingFile, 'utf-8'));
+  } catch (e) {
+    console.error(
+      `Warning: Could not parse timing file ${timingFile}: ${e.message}`
+    );
+  }
+}
+
+// Format number with consistent width
+function formatSec(ms, decimals = 3) {
+  return (ms / 1000).toFixed(decimals);
+}
+ +// Get backend emoji +function getBackendEmoji(backend) { + switch (backend) { + case 'vercel': + return '▲'; + case 'postgres': + return '🐘'; + case 'local': + return '💻'; + default: + return ''; + } +} + +try { + const data = JSON.parse(fs.readFileSync(benchmarkFile, 'utf-8')); + + const emoji = getBackendEmoji(backend); + console.log(`## ${emoji} Benchmark Results: ${appName} (${backend})\n`); + + for (const file of data.files) { + for (const group of file.groups) { + // Workflow Time is primary metric, Wall Time is secondary + console.log( + '| Benchmark | Workflow Time (avg) | Min | Max | Wall Time | Overhead | Samples |' + ); + console.log( + '|:----------|--------------------:|----:|----:|----------:|---------:|--------:|' + ); + + for (const bench of group.benchmarks) { + // Skip benchmarks without valid timing data (failed or timed out) + if (bench.mean === undefined || bench.mean === null) { + console.log(`| ${bench.name} | ⚠️ No data | - | - | - | - | 0 |`); + continue; + } + + const wallTimeSec = formatSec(bench.mean); + + // Get workflow execution time if available + let workflowTimeSec = '-'; + let minTimeSec = '-'; + let maxTimeSec = '-'; + let overheadSec = '-'; + + if (workflowTimings?.summary?.[bench.name]) { + const summary = workflowTimings.summary[bench.name]; + workflowTimeSec = formatSec(summary.avgExecutionTimeMs); + + // Get min/max if available + if (summary.minExecutionTimeMs !== undefined) { + minTimeSec = formatSec(summary.minExecutionTimeMs); + } + if (summary.maxExecutionTimeMs !== undefined) { + maxTimeSec = formatSec(summary.maxExecutionTimeMs); + } + + // Calculate overhead (wall time - workflow time) + const overheadMs = bench.mean - summary.avgExecutionTimeMs; + overheadSec = formatSec(overheadMs); + } + + console.log( + `| ${bench.name} | ${workflowTimeSec}s | ${minTimeSec}s | ${maxTimeSec}s | ${wallTimeSec}s | ${overheadSec}s | ${bench.sampleCount} |` + ); + } + console.log(''); + } + } + + // Add legend + console.log('
<details>');
+  console.log('<summary>Column Definitions</summary>\n');
+  console.log(
+    '- **Workflow Time (avg)**: Average runtime reported by workflow (completedAt - createdAt)'
+  );
+  console.log('- **Min**: Minimum workflow execution time across all samples');
+  console.log('- **Max**: Maximum workflow execution time across all samples');
+  console.log(
+    '- **Wall Time**: Total testbench time (trigger workflow + poll for result)'
+  );
+  console.log('- **Overhead**: Testbench overhead (Wall Time - Workflow Time)');
+  console.log('- **Samples**: Number of benchmark iterations run');
+  console.log('</details>
'); +} catch (error) { + console.error(`Error rendering benchmark results: ${error.message}`); + process.exit(1); +} diff --git a/.github/scripts/aggregate-benchmarks.js b/.github/scripts/aggregate-benchmarks.js new file mode 100644 index 000000000..23881f438 --- /dev/null +++ b/.github/scripts/aggregate-benchmarks.js @@ -0,0 +1,365 @@ +#!/usr/bin/env node + +const fs = require('fs'); +const path = require('path'); + +const [, , resultsDir = '.'] = process.argv; + +// Backend display config +const backendConfig = { + local: { emoji: '💻', label: 'Local' }, + postgres: { emoji: '🐘', label: 'Postgres' }, + vercel: { emoji: '▲', label: 'Vercel' }, +}; + +// Framework display config +const frameworkConfig = { + 'nextjs-turbopack': { label: 'Next.js (Turbopack)' }, + nitro: { label: 'Nitro' }, + express: { label: 'Express' }, +}; + +// Format milliseconds as seconds +function formatSec(ms, decimals = 3) { + return (ms / 1000).toFixed(decimals); +} + +// Find all benchmark result files +function findBenchmarkFiles(dir) { + const files = []; + try { + const entries = fs.readdirSync(dir, { withFileTypes: true }); + for (const entry of entries) { + const fullPath = path.join(dir, entry.name); + if (entry.isDirectory()) { + files.push(...findBenchmarkFiles(fullPath)); + } else if ( + entry.name.startsWith('bench-results-') && + entry.name.endsWith('.json') + ) { + files.push(fullPath); + } + } + } catch (e) { + console.error(`Warning: Could not read directory ${dir}: ${e.message}`); + } + return files; +} + +// Parse filename to extract app and backend +function parseFilename(filename) { + // Format: bench-results-{app}-{backend}.json + const match = filename.match(/bench-results-(.+)-(\w+)\.json$/); + if (!match) return null; + return { app: match[1], backend: match[2] }; +} + +// Load timing data for a benchmark file +function loadTimingData(benchmarkFile) { + // Only replace filename, not directory name + const timingFilename = path + .basename(benchmarkFile) + 
.replace('bench-results-', 'bench-timings-'); + const timingFile = path.join(path.dirname(benchmarkFile), timingFilename); + if (fs.existsSync(timingFile)) { + try { + return JSON.parse(fs.readFileSync(timingFile, 'utf-8')); + } catch (e) { + console.error( + `Warning: Could not parse timing file ${timingFile}: ${e.message}` + ); + } + } + return null; +} + +// Collect all benchmark data +function collectBenchmarkData(resultFiles) { + // Structure: { [benchmarkName]: { [app]: { [backend]: { wallTime, workflowTime, overhead, min, max, samples } } } } + const data = {}; + + for (const file of resultFiles) { + const parsed = parseFilename(path.basename(file)); + if (!parsed) continue; + + const { app, backend } = parsed; + + try { + const results = JSON.parse(fs.readFileSync(file, 'utf-8')); + const timings = loadTimingData(file); + + for (const fileData of results.files || []) { + for (const group of fileData.groups || []) { + for (const bench of group.benchmarks || []) { + // Skip benchmarks without valid timing data (failed or timed out) + if (bench.mean === undefined || bench.mean === null) { + continue; + } + + const benchName = bench.name; + + if (!data[benchName]) { + data[benchName] = {}; + } + if (!data[benchName][app]) { + data[benchName][app] = {}; + } + + // Get workflow timing if available + let workflowTimeMs = null; + if (timings?.summary?.[benchName]) { + workflowTimeMs = timings.summary[benchName].avgExecutionTimeMs; + } + + data[benchName][app][backend] = { + wallTime: bench.mean, + workflowTime: workflowTimeMs, + overhead: + workflowTimeMs !== null ? 
bench.mean - workflowTimeMs : null, + min: bench.min, + max: bench.max, + samples: bench.sampleCount, + }; + } + } + } + } catch (e) { + console.error( + `Warning: Could not parse benchmark file ${file}: ${e.message}` + ); + } + } + + return data; +} + +// Get all apps and backends from the data +function getAppsAndBackends(data) { + const apps = new Set(); + const backends = new Set(); + + for (const benchData of Object.values(data)) { + for (const app of Object.keys(benchData)) { + apps.add(app); + for (const backend of Object.keys(benchData[app])) { + backends.add(backend); + } + } + } + + // Sort: local, postgres, vercel for backends + const backendOrder = ['local', 'postgres', 'vercel']; + const sortedBackends = [...backends].sort( + (a, b) => backendOrder.indexOf(a) - backendOrder.indexOf(b) + ); + + // Sort apps alphabetically + const sortedApps = [...apps].sort(); + + return { apps: sortedApps, backends: sortedBackends }; +} + +// Render the comparison tables +function renderComparison(data) { + const { apps, backends } = getAppsAndBackends(data); + + if (Object.keys(data).length === 0) { + console.log('No benchmark data found.\n'); + return; + } + + console.log('# 📊 Benchmark Comparison\n'); + console.log( + 'Cross-matrix comparison of workflow performance across frameworks and backends.\n' + ); + + // For each benchmark, create a comparison table + for (const [benchName, benchData] of Object.entries(data)) { + console.log(`## ${benchName}\n`); + + // Collect all data points with their wall times for ranking + const dataPoints = []; + for (const app of apps) { + for (const backend of backends) { + const metrics = benchData[app]?.[backend]; + if (metrics) { + dataPoints.push({ app, backend, metrics }); + } + } + } + + if (dataPoints.length === 0) { + console.log('_No data available_\n'); + continue; + } + + // Sort by workflow time (primary metric), fall back to wall time if workflow time unavailable + dataPoints.sort((a, b) => { + const aTime = 
a.metrics.workflowTime ?? a.metrics.wallTime; + const bTime = b.metrics.workflowTime ?? b.metrics.wallTime; + return aTime - bTime; + }); + const fastest = dataPoints[0]; + const fastestTime = + fastest.metrics.workflowTime ?? fastest.metrics.wallTime; + + // Render table - Workflow Time is primary metric + console.log( + '| Backend | Framework | Workflow Time | Wall Time | Overhead | vs Fastest |' + ); + console.log( + '|:--------|:----------|--------------:|----------:|---------:|-----------:|' + ); + + for (const { app, backend, metrics } of dataPoints) { + const backendInfo = backendConfig[backend] || { + emoji: '', + label: backend, + }; + const frameworkInfo = frameworkConfig[app] || { label: app }; + + const isFastest = metrics === fastest.metrics; + const medal = isFastest ? '🥇 ' : ''; + + const workflowTimeSec = + metrics.workflowTime !== null ? formatSec(metrics.workflowTime) : '-'; + const wallTimeSec = formatSec(metrics.wallTime); + const overheadSec = + metrics.overhead !== null ? formatSec(metrics.overhead) : '-'; + + const currentTime = metrics.workflowTime ?? metrics.wallTime; + const factor = isFastest + ? 
'1.00x' + : `${(currentTime / fastestTime).toFixed(2)}x`; + + console.log( + `| ${backendInfo.emoji} ${backendInfo.label} | ${medal}${frameworkInfo.label} | ${workflowTimeSec}s | ${wallTimeSec}s | ${overheadSec}s | ${factor} |` + ); + } + console.log(''); + } + + // Summary: Best framework per backend (by Workflow Time) + console.log('## Summary: Fastest Framework by Backend\n'); + console.log('| Backend | Fastest Framework | Workflow Time |'); + console.log('|:--------|:------------------|---------------:|'); + + for (const backend of backends) { + const backendInfo = backendConfig[backend] || { emoji: '', label: backend }; + let fastestApp = null; + let fastestTime = Infinity; + + // Average workflow time across all benchmarks for this backend + const appTotals = {}; + const appCounts = {}; + + for (const benchData of Object.values(data)) { + for (const app of apps) { + const metrics = benchData[app]?.[backend]; + if (metrics) { + const time = metrics.workflowTime ?? metrics.wallTime; + appTotals[app] = (appTotals[app] || 0) + time; + appCounts[app] = (appCounts[app] || 0) + 1; + } + } + } + + for (const app of apps) { + if (appCounts[app] > 0) { + const avgTime = appTotals[app] / appCounts[app]; + if (avgTime < fastestTime) { + fastestTime = avgTime; + fastestApp = app; + } + } + } + + if (fastestApp) { + const frameworkInfo = frameworkConfig[fastestApp] || { + label: fastestApp, + }; + console.log( + `| ${backendInfo.emoji} ${backendInfo.label} | ${frameworkInfo.label} | ${formatSec(fastestTime)}s (avg) |` + ); + } + } + console.log(''); + + // Summary: Best backend per framework (by Workflow Time) + console.log('## Summary: Fastest Backend by Framework\n'); + console.log('| Framework | Fastest Backend | Workflow Time |'); + console.log('|:----------|:----------------|---------------:|'); + + for (const app of apps) { + const frameworkInfo = frameworkConfig[app] || { label: app }; + let fastestBackend = null; + let fastestTime = Infinity; + + // Average 
workflow time across all benchmarks for this app + const backendTotals = {}; + const backendCounts = {}; + + for (const benchData of Object.values(data)) { + for (const backend of backends) { + const metrics = benchData[app]?.[backend]; + if (metrics) { + const time = metrics.workflowTime ?? metrics.wallTime; + backendTotals[backend] = (backendTotals[backend] || 0) + time; + backendCounts[backend] = (backendCounts[backend] || 0) + 1; + } + } + } + + for (const backend of backends) { + if (backendCounts[backend] > 0) { + const avgTime = backendTotals[backend] / backendCounts[backend]; + if (avgTime < fastestTime) { + fastestTime = avgTime; + fastestBackend = backend; + } + } + } + + if (fastestBackend) { + const backendInfo = backendConfig[fastestBackend] || { + emoji: '', + label: fastestBackend, + }; + console.log( + `| ${frameworkInfo.label} | ${backendInfo.emoji} ${backendInfo.label} | ${formatSec(fastestTime)}s (avg) |` + ); + } + } + console.log(''); + + // Legend + console.log('
<details>');
+  console.log('<summary>Column Definitions</summary>\n');
+  console.log(
+    '- **Workflow Time**: Runtime reported by workflow (completedAt - createdAt) - *primary metric*'
+  );
+  console.log(
+    '- **Wall Time**: Total testbench time (trigger workflow + poll for result)'
+  );
+  console.log('- **Overhead**: Testbench overhead (Wall Time - Workflow Time)');
+  console.log(
+    '- **vs Fastest**: How much slower compared to the fastest configuration for this benchmark'
+  );
+  console.log('');
+  console.log('**Backends:**');
+  console.log('- 💻 Local: In-memory filesystem backend');
+  console.log('- 🐘 Postgres: PostgreSQL database backend');
+  console.log('- ▲ Vercel: Vercel production backend');
+  console.log('</details>
'); +} + +// Main +const resultFiles = findBenchmarkFiles(resultsDir); + +if (resultFiles.length === 0) { + console.log('No benchmark result files found in', resultsDir); + process.exit(0); +} + +const data = collectBenchmarkData(resultFiles); +renderComparison(data); diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml new file mode 100644 index 000000000..b993c2f6a --- /dev/null +++ b/.github/workflows/benchmarks.yml @@ -0,0 +1,367 @@ +name: Performance Benchmarks + +on: + pull_request: + branches: [main] + push: + branches: [main] + workflow_dispatch: # Allow manual triggers + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + # Phase 0: Create placeholder PR comment (so it's pinned to the top) + pr-comment-start: + name: Create PR Comment + runs-on: ubuntu-latest + if: github.event_name == 'pull_request' + timeout-minutes: 2 + + steps: + - name: Create initial benchmark comment + uses: marocchino/sticky-pull-request-comment@v2 + with: + header: benchmark-results + message: | + ## 📊 Benchmark Results + + ⏳ **Benchmarks are running...** + + This comment will be updated with the results when the benchmarks complete. 
+ + --- + _Started at: ${{ github.event.pull_request.updated_at }}_ + + # Phase 1: Build all packages (not workbenches) + build: + name: Build Packages + runs-on: ubuntu-latest + timeout-minutes: 30 + env: + TURBO_TOKEN: ${{ secrets.TURBO_TOKEN }} + TURBO_TEAM: ${{ vars.TURBO_TEAM }} + + steps: + - uses: actions/checkout@v4 + + - uses: pnpm/action-setup@v3 + with: + version: 10.14.0 + + - uses: actions/setup-node@v4 + with: + node-version: 22.x + cache: 'pnpm' + + - name: Install dependencies + run: pnpm install --frozen-lockfile + + - name: Build all packages + run: pnpm turbo run build --filter='!./workbench/*' + + # Cache node_modules and package build outputs + - name: Upload build artifacts + uses: actions/upload-artifact@v4 + with: + name: build-artifacts + path: | + node_modules + packages/*/dist + retention-days: 1 + + # Phase 2a: Local benchmarks (no postgres) + benchmark-local: + name: Benchmark Local (${{ matrix.app }}) + runs-on: ubuntu-latest + needs: build + timeout-minutes: 30 + strategy: + fail-fast: false + matrix: + # Note: Use actual directory names, not symlinks (nitro -> nitro-v3) + app: [nextjs-turbopack, nitro-v3, express] + + env: + TURBO_TOKEN: ${{ secrets.TURBO_TOKEN }} + TURBO_TEAM: ${{ vars.TURBO_TEAM }} + + steps: + - uses: actions/checkout@v4 + + - uses: pnpm/action-setup@v3 + with: + version: 10.14.0 + + - uses: actions/setup-node@v4 + with: + node-version: 22.x + + - name: Download build artifacts + uses: actions/download-artifact@v4 + with: + name: build-artifacts + path: . + + - name: Install dependencies + run: pnpm install --frozen-lockfile + + - name: Build workbench + run: pnpm turbo run build --filter='./workbench/${{ matrix.app }}' + + - name: Run benchmarks + env: + DEPLOYMENT_URL: "http://localhost:3000" + APP_NAME: ${{ matrix.app }} + run: | + cd workbench/${{ matrix.app }} + pnpm start & + echo "Waiting for server to start..." + sleep 15 + cd ../.. 
+ pnpm vitest bench packages/core/e2e/bench.bench.ts --run --outputJson=bench-results-${{ matrix.app }}-local.json + + - name: Render benchmark results + uses: ./.github/actions/render-benchmarks + with: + benchmark-file: bench-results-${{ matrix.app }}-local.json + app-name: ${{ matrix.app }} + backend: local + + - name: Upload benchmark results + uses: actions/upload-artifact@v4 + with: + name: bench-results-${{ matrix.app }}-local + path: | + bench-results-${{ matrix.app }}-local.json + bench-timings-${{ matrix.app }}-local.json + + # Phase 2b: Postgres benchmarks (with postgres service) + benchmark-postgres: + name: Benchmark Postgres (${{ matrix.app }}) + runs-on: ubuntu-latest + needs: build + timeout-minutes: 30 + strategy: + fail-fast: false + matrix: + # Note: Use actual directory names, not symlinks (nitro -> nitro-v3) + app: [nextjs-turbopack, nitro-v3, express] + + services: + postgres: + image: postgres:18-alpine + env: + POSTGRES_USER: world + POSTGRES_PASSWORD: world + POSTGRES_DB: world + ports: + - 5432:5432 + options: >- + --health-cmd pg_isready + --health-interval 10s + --health-timeout 5s + --health-retries 5 + + env: + TURBO_TOKEN: ${{ secrets.TURBO_TOKEN }} + TURBO_TEAM: ${{ vars.TURBO_TEAM }} + WORKFLOW_TARGET_WORLD: "@workflow/world-postgres" + WORKFLOW_POSTGRES_URL: "postgres://world:world@localhost:5432/world" + + steps: + - uses: actions/checkout@v4 + + - uses: pnpm/action-setup@v3 + with: + version: 10.14.0 + + - uses: actions/setup-node@v4 + with: + node-version: 22.x + + - name: Download build artifacts + uses: actions/download-artifact@v4 + with: + name: build-artifacts + path: . 
+ + - name: Install dependencies + run: pnpm install --frozen-lockfile + + - name: Setup PostgreSQL database + run: ./packages/world-postgres/bin/setup.js + + # Build workbench with postgres world (build output differs based on target world) + - name: Build workbench for postgres + run: pnpm turbo run build --filter='./workbench/${{ matrix.app }}' + + - name: Run benchmarks + env: + DEPLOYMENT_URL: "http://localhost:3000" + APP_NAME: ${{ matrix.app }} + run: | + cd workbench/${{ matrix.app }} + pnpm start & + echo "Waiting for server to start..." + sleep 15 + cd ../.. + pnpm vitest bench packages/core/e2e/bench.bench.ts --run --outputJson=bench-results-${{ matrix.app }}-postgres.json + + - name: Render benchmark results + uses: ./.github/actions/render-benchmarks + with: + benchmark-file: bench-results-${{ matrix.app }}-postgres.json + app-name: ${{ matrix.app }} + backend: postgres + + - name: Upload benchmark results + uses: actions/upload-artifact@v4 + with: + name: bench-results-${{ matrix.app }}-postgres + path: | + bench-results-${{ matrix.app }}-postgres.json + bench-timings-${{ matrix.app }}-postgres.json + + # Phase 2c: Vercel benchmarks (needs build artifacts for packages) + benchmark-vercel: + name: Benchmark Vercel (${{ matrix.app.name }}) + runs-on: ubuntu-latest + needs: build + timeout-minutes: 60 + strategy: + fail-fast: false + matrix: + app: + - name: "nextjs-turbopack" + project-id: "prj_yjkM7UdHliv8bfxZ1sMJQf1pMpdi" + - name: "nitro-v3" + project-id: "prj_e7DZirYdLrQKXNrlxg7KmA6ABx8r" + - name: "express" + project-id: "prj_cCZjpBy92VRbKHHbarDMhOHtkuIr" + + steps: + - uses: actions/checkout@v4 + + - uses: pnpm/action-setup@v3 + with: + version: 10.14.0 + + - uses: actions/setup-node@v4 + with: + node-version: 22.x + + - name: Download build artifacts + uses: actions/download-artifact@v4 + with: + name: build-artifacts + path: . 
+ + - name: Install dependencies + run: pnpm install --frozen-lockfile + + - name: Wait for Vercel deployment + id: waitForDeployment + uses: ./.github/actions/wait-for-vercel-project + with: + team-id: "team_nO2mCG4W8IxPIeKoSsqwAxxB" + project-id: ${{ matrix.app.project-id }} + vercel-token: ${{ secrets.VERCEL_LABS_TOKEN }} + timeout: 1000 + check-interval: 15 + environment: ${{ github.ref == 'refs/heads/main' && 'production' || 'preview' }} + + - name: Run benchmarks + env: + DEPLOYMENT_URL: ${{ steps.waitForDeployment.outputs.deployment-url }} + APP_NAME: ${{ matrix.app.name }} + WORKFLOW_VERCEL_ENV: ${{ github.ref == 'refs/heads/main' && 'production' || 'preview' }} + WORKFLOW_VERCEL_AUTH_TOKEN: ${{ secrets.VERCEL_LABS_TOKEN }} + WORKFLOW_VERCEL_TEAM: "team_nO2mCG4W8IxPIeKoSsqwAxxB" + WORKFLOW_VERCEL_PROJECT: ${{ matrix.app.project-id }} + run: | + pnpm vitest bench packages/core/e2e/bench.bench.ts --run --outputJson=bench-results-${{ matrix.app.name }}-vercel.json + + - name: Render benchmark results + uses: ./.github/actions/render-benchmarks + with: + benchmark-file: bench-results-${{ matrix.app.name }}-vercel.json + app-name: ${{ matrix.app.name }} + backend: vercel + + - name: Upload benchmark results + uses: actions/upload-artifact@v4 + with: + name: bench-results-${{ matrix.app.name }}-vercel + path: | + bench-results-${{ matrix.app.name }}-vercel.json + bench-timings-${{ matrix.app.name }}-vercel.json + + # Phase 3: Aggregate all benchmark results and create comparison + summary: + name: Benchmark Summary + runs-on: ubuntu-latest + needs: [benchmark-local, benchmark-postgres, benchmark-vercel] + if: always() && !cancelled() + timeout-minutes: 10 + + steps: + - uses: actions/checkout@v4 + + - name: Download all benchmark artifacts + uses: actions/download-artifact@v4 + with: + pattern: bench-results-* + path: benchmark-results + merge-multiple: true + + - name: List downloaded files + run: find benchmark-results -type f -name "*.json" | sort + + - name: 
Aggregate and compare benchmarks + id: aggregate + run: | + # Capture output to both file and step summary + node .github/scripts/aggregate-benchmarks.js benchmark-results | tee benchmark-summary.md >> $GITHUB_STEP_SUMMARY + + - name: Check benchmark job statuses + id: check-status + run: | + # Check if any benchmark jobs failed + LOCAL_STATUS="${{ needs.benchmark-local.result }}" + POSTGRES_STATUS="${{ needs.benchmark-postgres.result }}" + VERCEL_STATUS="${{ needs.benchmark-vercel.result }}" + + echo "local=$LOCAL_STATUS" >> $GITHUB_OUTPUT + echo "postgres=$POSTGRES_STATUS" >> $GITHUB_OUTPUT + echo "vercel=$VERCEL_STATUS" >> $GITHUB_OUTPUT + + if [[ "$LOCAL_STATUS" == "failure" || "$POSTGRES_STATUS" == "failure" || "$VERCEL_STATUS" == "failure" ]]; then + echo "has_failures=true" >> $GITHUB_OUTPUT + else + echo "has_failures=false" >> $GITHUB_OUTPUT + fi + + - name: Update PR comment with results + if: github.event_name == 'pull_request' + uses: marocchino/sticky-pull-request-comment@v2 + with: + header: benchmark-results + path: benchmark-summary.md + + - name: Append failure notice to PR comment + if: github.event_name == 'pull_request' && steps.check-status.outputs.has_failures == 'true' + uses: marocchino/sticky-pull-request-comment@v2 + with: + header: benchmark-results + append: true + message: | + + --- + ⚠️ **Some benchmark jobs failed:** + - Local: ${{ needs.benchmark-local.result }} + - Postgres: ${{ needs.benchmark-postgres.result }} + - Vercel: ${{ needs.benchmark-vercel.result }} + + Check the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for details. 
diff --git a/package.json b/package.json index 5e80e2d39..9dc881195 100644 --- a/package.json +++ b/package.json @@ -32,6 +32,8 @@ "clean": "turbo clean", "typecheck": "turbo typecheck", "test:e2e": "vitest run packages/core/e2e/e2e.test.ts", + "bench": "vitest bench packages/core/e2e/bench.bench.ts", + "bench:local": "DEPLOYMENT_URL=http://localhost:3000 APP_NAME=nextjs-turbopack vitest bench packages/core/e2e/bench.bench.ts", "lint": "biome check", "format": "biome format --write", "changeset": "changeset", diff --git a/packages/core/e2e/bench.bench.ts b/packages/core/e2e/bench.bench.ts new file mode 100644 index 000000000..a887f0bcb --- /dev/null +++ b/packages/core/e2e/bench.bench.ts @@ -0,0 +1,215 @@ +import { withResolvers } from '@workflow/utils'; +import { bench, describe } from 'vitest'; +import { dehydrateWorkflowArguments } from '../src/serialization'; +import fs from 'fs'; +import path from 'path'; + +const deploymentUrl = process.env.DEPLOYMENT_URL; +if (!deploymentUrl) { + throw new Error('`DEPLOYMENT_URL` environment variable is not set'); +} + +// Store workflow execution times for each benchmark +const workflowTimings: Record< + string, + { + createdAt: string; + startedAt?: string; + completedAt?: string; + executionTimeMs?: number; + }[] +> = {}; + +async function triggerWorkflow( + workflow: string | { workflowFile: string; workflowFn: string }, + args: any[] +): Promise<{ runId: string }> { + const url = new URL('/api/trigger', deploymentUrl); + const workflowFn = + typeof workflow === 'string' ? workflow : workflow.workflowFn; + const workflowFile = + typeof workflow === 'string' + ? 
'workflows/97_bench.ts' + : workflow.workflowFile; + + url.searchParams.set('workflowFile', workflowFile); + url.searchParams.set('workflowFn', workflowFn); + + const ops: Promise[] = []; + const { promise: runIdPromise, resolve: resolveRunId } = + withResolvers(); + const dehydratedArgs = dehydrateWorkflowArguments(args, ops, runIdPromise); + + const res = await fetch(url, { + method: 'POST', + body: JSON.stringify(dehydratedArgs), + }); + if (!res.ok) { + throw new Error( + `Failed to trigger workflow: ${res.url} ${ + res.status + }: ${await res.text()}` + ); + } + const run = await res.json(); + resolveRunId(run.runId); + + // Resolve and wait for any stream operations + await Promise.all(ops); + + return run; +} + +async function getWorkflowReturnValue( + runId: string +): Promise<{ run: any; value: any }> { + // We need to poll the GET endpoint until the workflow run is completed. + while (true) { + const url = new URL('/api/trigger', deploymentUrl); + url.searchParams.set('runId', runId); + + const res = await fetch(url); + + if (res.status === 202) { + // Workflow run is still running, so we need to wait and poll again + await new Promise((resolve) => setTimeout(resolve, 100)); + continue; + } + + // Extract run metadata from headers + const run = { + runId, + createdAt: res.headers.get('X-Workflow-Run-Created-At'), + startedAt: res.headers.get('X-Workflow-Run-Started-At'), + completedAt: res.headers.get('X-Workflow-Run-Completed-At'), + }; + + const contentType = res.headers.get('Content-Type'); + + if (contentType?.includes('application/json')) { + return { run, value: await res.json() }; + } + + if (contentType?.includes('application/octet-stream')) { + return { run, value: res.body }; + } + + throw new Error(`Unexpected content type: ${contentType}`); + } +} + +function getTimingOutputPath() { + const appName = process.env.APP_NAME || 'unknown'; + // Detect backend type: vercel if WORKFLOW_VERCEL_ENV is set, postgres if target world includes postgres, 
otherwise local + const backend = process.env.WORKFLOW_VERCEL_ENV + ? 'vercel' + : process.env.WORKFLOW_TARGET_WORLD?.includes('postgres') + ? 'postgres' + : 'local'; + return path.resolve( + process.cwd(), + `bench-timings-${appName}-${backend}.json` + ); +} + +function writeTimingFile() { + const outputPath = getTimingOutputPath(); + + // Calculate average, min, and max execution times + const summary: Record< + string, + { + avgExecutionTimeMs: number; + minExecutionTimeMs: number; + maxExecutionTimeMs: number; + samples: number; + } + > = {}; + for (const [benchName, timings] of Object.entries(workflowTimings)) { + const validTimings = timings.filter((t) => t.executionTimeMs !== undefined); + if (validTimings.length > 0) { + const executionTimes = validTimings.map((t) => t.executionTimeMs!); + const avg = + executionTimes.reduce((sum, t) => sum + t, 0) / executionTimes.length; + const min = Math.min(...executionTimes); + const max = Math.max(...executionTimes); + summary[benchName] = { + avgExecutionTimeMs: avg, + minExecutionTimeMs: min, + maxExecutionTimeMs: max, + samples: validTimings.length, + }; + } + } + + fs.writeFileSync( + outputPath, + JSON.stringify({ timings: workflowTimings, summary }, null, 2) + ); +} + +function recordWorkflowTiming(benchName: string, run: any) { + if (!workflowTimings[benchName]) { + workflowTimings[benchName] = []; + } + + const timing: any = { + createdAt: run.createdAt, + startedAt: run.startedAt, + completedAt: run.completedAt, + }; + + // Calculate execution time if timestamps are available (completedAt - createdAt) + if (run.createdAt && run.completedAt) { + const created = new Date(run.createdAt).getTime(); + const completed = new Date(run.completedAt).getTime(); + timing.executionTimeMs = completed - created; + } + + workflowTimings[benchName].push(timing); + + // Write timing file after each recording (overwrites previous) + writeTimingFile(); +} + +describe.concurrent('Workflow Performance Benchmarks', () => { + 
bench( + 'workflow with no steps', + async () => { + const { runId } = await triggerWorkflow('noStepsWorkflow', [42]); + const { run } = await getWorkflowReturnValue(runId); + recordWorkflowTiming('workflow with no steps', run); + }, + { time: 5000 } + ); + + bench( + 'workflow with 1 step', + async () => { + const { runId } = await triggerWorkflow('oneStepWorkflow', [100]); + const { run } = await getWorkflowReturnValue(runId); + recordWorkflowTiming('workflow with 1 step', run); + }, + { time: 5000 } + ); + + bench( + 'workflow with 10 sequential steps', + async () => { + const { runId } = await triggerWorkflow('tenSequentialStepsWorkflow', []); + const { run } = await getWorkflowReturnValue(runId); + recordWorkflowTiming('workflow with 10 sequential steps', run); + }, + { time: 5000 } + ); + + bench( + 'workflow with 10 parallel steps', + async () => { + const { runId } = await triggerWorkflow('tenParallelStepsWorkflow', []); + const { run } = await getWorkflowReturnValue(runId); + recordWorkflowTiming('workflow with 10 parallel steps', run); + }, + { time: 5000 } + ); +}); diff --git a/turbo.json b/turbo.json index 7f1ab8bcd..453490eb4 100644 --- a/turbo.json +++ b/turbo.json @@ -4,7 +4,13 @@ "tasks": { "build": { "dependsOn": ["^build"], - "outputs": ["dist/**", "build", ".svelte-kit", ".vercel/output"] + "outputs": [ + "dist/**", + "build", + ".svelte-kit", + ".vercel/output", + ".output/**" + ] }, "dev": { "dependsOn": ["^build"], diff --git a/vitest.config.ts b/vitest.config.ts new file mode 100644 index 000000000..7d49659db --- /dev/null +++ b/vitest.config.ts @@ -0,0 +1,10 @@ +import { defineConfig } from 'vitest/config'; + +export default defineConfig({ + test: { + testTimeout: 60_000, + }, + benchmark: { + include: ['**/*.bench.ts'], + }, +}); diff --git a/workbench/example/api/trigger.ts b/workbench/example/api/trigger.ts index aa7e79f03..2cfe032f9 100644 --- a/workbench/example/api/trigger.ts +++ b/workbench/example/api/trigger.ts @@ -41,7 +41,7 @@ 
export async function POST(req: Request) { workflowFileItems[workflowFn as keyof typeof workflowFileItems], args ); - console.log('Run:', run); + console.log('Run:', run.runId); return Response.json(run); } catch (err) { console.error(`Failed to start!!`, err); diff --git a/workbench/example/workflows/97_bench.ts b/workbench/example/workflows/97_bench.ts new file mode 100644 index 000000000..4b2b496c6 --- /dev/null +++ b/workbench/example/workflows/97_bench.ts @@ -0,0 +1,40 @@ +// Benchmark workflows for performance testing + +async function doWork() { + 'use step'; + return 42; +} + +// Workflow with no steps - pure orchestration +export async function noStepsWorkflow(input: number) { + 'use workflow'; + return input * 2; +} + +// Workflow with 1 step +export async function oneStepWorkflow(input: number) { + 'use workflow'; + const result = await doWork(); + return result + input; +} + +// Workflow with 10 sequential steps +export async function tenSequentialStepsWorkflow() { + 'use workflow'; + let result = 0; + for (let i = 0; i < 10; i++) { + result = await doWork(); + } + return result; +} + +// Workflow with 10 parallel steps +export async function tenParallelStepsWorkflow() { + 'use workflow'; + const promises = []; + for (let i = 0; i < 10; i++) { + promises.push(doWork()); + } + const results = await Promise.all(promises); + return results.reduce((sum, val) => sum + val, 0); +} diff --git a/workbench/express/src/index.ts b/workbench/express/src/index.ts index c4231c45a..b459fbbf3 100644 --- a/workbench/express/src/index.ts +++ b/workbench/express/src/index.ts @@ -134,6 +134,19 @@ app.get('/api/trigger', async (req, res) => { const returnValue = await run.returnValue; console.log('Return value:', returnValue); + // Include run metadata in headers + const [createdAt, startedAt, completedAt] = await Promise.all([ + run.createdAt, + run.startedAt, + run.completedAt, + ]); + res.setHeader('X-Workflow-Run-Created-At', createdAt?.toISOString() || ''); + 
res.setHeader('X-Workflow-Run-Started-At', startedAt?.toISOString() || ''); + res.setHeader( + 'X-Workflow-Run-Completed-At', + completedAt?.toISOString() || '' + ); + if (returnValue instanceof ReadableStream) { // Set headers for streaming response res.setHeader('Content-Type', 'application/octet-stream'); diff --git a/workbench/express/turbo.json b/workbench/express/turbo.json new file mode 100644 index 000000000..a28e18230 --- /dev/null +++ b/workbench/express/turbo.json @@ -0,0 +1,9 @@ +{ + "$schema": "https://turborepo.org/schema.json", + "extends": ["//"], + "tasks": { + "build": { + "outputs": [".output/**", ".nitro/**"] + } + } +} diff --git a/workbench/hono/src/index.ts b/workbench/hono/src/index.ts index b16e31b7a..7554bf6d1 100644 --- a/workbench/hono/src/index.ts +++ b/workbench/hono/src/index.ts @@ -102,13 +102,25 @@ app.get('/api/trigger', async ({ req }) => { const run = getRun(runId); const returnValue = await run.returnValue; console.log('Return value:', returnValue); + + // Include run metadata in headers + const [createdAt, startedAt, completedAt] = await Promise.all([ + run.createdAt, + run.startedAt, + run.completedAt, + ]); + const headers: HeadersInit = + returnValue instanceof ReadableStream + ? { 'Content-Type': 'application/octet-stream' } + : {}; + + headers['X-Workflow-Run-Created-At'] = createdAt?.toISOString() || ''; + headers['X-Workflow-Run-Started-At'] = startedAt?.toISOString() || ''; + headers['X-Workflow-Run-Completed-At'] = completedAt?.toISOString() || ''; + return returnValue instanceof ReadableStream - ? new Response(returnValue, { - headers: { - 'Content-Type': 'application/octet-stream', - }, - }) - : Response.json(returnValue); + ? 
new Response(returnValue, { headers }) + : Response.json(returnValue, { headers }); } catch (error) { if (error instanceof Error) { if (WorkflowRunNotCompletedError.is(error)) { diff --git a/workbench/nextjs-turbopack/app/api/trigger/route.ts b/workbench/nextjs-turbopack/app/api/trigger/route.ts index f9b8d5ef4..097effce4 100644 --- a/workbench/nextjs-turbopack/app/api/trigger/route.ts +++ b/workbench/nextjs-turbopack/app/api/trigger/route.ts @@ -55,7 +55,7 @@ export async function POST(req: Request) { try { const run = await start(workflow as any, args as any); - console.log('Run:', run); + console.log('Run', run.runId); return Response.json(run); } catch (err) { console.error(`Failed to start!!`, err); @@ -98,13 +98,25 @@ export async function GET(req: Request) { const run = getRun(runId); const returnValue = await run.returnValue; console.log('Return value:', returnValue); + + // Include run metadata in headers + const [createdAt, startedAt, completedAt] = await Promise.all([ + run.createdAt, + run.startedAt, + run.completedAt, + ]); + const headers: HeadersInit = + returnValue instanceof ReadableStream + ? { 'Content-Type': 'application/octet-stream' } + : {}; + + headers['X-Workflow-Run-Created-At'] = createdAt?.toISOString() || ''; + headers['X-Workflow-Run-Started-At'] = startedAt?.toISOString() || ''; + headers['X-Workflow-Run-Completed-At'] = completedAt?.toISOString() || ''; + return returnValue instanceof ReadableStream - ? new Response(returnValue, { - headers: { - 'Content-Type': 'application/octet-stream', - }, - }) - : Response.json(returnValue); + ? 
new Response(returnValue, { headers }) + : Response.json(returnValue, { headers }); } catch (error) { if (error instanceof Error) { if (WorkflowRunNotCompletedError.is(error)) { diff --git a/workbench/nextjs-turbopack/workflows/97_bench.ts b/workbench/nextjs-turbopack/workflows/97_bench.ts new file mode 120000 index 000000000..d88c3e927 --- /dev/null +++ b/workbench/nextjs-turbopack/workflows/97_bench.ts @@ -0,0 +1 @@ +../../example/workflows/97_bench.ts \ No newline at end of file diff --git a/workbench/nextjs-webpack/app/api/trigger/route.ts b/workbench/nextjs-webpack/app/api/trigger/route.ts index d1dafb427..d45673599 100644 --- a/workbench/nextjs-webpack/app/api/trigger/route.ts +++ b/workbench/nextjs-webpack/app/api/trigger/route.ts @@ -54,7 +54,7 @@ export async function POST(req: Request) { } const run = await start(workflow as any, args); - console.log('Run:', run); + console.log('Run:', run.runId); return Response.json(run); } catch (err) { console.error(`Failed to start!!`, err); diff --git a/workbench/nitro-v2/server/api/trigger.post.ts b/workbench/nitro-v2/server/api/trigger.post.ts index b5d85fe2a..fbe4e55d4 100644 --- a/workbench/nitro-v2/server/api/trigger.post.ts +++ b/workbench/nitro-v2/server/api/trigger.post.ts @@ -53,7 +53,7 @@ export default defineEventHandler(async (event) => { try { const run = await start(workflow as any, args as any); - console.log('Run:', run); + console.log('Run:', run.runId); return Response.json(run); } catch (err) { console.error(`Failed to start!!`, err); diff --git a/workbench/nitro-v2/turbo.json b/workbench/nitro-v2/turbo.json deleted file mode 120000 index 2ea168a38..000000000 --- a/workbench/nitro-v2/turbo.json +++ /dev/null @@ -1 +0,0 @@ -../nitro-v3/turbo.json \ No newline at end of file diff --git a/workbench/nitro-v2/turbo.json b/workbench/nitro-v2/turbo.json new file mode 100644 index 000000000..29fc5e458 --- /dev/null +++ b/workbench/nitro-v2/turbo.json @@ -0,0 +1,9 @@ +{ + "$schema": 
"https://turborepo.org/schema.json", + "extends": ["//"], + "tasks": { + "build": { + "outputs": [".output/**", ".nuxt/**", ".nitro/**"] + } + } +} diff --git a/workbench/nitro-v3/routes/api/trigger.get.ts b/workbench/nitro-v3/routes/api/trigger.get.ts index a7ef468e6..448710573 100644 --- a/workbench/nitro-v3/routes/api/trigger.get.ts +++ b/workbench/nitro-v3/routes/api/trigger.get.ts @@ -38,13 +38,25 @@ export default async ({ url }: { req: Request; url: URL }) => { const run = getRun(runId); const returnValue = await run.returnValue; console.log('Return value:', returnValue); + + // Include run metadata in headers + const [createdAt, startedAt, completedAt] = await Promise.all([ + run.createdAt, + run.startedAt, + run.completedAt, + ]); + const headers: HeadersInit = + returnValue instanceof ReadableStream + ? { 'Content-Type': 'application/octet-stream' } + : {}; + + headers['X-Workflow-Run-Created-At'] = createdAt?.toISOString() || ''; + headers['X-Workflow-Run-Started-At'] = startedAt?.toISOString() || ''; + headers['X-Workflow-Run-Completed-At'] = completedAt?.toISOString() || ''; + return returnValue instanceof ReadableStream - ? new Response(returnValue, { - headers: { - 'Content-Type': 'application/octet-stream', - }, - }) - : Response.json(returnValue); + ? 
new Response(returnValue, { headers }) + : Response.json(returnValue, { headers }); } catch (error) { if (error instanceof Error) { if (WorkflowRunNotCompletedError.is(error)) { diff --git a/workbench/nitro-v3/routes/api/trigger.post.ts b/workbench/nitro-v3/routes/api/trigger.post.ts index 2cf002565..270f5b633 100644 --- a/workbench/nitro-v3/routes/api/trigger.post.ts +++ b/workbench/nitro-v3/routes/api/trigger.post.ts @@ -50,7 +50,7 @@ export default async ({ req, url }: { req: Request; url: URL }) => { try { const run = await start(workflow as any, args as any); - console.log('Run:', run); + console.log('Run:', run.runId); return Response.json(run); } catch (err) { console.error(`Failed to start!!`, err); diff --git a/workbench/nitro-v3/turbo.json b/workbench/nitro-v3/turbo.json index a93da9f00..a28e18230 100644 --- a/workbench/nitro-v3/turbo.json +++ b/workbench/nitro-v3/turbo.json @@ -3,7 +3,7 @@ "extends": ["//"], "tasks": { "build": { - "outputs": [".vercel/output/**"] + "outputs": [".output/**", ".nitro/**"] } } } diff --git a/workbench/nitro-v3/workflows/97_bench.ts b/workbench/nitro-v3/workflows/97_bench.ts new file mode 120000 index 000000000..d88c3e927 --- /dev/null +++ b/workbench/nitro-v3/workflows/97_bench.ts @@ -0,0 +1 @@ +../../example/workflows/97_bench.ts \ No newline at end of file diff --git a/workbench/nuxt/server/api/trigger.post.ts b/workbench/nuxt/server/api/trigger.post.ts index b5d85fe2a..fbe4e55d4 100644 --- a/workbench/nuxt/server/api/trigger.post.ts +++ b/workbench/nuxt/server/api/trigger.post.ts @@ -53,7 +53,7 @@ export default defineEventHandler(async (event) => { try { const run = await start(workflow as any, args as any); - console.log('Run:', run); + console.log('Run:', run.runId); return Response.json(run); } catch (err) { console.error(`Failed to start!!`, err); diff --git a/workbench/sveltekit/src/routes/api/trigger/+server.ts b/workbench/sveltekit/src/routes/api/trigger/+server.ts index 6492f436d..f1017f6a2 100644 --- 
a/workbench/sveltekit/src/routes/api/trigger/+server.ts +++ b/workbench/sveltekit/src/routes/api/trigger/+server.ts @@ -56,7 +56,7 @@ export const POST: RequestHandler = async ({ request }) => { try { const run = await start(workflow as any, args as any); - console.log('Run:', run); + console.log('Run:', run.runId); return Response.json(run); } catch (err) { console.error(`Failed to start!!`, err); diff --git a/workbench/sveltekit/turbo.json b/workbench/sveltekit/turbo.json new file mode 100644 index 000000000..197d61494 --- /dev/null +++ b/workbench/sveltekit/turbo.json @@ -0,0 +1,9 @@ +{ + "$schema": "https://turborepo.org/schema.json", + "extends": ["//"], + "tasks": { + "build": { + "outputs": [".svelte-kit/**", "build/**"] + } + } +} diff --git a/workbench/vite/routes/api/trigger.post.ts b/workbench/vite/routes/api/trigger.post.ts index 2cf002565..270f5b633 100644 --- a/workbench/vite/routes/api/trigger.post.ts +++ b/workbench/vite/routes/api/trigger.post.ts @@ -50,7 +50,7 @@ export default async ({ req, url }: { req: Request; url: URL }) => { try { const run = await start(workflow as any, args as any); - console.log('Run:', run); + console.log('Run:', run.runId); return Response.json(run); } catch (err) { console.error(`Failed to start!!`, err);