Skip to content
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
6303576
building the template (almost) works, with minimal changes!
djeebus Sep 29, 2025
4251937
nearly there
djeebus Sep 29, 2025
c10d42f
so close!
djeebus Sep 29, 2025
2020c5e
successfully built!
djeebus Sep 30, 2025
53741ef
a little more clean up
djeebus Sep 30, 2025
f446d92
clean up error message
djeebus Sep 30, 2025
66a7947
consistent tests
djeebus Sep 30, 2025
f0be207
gather env vars just-in-time rather than on init()
djeebus Sep 30, 2025
964c811
Merge branch 'just-in-time-env-vars' into test-harness
djeebus Sep 30, 2025
d345ae3
back to benchmark
djeebus Sep 30, 2025
6ea705c
progress
djeebus Sep 30, 2025
8edaf97
only build templates when not yet built
djeebus Oct 1, 2025
394528a
Merge branch 'main' into test-harness
djeebus Oct 1, 2025
82074fd
fix some issues plus some clean up
djeebus Oct 1, 2025
23ebdcc
add some instructions
djeebus Oct 1, 2025
37b9143
enable huge pages, remove useless file
djeebus Oct 2, 2025
192d4c3
revert some changes
djeebus Oct 2, 2025
82ce513
stop using b.Cleanup
djeebus Oct 2, 2025
86dd6bb
add tracing when useful
djeebus Oct 2, 2025
8d38265
Merge branch 'main' into test-harness
djeebus Oct 2, 2025
a870eb6
linting
djeebus Oct 2, 2025
b93f05c
fix tracer
djeebus Oct 2, 2025
b953305
Merge branch 'main' into test-harness
djeebus Oct 2, 2025
58927b2
Merge branch 'main' into test-harness
djeebus Oct 7, 2025
ecdce77
Merge branch 'main' into test-harness
djeebus Oct 7, 2025
ef48758
fix compilation issue
djeebus Oct 8, 2025
eb619e1
clean up
djeebus Oct 8, 2025
1b23122
check the status code. thanks cursor!
djeebus Oct 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions packages/fc-kernels/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
vmlinux.bin
314 changes: 314 additions & 0 deletions packages/orchestrator/benchmark_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,314 @@
// run with something like:
//
// sudo `which go` test -benchtime=15s -bench=. -v
package main

import (
"net/http"
"net/url"
"os"
"path/filepath"
"testing"
"time"

"github.com/google/uuid"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.opentelemetry.io/otel/metric/noop"
"go.uber.org/zap"

"github.com/e2b-dev/infra/packages/orchestrator/internal/proxy"
"github.com/e2b-dev/infra/packages/orchestrator/internal/sandbox"
blockmetrics "github.com/e2b-dev/infra/packages/orchestrator/internal/sandbox/block/metrics"
"github.com/e2b-dev/infra/packages/orchestrator/internal/sandbox/nbd"
"github.com/e2b-dev/infra/packages/orchestrator/internal/sandbox/network"
"github.com/e2b-dev/infra/packages/orchestrator/internal/sandbox/template"
"github.com/e2b-dev/infra/packages/orchestrator/internal/template/build"
"github.com/e2b-dev/infra/packages/orchestrator/internal/template/build/config"
"github.com/e2b-dev/infra/packages/orchestrator/internal/template/build/metrics"
artifactsregistry "github.com/e2b-dev/infra/packages/shared/pkg/artifacts-registry"
"github.com/e2b-dev/infra/packages/shared/pkg/dockerhub"
featureflags "github.com/e2b-dev/infra/packages/shared/pkg/feature-flags"
"github.com/e2b-dev/infra/packages/shared/pkg/limit"
sbxlogger "github.com/e2b-dev/infra/packages/shared/pkg/logger/sandbox"
"github.com/e2b-dev/infra/packages/shared/pkg/smap"
"github.com/e2b-dev/infra/packages/shared/pkg/storage"
"github.com/e2b-dev/infra/packages/shared/pkg/utils"
)

// BenchmarkBaseImageLaunch measures how long it takes to resume a sandbox from
// a template built out of the "e2bdev/base" image.
//
// The benchmark is skipped unless it runs as root. Setup downloads the kernel
// (if it is not already cached), wires up the orchestrator dependencies against
// local storage, and builds the template only when its rootfs is not already
// present under the persistent benchmark directory. Each loop iteration then
// resumes a sandbox from that template; in the default "only-start" cycle the
// sandbox shutdown is excluded from the measured time.
func BenchmarkBaseImageLaunch(b *testing.B) {
	if os.Geteuid() != 0 {
		b.Skip("skipping benchmark because not running as root")
	}

	clientID := uuid.NewString()
	baseImage := "e2bdev/base"
	kernelVersion := "vmlinux-6.1.102"
	fcVersion := "v1.10.1_1fcdaec08"
	templateID := "fcb33d09-3141-42c4-8d3b-c2df411681db"
	buildID := "ba6aae36-74f7-487a-b6f7-74fd7c94e479"

	// persistenceDir lives under os.TempDir() (not b.TempDir()), so the kernel
	// and the built template are kept between benchmark runs.
	persistenceDir := filepath.Join(os.TempDir(), "e2b-orchestrator-benchmark")
	kernelsDir := filepath.Join(persistenceDir, "kernels")
	sandboxDir := filepath.Join(persistenceDir, "sandbox")
	err := os.MkdirAll(kernelsDir, 0o755)
	require.NoError(b, err)

	// tempDir is per-run scratch space provided by the testing package.
	tempDir := b.TempDir()

	abs := func(s string) string {
		return utils.Must(filepath.Abs(s))
	}

	linuxKernelURL, err := url.JoinPath("https://storage.googleapis.com/e2b-prod-public-builds/kernels/", kernelVersion, "vmlinux.bin")
	require.NoError(b, err)
	linuxKernelFilename := filepath.Join(kernelsDir, kernelVersion, "vmlinux.bin")

	downloadKernel(b, linuxKernelFilename, linuxKernelURL)

	// hacks, these should go away
	b.Setenv("ARTIFACTS_REGISTRY_PROVIDER", "Local")
	b.Setenv("USE_LOCAL_NAMESPACE_STORAGE", "true")
	b.Setenv("STORAGE_PROVIDER", "Local")
	b.Setenv("ORCHESTRATOR_BASE_PATH", tempDir)
	b.Setenv("HOST_ENVD_PATH", abs(filepath.Join("..", "envd", "bin", "envd")))
	b.Setenv("FIRECRACKER_VERSIONS_DIR", abs(filepath.Join("..", "fc-versions", "builds")))
	b.Setenv("HOST_KERNELS_DIR", abs(kernelsDir))
	b.Setenv("SANDBOX_DIR", abs(sandboxDir))
	b.Setenv("SNAPSHOT_CACHE_DIR", abs(filepath.Join(tempDir, "snapshot-cache")))
	b.Setenv("LOCAL_TEMPLATE_STORAGE_BASE_PATH", abs(filepath.Join(persistenceDir, "templates")))

	// prep directories
	for _, subdir := range []string{"build", "build-templates" /*"fc-vm",*/, "sandbox", "snapshot-cache", "template"} {
		fullDirName := filepath.Join(tempDir, subdir)
		err := os.MkdirAll(fullDirName, 0o755)
		require.NoError(b, err)
	}

	logger, err := zap.NewDevelopment()
	require.NoError(b, err)

	sbxlogger.SetSandboxLoggerInternal(logger)
	// sbxlogger.SetSandboxLoggerExternal(logger)

	networkPool, err := network.NewPool(
		b.Context(), noop.MeterProvider{}, 8, 8, clientID,
	)
	require.NoError(b, err)
	b.Cleanup(func() {
		err := networkPool.Close(b.Context())
		assert.NoError(b, err)
	})

	devicePool, err := nbd.NewDevicePool(b.Context(), noop.MeterProvider{})
	require.NoError(b, err, "do you have the nbd kernel module installed?")
	b.Cleanup(func() {
		err := devicePool.Close(b.Context())
		assert.NoError(b, err)
	})

	featureFlags, err := featureflags.NewClient()
	require.NoError(b, err)
	b.Cleanup(func() {
		err := featureFlags.Close(b.Context())
		assert.NoError(b, err)
	})

	limiter, err := limit.New(b.Context(), featureFlags)
	require.NoError(b, err)

	persistence, err := storage.GetTemplateStorageProvider(b.Context(), limiter)
	require.NoError(b, err)

	blockMetrics, err := blockmetrics.NewMetrics(&noop.MeterProvider{})
	require.NoError(b, err)

	templateCache, err := template.NewCache(b.Context(), featureFlags, persistence, blockMetrics)
	require.NoError(b, err)

	sandboxFactory := sandbox.NewFactory(networkPool, devicePool, featureFlags, true)

	dockerhubRepository, err := dockerhub.GetRemoteRepository(b.Context())
	require.NoError(b, err)
	b.Cleanup(func() {
		err := dockerhubRepository.Close()
		assert.NoError(b, err)
	})

	allowInternetAccess := true
	accessToken := "access-token"
	sandboxConfig := sandbox.Config{
		BaseTemplateID:      templateID,
		Vcpu:                2,
		RamMB:               512,
		TotalDiskSizeMB:     2 * 1024,
		HugePages:           false,
		AllowInternetAccess: &allowInternetAccess,
		Envd: sandbox.EnvdMetadata{
			Vars:        map[string]string{"HELLO": "WORLD"},
			AccessToken: &accessToken,
			Version:     "1.2.3",
		},
	}

	runtime := sandbox.RuntimeMetadata{
		TemplateID:  templateID,
		SandboxID:   "sandbox-id",
		ExecutionID: "execution-id",
		TeamID:      "team-id",
	}

	artifactRegistry, err := artifactsregistry.GetArtifactsRegistryProvider(b.Context())
	require.NoError(b, err)

	persistenceTemplate, err := storage.GetTemplateStorageProvider(b.Context(), nil)
	require.NoError(b, err)

	persistenceBuild, err := storage.GetBuildCacheStorageProvider(b.Context(), nil)
	require.NoError(b, err)

	var proxyPort uint = 5007

	sandboxes := smap.New[*sandbox.Sandbox]()

	sandboxProxy, err := proxy.NewSandboxProxy(noop.MeterProvider{}, proxyPort, sandboxes)
	require.NoError(b, err)
	go func() {
		err := sandboxProxy.Start(b.Context())
		// testify's ErrorIs takes (t, err, target): the error under test comes
		// first so that a wrapped http.ErrServerClosed is still accepted.
		assert.ErrorIs(b, err, http.ErrServerClosed)
	}()
	b.Cleanup(func() {
		err := sandboxProxy.Close(b.Context())
		assert.NoError(b, err)
	})

	buildMetrics, err := metrics.NewBuildMetrics(noop.MeterProvider{})
	require.NoError(b, err)

	builder := build.NewBuilder(
		logger,
		sandboxFactory,
		persistenceTemplate,
		persistenceBuild,
		artifactRegistry,
		dockerhubRepository,
		sandboxProxy,
		sandboxes,
		templateCache,
		buildMetrics,
	)

	// Only build the template when its rootfs is missing from the persistent
	// template storage; otherwise reuse the result of a previous run.
	buildPath := filepath.Join(os.Getenv("LOCAL_TEMPLATE_STORAGE_BASE_PATH"), buildID, "rootfs.ext4")
	if _, err := os.Stat(buildPath); os.IsNotExist(err) {
		// build template
		force := true
		templateConfig := config.TemplateConfig{
			TemplateID: templateID,
			FromImage:  baseImage,
			Force:      &force,
			VCpuCount:  sandboxConfig.Vcpu,
			MemoryMB:   sandboxConfig.RamMB,
			StartCmd:   "echo 'start cmd debug' && sleep 10 && echo 'done starting command debug'",
			DiskSizeMB: sandboxConfig.TotalDiskSizeMB,
			HugePages:  sandboxConfig.HugePages,
		}

		metadata := storage.TemplateFiles{
			BuildID:            buildID,
			KernelVersion:      kernelVersion,
			FirecrackerVersion: fcVersion,
		}
		_, err = builder.Build(b.Context(), metadata, templateConfig, logger.Core())
		require.NoError(b, err)
	}

	// retrieve template
	tmpl, err := templateCache.GetTemplate(
		b.Context(),
		buildID,
		kernelVersion,
		fcVersion,
		false,
		false,
	)
	require.NoError(b, err)

	type testCycle string

	const (
		onlyStart        testCycle = "only-start"
		startAndPause    testCycle = "start-and-pause"
		startPauseResume testCycle = "start-pause-resume"
	)

	// Selects which cycle the loop below exercises; change it by hand to
	// benchmark the pause/resume path instead.
	testType := onlyStart

	for b.Loop() {
		sbx, err := sandboxFactory.ResumeSandbox(
			b.Context(),
			tmpl,
			sandboxConfig,
			runtime,
			uuid.NewString(),
			time.Now(),
			time.Now().Add(time.Second*15),
			nil,
		)
		require.NoError(b, err)

		if testType == onlyStart {
			// Shutting the sandbox down is not part of what is being measured.
			b.StopTimer()
			err = sbx.Close(b.Context())
			require.NoError(b, err)
			b.StartTimer()
			continue
		}

		meta, err := sbx.Template.Metadata()
		require.NoError(b, err)

		templateMetadata := meta.SameVersionTemplate(storage.TemplateFiles{
			BuildID:            buildID,
			KernelVersion:      kernelVersion,
			FirecrackerVersion: fcVersion,
		})
		snap, err := sbx.Pause(b.Context(), templateMetadata)
		require.NoError(b, err)
		require.NotNil(b, snap)

		// resume sandbox
		sbx, err = sandboxFactory.ResumeSandbox(b.Context(), tmpl, sandboxConfig, runtime, uuid.NewString(), time.Now(), time.Now().Add(time.Second*15), nil)
		require.NoError(b, err)

		// close sandbox
		err = sbx.Close(b.Context())
		require.NoError(b, err)
	}
}

// downloadKernel makes sure a kernel image exists at filename, fetching it
// from url when the file is not there yet.
//
// The parent directory of filename is created if needed. If filename can
// already be stat'ed, nothing is downloaded. Otherwise the response body is
// streamed into a temporary sibling file and renamed into place only after a
// successful download, so an interrupted or failed transfer never leaves a
// truncated file that a later run would treat as a valid cached kernel.
// Any failure aborts the benchmark via require.
func downloadKernel(b *testing.B, filename, url string) {
	b.Helper()

	dirname := filepath.Dir(filename)
	err := os.MkdirAll(dirname, 0o755)
	require.NoError(b, err)

	// kernel already exists
	if _, err := os.Stat(filename); err == nil {
		return
	}

	client := &http.Client{}
	req, err := http.NewRequestWithContext(b.Context(), http.MethodGet, url, nil)
	require.NoError(b, err)
	response, err := client.Do(req)
	require.NoError(b, err)
	defer response.Body.Close()

	// Without this check an error page (404, 500, ...) would be saved as the kernel.
	require.Equal(b, http.StatusOK, response.StatusCode, "unexpected status code while downloading kernel from %s", url)

	partialFilename := filename + ".partial"
	file, err := os.OpenFile(partialFilename, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o644)
	require.NoError(b, err)
	// Best-effort cleanup for the failure paths: require aborts through
	// runtime.Goexit, which still runs deferred calls. After a successful
	// rename both calls fail harmlessly and their errors are ignored.
	defer os.Remove(partialFilename)
	defer file.Close()

	_, err = file.ReadFrom(response.Body)
	require.NoError(b, err)

	// Close explicitly so that a failed flush is reported before publishing the file.
	err = file.Close()
	require.NoError(b, err)

	err = os.Rename(partialFilename, filename)
	require.NoError(b, err)
}
4 changes: 2 additions & 2 deletions packages/orchestrator/internal/sandbox/fc/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,14 +140,14 @@ func (p *Process) configure(
ctx, childSpan := tracer.Start(ctx, "configure-fc")
defer childSpan.End()

stdoutWriter := &zapio.Writer{Log: sbxlogger.I(sbxMetadata).Logger, Level: zap.InfoLevel}
stdoutWriter := &zapio.Writer{Log: sbxlogger.I(sbxMetadata).Logger, Level: zap.DebugLevel}
stdoutWriters := []io.Writer{stdoutWriter}
if stdoutExternal != nil {
stdoutWriters = append(stdoutWriters, stdoutExternal)
}
p.cmd.Stdout = io.MultiWriter(stdoutWriters...)

stderrWriter := &zapio.Writer{Log: sbxlogger.I(sbxMetadata).Logger, Level: zap.ErrorLevel}
stderrWriter := &zapio.Writer{Log: sbxlogger.I(sbxMetadata).Logger, Level: zap.DebugLevel}
stderrWriters := []io.Writer{stderrWriter}
if stderrExternal != nil {
stderrWriters = append(stderrWriters, stderrExternal)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"os/exec"
"path/filepath"
"strings"
"syscall"

"github.com/containers/storage/pkg/archive"
"github.com/dustin/go-humanize"
Expand Down Expand Up @@ -237,6 +238,8 @@ func unpackRootfs(ctx context.Context, logger *zap.Logger, srcImage containerreg
return fmt.Errorf("while mounting overlayfs with layers: %w", err)
}
defer func() {
syscall.Sync()

if unmountErr := filesystem.Unmount(context.WithoutCancel(ctx), mountPath); unmountErr != nil {
zap.L().Error("error unmounting overlayfs mount point", zap.Error(unmountErr))
}
Expand Down Expand Up @@ -289,7 +292,8 @@ func copyFiles(ctx context.Context, src, dest string) error {
// --whole-file: Copy files without using the delta algorithm, which is faster for local copies
// --inplace: Update destination files in place, no need to create temporary files
cmd := exec.CommandContext(ctx, "rsync", "-aH", "--whole-file", "--inplace", src+"/", dest)
if out, err := cmd.CombinedOutput(); err != nil {
out, err := cmd.CombinedOutput()
if err != nil {
return fmt.Errorf("while copying files from %s to %s: %w: %s", src, dest, err, string(out))
}
return nil
Expand Down
6 changes: 3 additions & 3 deletions packages/shared/pkg/storage/storage_fs.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ func (fs *FileSystemStorageProvider) UploadSignedURL(_ context.Context, _ string
return "", fmt.Errorf("file system storage does not support signed URLs")
}

func (fs *FileSystemStorageProvider) OpenObject(ctx context.Context, path string) (StorageObjectProvider, error) {
func (fs *FileSystemStorageProvider) OpenObject(_ context.Context, path string) (StorageObjectProvider, error) {
dir := filepath.Dir(fs.getPath(path))
if err := os.MkdirAll(dir, 0o755); err != nil {
return nil, err
Expand All @@ -68,7 +68,7 @@ func (f *FileSystemStorageObjectProvider) WriteTo(ctx context.Context, dst io.Wr
return io.Copy(dst, handle)
}

func (f *FileSystemStorageObjectProvider) WriteFromFileSystem(ctx context.Context, path string) error {
func (f *FileSystemStorageObjectProvider) WriteFromFileSystem(_ context.Context, path string) error {
handle, err := f.getHandle(false)
if err != nil {
return err
Expand All @@ -89,7 +89,7 @@ func (f *FileSystemStorageObjectProvider) WriteFromFileSystem(ctx context.Contex
return nil
}

func (f *FileSystemStorageObjectProvider) Write(ctx context.Context, data []byte) (int, error) {
func (f *FileSystemStorageObjectProvider) Write(_ context.Context, data []byte) (int, error) {
handle, err := f.getHandle(false)
if err != nil {
return 0, err
Expand Down
Loading