diff --git a/pkg/sentry/control/state.go b/pkg/sentry/control/state.go index 2ef7b414b0..ccad7435f0 100644 --- a/pkg/sentry/control/state.go +++ b/pkg/sentry/control/state.go @@ -92,6 +92,8 @@ type SaveOpts struct { // ExecOpts contains options for executing a binary during save/restore. ExecOpts SaveRestoreExecOpts + + SaveOptsExtra } // SaveRestoreExecOpts contains options for executing a binary @@ -112,53 +114,54 @@ type SaveRestoreExecOpts struct { // state.SaveOpts.Close() must be called when the state.SaveOpts is no longer // needed. func ConvertToStateSaveOpts(o *SaveOpts) (*state.SaveOpts, error) { + saveOpts := &state.SaveOpts{ + Key: o.Key, + Metadata: o.Metadata, + AppMFExcludeCommittedZeroPages: o.AppMFExcludeCommittedZeroPages, + Resume: o.Resume, + } + if err := setSaveOptsImpl(o, saveOpts); err != nil { + saveOpts.Close() + return nil, err + } + return saveOpts, nil +} + +func setSaveOptsForLocalCheckpointFiles(o *SaveOpts, saveOpts *state.SaveOpts) error { wantFiles := 1 if o.HavePagesFile { wantFiles += 2 } if gotFiles := len(o.FilePayload.Files); gotFiles != wantFiles { - return nil, fmt.Errorf("got %d files, wanted %d", gotFiles, wantFiles) + return fmt.Errorf("got %d files, wanted %d", gotFiles, wantFiles) } // Save to the first provided stream. stateFile, err := o.ReleaseFD(0) if err != nil { - return nil, err - } - cu := cleanup.Make(func() { stateFile.Close() }) - defer cu.Clean() - - saveOpts := &state.SaveOpts{ - Destination: stateFile, - Key: o.Key, - Metadata: o.Metadata, - AppMFExcludeCommittedZeroPages: o.AppMFExcludeCommittedZeroPages, - Resume: o.Resume, + return err } - + // Setting saveOpts.Destination/PagesMetadata/PagesFile transfers ownership + // of the created object to saveOpts, even if we return a non-nil error. + saveOpts.Destination = stateFile if o.HavePagesFile { pagesMetadataFile, err := o.ReleaseFD(1) if err != nil { - return nil, err + return err } - // //pkg/state/wire writes one byte at a time; buffer these writes to - // avoid making one syscall per write. For the state file, this - // buffering is handled by statefile.NewWriter() => compressio.Writer - // or compressio.NewSimpleWriter(). + // //pkg/state/wire writes one byte at a time; buffer writes to + // pagesMetadataFile to avoid making one syscall per write. For the + // state file, this buffering is handled by statefile.NewWriter() => + // compressio.Writer or compressio.NewSimpleWriter(). saveOpts.PagesMetadata = stateio.NewBufioWriteCloser(pagesMetadataFile) - cu.Add(func() { saveOpts.PagesMetadata.Close() }) pagesFileFD, err := unix.Dup(int(o.Files[2].Fd())) if err != nil { - return nil, err + return err } - // TODO: Allow `runsc checkpoint` to override I/O parameters. saveOpts.PagesFile = stateio.NewPagesFileFDWriterDefault(int32(pagesFileFD)) - cu.Add(func() { saveOpts.PagesFile.Close() }) } - - cu.Release() - return saveOpts, nil + return nil } // Save saves the running system. diff --git a/pkg/sentry/control/state_impl.go b/pkg/sentry/control/state_impl.go index ec775f34e3..a9ec0e8086 100644 --- a/pkg/sentry/control/state_impl.go +++ b/pkg/sentry/control/state_impl.go @@ -23,6 +23,12 @@ import ( "gvisor.dev/gvisor/pkg/timing" ) +type SaveOptsExtra struct{} + +func setSaveOptsImpl(o *SaveOpts, saveOpts *state.SaveOpts) error { + return setSaveOptsForLocalCheckpointFiles(o, saveOpts) +} + func preSaveImpl(k *kernel.Kernel, o *state.SaveOpts) error { return nil } diff --git a/runsc/boot/filter/config/config_main.go b/runsc/boot/filter/config/config_main.go index 0f9fe8b50b..9c34c9bbb4 100644 --- a/runsc/boot/filter/config/config_main.go +++ b/runsc/boot/filter/config/config_main.go @@ -163,6 +163,10 @@ var allowedSyscalls = seccomp.MakeSyscallRules(map[uintptr]seccomp.SyscallRule{ seccomp.EqualTo(linux.MEMBARRIER_CMD_GLOBAL), seccomp.EqualTo(0), }, + unix.SYS_MEMFD_CREATE: seccomp.PerArg{ + seccomp.AnyValue{}, /* name */ + seccomp.EqualTo(0), /* flags */ + }, unix.SYS_MINCORE: seccomp.MatchAll{}, unix.SYS_MLOCK: seccomp.MatchAll{}, unix.SYS_MMAP: seccomp.Or{ diff --git a/runsc/cmd/checkpoint.go b/runsc/cmd/checkpoint.go index ddf6b5d020..cd99ecf2c9 100644 --- a/runsc/cmd/checkpoint.go +++ b/runsc/cmd/checkpoint.go @@ -110,7 +110,7 @@ func (c *Checkpoint) Execute(_ context.Context, f *flag.FlagSet, args ...any) su SaveRestoreExecContainerID: id, } - if err := cont.Checkpoint(c.imagePath, opts); err != nil { + if err := cont.Checkpoint(conf, c.imagePath, opts); err != nil { util.Fatalf("checkpoint failed: %v", err) } diff --git a/runsc/container/container.go b/runsc/container/container.go index 19ab57d272..65a1613f94 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -725,12 +725,12 @@ func (c *Container) ForwardSignals(pid int32, fgProcess bool) func() { // Checkpoint sends the checkpoint call to the container. // The statefile will be written to f, the file at the specified image-path. -func (c *Container) Checkpoint(imagePath string, opts sandbox.CheckpointOpts) error { +func (c *Container) Checkpoint(conf *config.Config, imagePath string, opts sandbox.CheckpointOpts) error { log.Debugf("Checkpoint container, cid: %s", c.ID) if err := c.requireStatus("checkpoint", Created, Running, Paused); err != nil { return err } - return c.Sandbox.Checkpoint(c.ID, imagePath, opts) + return c.Sandbox.Checkpoint(conf, c.ID, imagePath, opts) } // Pause suspends the container and its kernel. diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index be8af9d844..e98201ba64 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -1063,7 +1063,7 @@ func testCheckpointRestore(t *testing.T, conf *config.Config, compression statef } // Checkpoint running container; save state into new file. - if err := cont.Checkpoint(dir, sandbox.CheckpointOpts{Compression: compression}); err != nil { + if err := cont.Checkpoint(conf, dir, sandbox.CheckpointOpts{Compression: compression}); err != nil { t.Fatalf("error checkpointing container to empty file: %v", err) } @@ -1267,7 +1267,7 @@ func TestCheckpointRestoreExecKilled(t *testing.T) { } // Checkpoint running container. - if err := cont.Checkpoint(dir, sandbox.CheckpointOpts{Compression: statefile.CompressionLevelFlateBestSpeed}); err != nil { + if err := cont.Checkpoint(conf, dir, sandbox.CheckpointOpts{Compression: statefile.CompressionLevelFlateBestSpeed}); err != nil { t.Fatalf("error checkpointing container: %v", err) } cont.Destroy() @@ -1346,7 +1346,7 @@ func TestCheckpointRestoreCreateMountPoint(t *testing.T) { } // Checkpoint running container; save state into new file. - if err := cont.Checkpoint(dir, sandbox.CheckpointOpts{Compression: statefile.CompressionLevelDefault}); err != nil { + if err := cont.Checkpoint(conf, dir, sandbox.CheckpointOpts{Compression: statefile.CompressionLevelDefault}); err != nil { t.Fatalf("error checkpointing container to file: %v", err) } @@ -1445,7 +1445,7 @@ func TestUnixDomainSockets(t *testing.T) { } // Checkpoint running container; save state into new file. - if err := cont.Checkpoint(dir, sandbox.CheckpointOpts{Compression: statefile.CompressionLevelDefault}); err != nil { + if err := cont.Checkpoint(conf, dir, sandbox.CheckpointOpts{Compression: statefile.CompressionLevelDefault}); err != nil { t.Fatalf("error checkpointing container to empty file: %v", err) } @@ -2829,7 +2829,7 @@ func TestUsageFD(t *testing.T) { } // Checkpoint running container. - if err := cont.Checkpoint(dir, sandbox.CheckpointOpts{Compression: statefile.CompressionLevelDefault}); err != nil { + if err := cont.Checkpoint(conf, dir, sandbox.CheckpointOpts{Compression: statefile.CompressionLevelDefault}); err != nil { t.Fatalf("error checkpointing container: %v", err) } cont.Destroy() @@ -3963,7 +3963,7 @@ func TestSpecValidation(t *testing.T) { t.Fatalf("error chmoding file: %q, %v", dir, err) } // Checkpoint running container; save state into new file. - if err := cont.Checkpoint(dir, sandbox.CheckpointOpts{Compression: statefile.CompressionLevelFlateBestSpeed}); err != nil { + if err := cont.Checkpoint(conf, dir, sandbox.CheckpointOpts{Compression: statefile.CompressionLevelFlateBestSpeed}); err != nil { t.Fatalf("error checkpointing container to empty file: %v", err) } @@ -4175,7 +4175,7 @@ func TestCheckpointResume(t *testing.T) { } // Checkpoint running container; save state into new file. - if err := cont.Checkpoint(dir, sandbox.CheckpointOpts{Resume: true}); err != nil { + if err := cont.Checkpoint(conf, dir, sandbox.CheckpointOpts{Resume: true}); err != nil { t.Fatalf("error checkpointing container to empty file: %v", err) } diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go index bddc82133f..b1f7028065 100644 --- a/runsc/container/multi_container_test.go +++ b/runsc/container/multi_container_test.go @@ -2782,7 +2782,7 @@ func testMultiContainerCheckpointRestore(t *testing.T, conf *config.Config, comp }() // Checkpoint root container; save state into new file. - if err := conts[0].Checkpoint(dir, sandbox.CheckpointOpts{Compression: compression}); err != nil { + if err := conts[0].Checkpoint(conf, dir, sandbox.CheckpointOpts{Compression: compression}); err != nil { t.Fatalf("error checkpointing container to empty file: %v", err) } diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index ef3a9996eb..091fcca74e 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -1502,33 +1502,27 @@ type CheckpointOpts struct { // Checkpoint sends the checkpoint call for a container in the sandbox. // The statefile will be written to f. -func (s *Sandbox) Checkpoint(cid string, imagePath string, opts CheckpointOpts) error { +func (s *Sandbox) Checkpoint(conf *config.Config, cid string, imagePath string, opts CheckpointOpts) error { log.Debugf("Checkpoint sandbox %q, imagePath %q, opts %+v", s.ID, imagePath, opts) - files, err := createSaveFiles(imagePath, opts.Direct, opts.Compression) - if err != nil { - return err - } - defer func() { - for _, f := range files { - _ = f.Close() - } - }() - opt := control.SaveOpts{ Metadata: opts.Compression.ToMetadata(), AppMFExcludeCommittedZeroPages: opts.ExcludeCommittedZeroPages, - FilePayload: urpc.FilePayload{ - Files: files, - }, - HavePagesFile: len(files) > 1, - Resume: opts.Resume, + Resume: opts.Resume, ExecOpts: control.SaveRestoreExecOpts{ Argv: opts.SaveRestoreExecArgv, Timeout: opts.SaveRestoreExecTimeout, ContainerID: opts.SaveRestoreExecContainerID, }, } + defer func() { + for _, f := range opt.FilePayload.Files { + _ = f.Close() + } + }() + if err := setCheckpointOptsImpl(conf, imagePath, opts, &opt); err != nil { + return err + } if err := s.call(boot.ContMgrCheckpoint, &opt, nil); err != nil { return fmt.Errorf("checkpointing container %q: %w", cid, err) @@ -1537,6 +1531,16 @@ func (s *Sandbox) Checkpoint(cid string, imagePath string, opts CheckpointOpts) return nil } +func setCheckpointOptsForLocalCheckpointFiles(conf *config.Config, imagePath string, opts CheckpointOpts, opt *control.SaveOpts) error { + files, err := createSaveFiles(imagePath, opts.Direct, opts.Compression) + if err != nil { + return err + } + opt.FilePayload.Files = files + opt.HavePagesFile = len(files) > 1 + return nil +} + // createSaveFiles creates the files used by checkpoint to save the state. They are returned in // the following order: sentry state, page metadata, page file. This is the same order expected by // RPCs and argument passing to the sandbox. diff --git a/runsc/sandbox/sandbox_impl.go b/runsc/sandbox/sandbox_impl.go index 7b018d70df..6f09b7c365 100644 --- a/runsc/sandbox/sandbox_impl.go +++ b/runsc/sandbox/sandbox_impl.go @@ -18,6 +18,7 @@ package sandbox import ( + "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/runsc/boot" "gvisor.dev/gvisor/runsc/config" "gvisor.dev/gvisor/runsc/donation" @@ -27,6 +28,10 @@ func createSandboxProcessExtra(conf *config.Config, args *Args, donations *donat return nil } +func setCheckpointOptsImpl(conf *config.Config, imagePath string, opts CheckpointOpts, opt *control.SaveOpts) error { + return setCheckpointOptsForLocalCheckpointFiles(conf, imagePath, opts, opt) +} + func (s *Sandbox) setRestoreOptsImpl(conf *config.Config, imagePath string, direct bool, opt *boot.RestoreOpts) error { return s.setRestoreOptsForLocalCheckpointFiles(conf, imagePath, direct, opt) }