diff --git a/cgroups/cgroups_v1.go b/cgroups/cgroups_v1.go index 3216d8c0..17ae6673 100644 --- a/cgroups/cgroups_v1.go +++ b/cgroups/cgroups_v1.go @@ -539,7 +539,7 @@ func (cg *CgroupV1) GetMemoryData(pid int, cgPath string) (*rspec.LinuxMemory, e return nil, err } kernelLimit := res - lm.Kernel = &kernelLimit + lm.Kernel = &kernelLimit //nolint:staticcheck // Ignore SA1019: lm.Kernel is deprecated case 4: res, err := strconv.ParseInt(strings.TrimSpace(string(contents)), 10, 64) if err != nil { @@ -671,6 +671,11 @@ func (cg *CgroupV1) GetPidsData(pid int, cgPath string) (*rspec.LinuxPids, error if err != nil { return nil, err } + if strings.TrimSpace(string(contents)) == "max" { + res := int64(-1) + lp.Limit = &res + return lp, nil + } res, err := strconv.ParseInt(strings.TrimSpace(string(contents)), 10, 64) if err != nil { if os.IsNotExist(err) { @@ -679,7 +684,7 @@ func (cg *CgroupV1) GetPidsData(pid int, cgPath string) (*rspec.LinuxPids, error return nil, err } - lp.Limit = res + lp.Limit = &res return lp, nil } diff --git a/cmd/oci-runtime-tool/generate.go b/cmd/oci-runtime-tool/generate.go index 0936da76..70b4d27e 100644 --- a/cmd/oci-runtime-tool/generate.go +++ b/cmd/oci-runtime-tool/generate.go @@ -16,6 +16,8 @@ import ( "github.com/opencontainers/runtime-tools/generate" "github.com/opencontainers/runtime-tools/generate/seccomp" "github.com/urfave/cli" + + mpolCheck "github.com/opencontainers/runtime-tools/validate/memorypolicy" ) var generateFlags = []cli.Flag{ @@ -64,6 +66,9 @@ var generateFlags = []cli.Flag{ cli.StringFlag{Name: "linux-mems", Usage: "list of memory nodes in the cpuset (default is to use any available memory node)"}, cli.Uint64Flag{Name: "linux-mem-swap", Usage: "total memory limit (memory + swap) (in bytes)"}, cli.Uint64Flag{Name: "linux-mem-swappiness", Usage: "how aggressive the kernel will swap memory pages (Range from 0 to 100)"}, + cli.StringFlag{Name: "linux-memorypolicy-mode", Usage: "memory policy defines from which nodes memory 
is allocated by default, e.g MPOL_INTERLEAVE"}, + cli.StringFlag{Name: "linux-memorypolicy-nodes", Usage: "memory nodes related to the linux-memorypolicy-mode, e.g 0-3,7"}, + cli.StringSliceFlag{Name: "linux-memorypolicy-flags", Usage: "optional memory policy mode flags, e.g MPOL_F_STATIC_NODES"}, cli.StringFlag{Name: "linux-mount-label", Usage: "selinux mount context label"}, cli.StringSliceFlag{Name: "linux-namespace-add", Usage: "adds a namespace to the set of namespaces to create or join of the form 'ns[:path]'"}, cli.StringSliceFlag{Name: "linux-namespace-remove", Usage: "removes a namespace from the set of namespaces to create or join of the form 'ns'"}, @@ -782,6 +787,35 @@ func setupSpec(g *generate.Generator, context *cli.Context) error { g.SetLinuxResourcesMemorySwappiness(context.Uint64("linux-mem-swappiness")) } + if context.IsSet("linux-memorypolicy-mode") { + mpolMode := context.String("linux-memorypolicy-mode") + if err := mpolCheck.MpolModeValid(mpolMode); err != nil { + return err + } + g.SetLinuxMemoryPolicyMode(mpolMode) + } + + if context.IsSet("linux-memorypolicy-nodes") { + g.SetLinuxMemoryPolicyNodes(context.String("linux-memorypolicy-nodes")) + } + + if context.IsSet("linux-memorypolicy-flags") { + mpolFlags := context.StringSlice("linux-memorypolicy-flags") + for _, flag := range mpolFlags { + if err := mpolCheck.MpolFlagValid(flag); err != nil { + return err + } + } + g.SetLinuxMemoryPolicyFlags(mpolFlags) + } + + if g.Config.Linux != nil && g.Config.Linux.MemoryPolicy != nil { + // Validating memory policy nodes needs mode as a context. g.Config.Linux may be unset, hence the nil guard.
+ if err := mpolCheck.MpolModeNodesValid(g.Config.Linux.MemoryPolicy.Mode, g.Config.Linux.MemoryPolicy.Nodes); err != nil { + return err + } + } + if context.IsSet("linux-network-classid") { g.SetLinuxResourcesNetworkClassID(uint32(context.Int("linux-network-classid"))) } diff --git a/generate/config.go b/generate/config.go index 48f281d2..b6dd9c3b 100644 --- a/generate/config.go +++ b/generate/config.go @@ -109,6 +109,13 @@ func (g *Generator) initConfigLinuxResourcesMemory() { } } +func (g *Generator) initConfigLinuxMemoryPolicy() { + g.initConfigLinux() + if g.Config.Linux.MemoryPolicy == nil { + g.Config.Linux.MemoryPolicy = &rspec.LinuxMemoryPolicy{} + } +} + func (g *Generator) initConfigLinuxResourcesNetwork() { g.initConfigLinuxResources() if g.Config.Linux.Resources.Network == nil { diff --git a/generate/generate.go b/generate/generate.go index 16e8f5cd..12478ed5 100644 --- a/generate/generate.go +++ b/generate/generate.go @@ -26,6 +26,12 @@ var ( } ) +const ( + // UnlimitedPidsLimit can be passed to SetLinuxResourcesPidsLimit to + // request unlimited PIDs. + UnlimitedPidsLimit int64 = -1 +) + // Generator represents a generator for a container config. type Generator struct { Config *rspec.Spec @@ -911,7 +917,7 @@ func (g *Generator) SetLinuxResourcesMemorySwap(swap int64) { // SetLinuxResourcesMemoryKernel sets g.Config.Linux.Resources.Memory.Kernel. func (g *Generator) SetLinuxResourcesMemoryKernel(kernel int64) { g.initConfigLinuxResourcesMemory() - g.Config.Linux.Resources.Memory.Kernel = &kernel + g.Config.Linux.Resources.Memory.Kernel = &kernel //nolint:staticcheck // Ignore SA1019: g.Config.Linux.Resources.Memory.Kernel is deprecated } // SetLinuxResourcesMemoryKernelTCP sets g.Config.Linux.Resources.Memory.KernelTCP. 
@@ -926,6 +932,26 @@ func (g *Generator) SetLinuxResourcesMemorySwappiness(swappiness uint64) { g.Config.Linux.Resources.Memory.Swappiness = &swappiness } +// SetLinuxMemoryPolicyMode sets g.Config.Linux.MemoryPolicy.Mode +func (g *Generator) SetLinuxMemoryPolicyMode(mode string) { + g.initConfigLinuxMemoryPolicy() + g.Config.Linux.MemoryPolicy.Mode = rspec.MemoryPolicyModeType(mode) +} + +// SetLinuxMemoryPolicyNodes sets g.Config.Linux.MemoryPolicy.Nodes +func (g *Generator) SetLinuxMemoryPolicyNodes(nodes string) { + g.initConfigLinuxMemoryPolicy() + g.Config.Linux.MemoryPolicy.Nodes = nodes +} + +// SetLinuxMemoryPolicyFlags sets g.Config.Linux.MemoryPolicy.Flags +func (g *Generator) SetLinuxMemoryPolicyFlags(flags []string) { + g.initConfigLinuxMemoryPolicy() + for _, flag := range flags { + g.Config.Linux.MemoryPolicy.Flags = append(g.Config.Linux.MemoryPolicy.Flags, rspec.MemoryPolicyFlagType(flag)) + } +} + // SetLinuxResourcesMemoryDisableOOMKiller sets g.Config.Linux.Resources.Memory.DisableOOMKiller. func (g *Generator) SetLinuxResourcesMemoryDisableOOMKiller(disable bool) { g.initConfigLinuxResourcesMemory() @@ -970,7 +996,7 @@ func (g *Generator) DropLinuxResourcesNetworkPriorities(name string) { // SetLinuxResourcesPidsLimit sets g.Config.Linux.Resources.Pids.Limit. func (g *Generator) SetLinuxResourcesPidsLimit(limit int64) { g.initConfigLinuxResourcesPids() - g.Config.Linux.Resources.Pids.Limit = limit + g.Config.Linux.Resources.Pids.Limit = &limit } // ClearLinuxSysctl clears g.Config.Linux.Sysctl. @@ -1060,13 +1086,13 @@ func (g *Generator) ClearPreStartHooks() { if g.Config == nil || g.Config.Hooks == nil { return } - g.Config.Hooks.Prestart = []rspec.Hook{} + g.Config.Hooks.Prestart = []rspec.Hook{} //nolint:staticcheck // Ignore SA1019: g.Config.Hooks.Prestart is deprecated } // AddPreStartHook add a prestart hook into g.Config.Hooks.Prestart. 
func (g *Generator) AddPreStartHook(preStartHook rspec.Hook) { g.initConfigHooks() - g.Config.Hooks.Prestart = append(g.Config.Hooks.Prestart, preStartHook) + g.Config.Hooks.Prestart = append(g.Config.Hooks.Prestart, preStartHook) //nolint:staticcheck // Ignore SA1019: g.Config.Hooks.Prestart is deprecated } // ClearPostStopHooks clear g.Config.Hooks.Poststop. diff --git a/go.mod b/go.mod index 59b6aab6..589147c7 100644 --- a/go.mod +++ b/go.mod @@ -10,7 +10,7 @@ require ( github.com/moby/sys/capability v0.4.0 github.com/moby/sys/mountinfo v0.7.2 github.com/mrunalp/fileutils v0.5.0 - github.com/opencontainers/runtime-spec v1.1.0 + github.com/opencontainers/runtime-spec v1.3.0 github.com/opencontainers/selinux v1.9.1 github.com/sirupsen/logrus v1.8.1 github.com/stretchr/testify v1.3.0 diff --git a/go.sum b/go.sum index 08c9d701..5e1b762c 100644 --- a/go.sum +++ b/go.sum @@ -11,14 +11,14 @@ github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+l github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= github.com/mndrix/tap-go v0.0.0-20171203230836-629fa407e90b h1:Ga1nclDSe8gOw37MVLMhfu2QKWtD6gvtQ298zsKVh8g= github.com/mndrix/tap-go v0.0.0-20171203230836-629fa407e90b/go.mod h1:pzzDgJWZ34fGzaAZGFW22KVZDfyrYW+QABMrWnJBnSs= -github.com/moby/sys/mountinfo v0.7.2 h1:1shs6aH5s4o5H2zQLn796ADW1wMrIwHsyJ2v9KouLrg= -github.com/moby/sys/mountinfo v0.7.2/go.mod h1:1YOa8w8Ih7uW0wALDUgT1dTTSBrZ+HiBLGws92L2RU4= github.com/moby/sys/capability v0.4.0 h1:4D4mI6KlNtWMCM1Z/K0i7RV1FkX+DBDHKVJpCndZoHk= github.com/moby/sys/capability v0.4.0/go.mod h1:4g9IK291rVkms3LKCDOoYlnV8xKwoDTpIrNEE35Wq0I= +github.com/moby/sys/mountinfo v0.7.2 h1:1shs6aH5s4o5H2zQLn796ADW1wMrIwHsyJ2v9KouLrg= +github.com/moby/sys/mountinfo v0.7.2/go.mod h1:1YOa8w8Ih7uW0wALDUgT1dTTSBrZ+HiBLGws92L2RU4= github.com/mrunalp/fileutils v0.5.0 h1:NKzVxiH7eSk+OQ4M+ZYW1K6h27RUV3MI6NUTsHhU6Z4= github.com/mrunalp/fileutils v0.5.0/go.mod 
h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ= -github.com/opencontainers/runtime-spec v1.1.0 h1:HHUyrt9mwHUjtasSbXSMvs4cyFxh+Bll4AjJ9odEGpg= -github.com/opencontainers/runtime-spec v1.1.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= +github.com/opencontainers/runtime-spec v1.3.0 h1:YZupQUdctfhpZy3TM39nN9Ika5CBWT5diQ8ibYCRkxg= +github.com/opencontainers/runtime-spec v1.3.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/selinux v1.9.1 h1:b4VPEF3O5JLZgdTDBmGepaaIbAo0GqoF6EBRq5f/g3Y= github.com/opencontainers/selinux v1.9.1/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= diff --git a/validate/memorypolicy/validate.go b/validate/memorypolicy/validate.go new file mode 100644 index 00000000..7f6cc446 --- /dev/null +++ b/validate/memorypolicy/validate.go @@ -0,0 +1,67 @@ +package memorypolicy + +import ( + "fmt" + "strings" + + rspec "github.com/opencontainers/runtime-spec/specs-go" +) + +var ( + knownModes map[rspec.MemoryPolicyModeType]struct{} = map[rspec.MemoryPolicyModeType]struct{}{ + rspec.MpolDefault: {}, + rspec.MpolBind: {}, + rspec.MpolInterleave: {}, + rspec.MpolWeightedInterleave: {}, + rspec.MpolPreferred: {}, + rspec.MpolPreferredMany: {}, + rspec.MpolLocal: {}, + } + + knownModeFlags map[rspec.MemoryPolicyFlagType]struct{} = map[rspec.MemoryPolicyFlagType]struct{}{ + rspec.MpolFNumaBalancing: {}, + rspec.MpolFRelativeNodes: {}, + rspec.MpolFStaticNodes: {}, + } +) + +// MpolModeValid checks if the provided memory policy mode is valid. 
+func MpolModeValid(mode string) error { + if !strings.HasPrefix(mode, "MPOL_") { + return fmt.Errorf("memory policy mode %q must start with 'MPOL_'", mode) + } + if _, ok := knownModes[rspec.MemoryPolicyModeType(mode)]; !ok { + return fmt.Errorf("invalid memory policy mode %q", mode) + } + return nil +} + +// MpolModeNodesValid checks if the nodes specification is valid for the given memory policy mode. +func MpolModeNodesValid(mode rspec.MemoryPolicyModeType, nodes string) error { + switch mode { + case rspec.MpolDefault, rspec.MpolLocal: + if nodes != "" { + return fmt.Errorf("memory policy mode %q must not have nodes specified", mode) + } + case rspec.MpolBind, rspec.MpolInterleave, rspec.MpolWeightedInterleave, rspec.MpolPreferred, rspec.MpolPreferredMany: + if nodes == "" { + return fmt.Errorf("memory policy mode %q must have nodes specified", mode) + } + case "": + return fmt.Errorf("memory policy mode must be specified") + default: + return fmt.Errorf("unknown memory policy mode %q", mode) + } + return nil +} + +// MpolFlagValid checks if the provided memory policy flag is valid.
+func MpolFlagValid(flag string) error { + if !strings.HasPrefix(flag, "MPOL_F_") { + return fmt.Errorf("memory policy flag %q must start with 'MPOL_F_'", flag) + } + if _, ok := knownModeFlags[rspec.MemoryPolicyFlagType(flag)]; !ok { + return fmt.Errorf("invalid memory policy flag %q", flag) + } + return nil +} diff --git a/validate/validate.go b/validate/validate.go index 48fb52ca..82e7b49b 100644 --- a/validate/validate.go +++ b/validate/validate.go @@ -275,7 +275,7 @@ func (v *Validator) CheckHooks() (errs error) { } if v.spec.Hooks != nil { - errs = multierror.Append(errs, v.checkEventHooks("prestart", v.spec.Hooks.Prestart, v.HostSpecific)) + errs = multierror.Append(errs, v.checkEventHooks("prestart", v.spec.Hooks.Prestart, v.HostSpecific)) //nolint:staticcheck // Ignore SA1019: v.Spec.Hooks.Prestart is deprecated errs = multierror.Append(errs, v.checkEventHooks("poststart", v.spec.Hooks.Poststart, v.HostSpecific)) errs = multierror.Append(errs, v.checkEventHooks("poststop", v.spec.Hooks.Poststop, v.HostSpecific)) } diff --git a/validate/validate_linux.go b/validate/validate_linux.go index 2c7cdb75..91fe91e4 100644 --- a/validate/validate_linux.go +++ b/validate/validate_linux.go @@ -14,6 +14,7 @@ import ( rspec "github.com/opencontainers/runtime-spec/specs-go" osFilepath "github.com/opencontainers/runtime-tools/filepath" "github.com/opencontainers/runtime-tools/specerror" + mpolCheck "github.com/opencontainers/runtime-tools/validate/memorypolicy" "github.com/opencontainers/selinux/go-selinux/label" "github.com/sirupsen/logrus" ) @@ -220,5 +221,18 @@ func (v *Validator) CheckLinux() (errs error) { } } + if v.spec.Linux.MemoryPolicy != nil { + if err := mpolCheck.MpolModeValid(string(v.spec.Linux.MemoryPolicy.Mode)); err != nil { + errs = multierror.Append(errs, err) + } else if err := mpolCheck.MpolModeNodesValid(v.spec.Linux.MemoryPolicy.Mode, v.spec.Linux.MemoryPolicy.Nodes); err != nil { + errs = multierror.Append(errs, err) + } + for _, flag := range 
v.spec.Linux.MemoryPolicy.Flags { + if err := mpolCheck.MpolFlagValid(string(flag)); err != nil { + errs = multierror.Append(errs, err) + } + } + } + return } diff --git a/validation/util/linux_resources_memory.go b/validation/util/linux_resources_memory.go index 73de149d..305e89d1 100644 --- a/validation/util/linux_resources_memory.go +++ b/validation/util/linux_resources_memory.go @@ -31,11 +31,11 @@ func ValidateLinuxResourcesMemory(config *rspec.Spec, t *tap.T, state *rspec.Sta t.Ok(*lm.Swap == *config.Linux.Resources.Memory.Swap, "memory swap is set correctly") - t.Diagnosticf("expect: %d, actual: %d", *config.Linux.Resources.Memory.Swap, *lm.Reservation) + t.Diagnosticf("expect: %d, actual: %d", *config.Linux.Resources.Memory.Swap, *lm.Swap) - t.Ok(*lm.Kernel == *config.Linux.Resources.Memory.Kernel, "memory kernel is set correctly") - t.Diagnosticf("expect: %d, actual: %d", *config.Linux.Resources.Memory.Kernel, *lm.Kernel) + t.Ok(*lm.Kernel == *config.Linux.Resources.Memory.Kernel, "memory kernel is set correctly") //nolint:staticcheck // Ignore SA1019: lm.Kernel is deprecated + t.Diagnosticf("expect: %d, actual: %d", *config.Linux.Resources.Memory.Kernel, *lm.Kernel) //nolint:staticcheck // Ignore SA1019: config.Linux.Resources.Memory.Kernel is deprecated t.Ok(*lm.KernelTCP == *config.Linux.Resources.Memory.KernelTCP, "memory kernelTCP is set correctly") - t.Diagnosticf("expect: %d, actual: %d", *config.Linux.Resources.Memory.KernelTCP, *lm.Kernel) + t.Diagnosticf("expect: %d, actual: %d", *config.Linux.Resources.Memory.KernelTCP, *lm.KernelTCP) t.Ok(*lm.Swappiness == *config.Linux.Resources.Memory.Swappiness, "memory swappiness is set correctly") t.Diagnosticf("expect: %d, actual: %d", *config.Linux.Resources.Memory.Swappiness, *lm.Swappiness) diff --git a/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go b/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go index 4e7717d5..3ef33338 100644 ---
a/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go +++ b/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go @@ -31,6 +31,8 @@ type Spec struct { VM *VM `json:"vm,omitempty" platform:"vm"` // ZOS is platform-specific configuration for z/OS based containers. ZOS *ZOS `json:"zos,omitempty" platform:"zos"` + // FreeBSD is platform-specific configuration for FreeBSD based containers. + FreeBSD *FreeBSD `json:"freebsd,omitempty" platform:"freebsd"` } // Scheduler represents the scheduling attributes for a process. It is based on @@ -83,7 +85,7 @@ type Process struct { // Rlimits specifies rlimit options to apply to the process. Rlimits []POSIXRlimit `json:"rlimits,omitempty" platform:"linux,solaris,zos"` // NoNewPrivileges controls whether additional privileges could be gained by processes in the container. - NoNewPrivileges bool `json:"noNewPrivileges,omitempty" platform:"linux"` + NoNewPrivileges bool `json:"noNewPrivileges,omitempty" platform:"linux,zos"` // ApparmorProfile specifies the apparmor profile for the container. ApparmorProfile string `json:"apparmorProfile,omitempty" platform:"linux"` // Specify an oom_score_adj for the container. @@ -94,10 +96,12 @@ type Process struct { SelinuxLabel string `json:"selinuxLabel,omitempty" platform:"linux"` // IOPriority contains the I/O priority settings for the cgroup. IOPriority *LinuxIOPriority `json:"ioPriority,omitempty" platform:"linux"` + // ExecCPUAffinity specifies CPU affinity for exec processes. + ExecCPUAffinity *CPUAffinity `json:"execCPUAffinity,omitempty" platform:"linux"` } // LinuxCapabilities specifies the list of allowed capabilities that are kept for a process. -// http://man7.org/linux/man-pages/man7/capabilities.7.html +// https://man7.org/linux/man-pages/man7/capabilities.7.html type LinuxCapabilities struct { // Bounding is the set of capabilities checked by the kernel. 
Bounding []string `json:"bounding,omitempty" platform:"linux"` @@ -127,6 +131,12 @@ const ( IOPRIO_CLASS_IDLE IOPriorityClass = "IOPRIO_CLASS_IDLE" ) +// CPUAffinity specifies process' CPU affinity. +type CPUAffinity struct { + Initial string `json:"initial,omitempty"` + Final string `json:"final,omitempty"` +} + // Box specifies dimensions of a rectangle. Used for specifying the size of a console. type Box struct { // Height is the vertical dimension of a box. @@ -162,7 +172,7 @@ type Mount struct { // Destination is the absolute path where the mount will be placed in the container. Destination string `json:"destination"` // Type specifies the mount kind. - Type string `json:"type,omitempty" platform:"linux,solaris,zos"` + Type string `json:"type,omitempty" platform:"linux,solaris,zos,freebsd"` // Source specifies the source path of the mount. Source string `json:"source,omitempty"` // Options are fstab style mount options. @@ -187,6 +197,10 @@ type Hook struct { type Hooks struct { // Prestart is Deprecated. Prestart is a list of hooks to be run before the container process is executed. // It is called in the Runtime Namespace + // + // Deprecated: use [Hooks.CreateRuntime], [Hooks.CreateContainer], and + // [Hooks.StartContainer] instead, which allow more granular hook control + // during the create and start phase. Prestart []Hook `json:"prestart,omitempty"` // CreateRuntime is a list of hooks to be run after the container has been created but before pivot_root or any equivalent operation has been called // It is called in the Runtime Namespace @@ -224,6 +238,8 @@ type Linux struct { Namespaces []LinuxNamespace `json:"namespaces,omitempty"` // Devices are a list of device nodes that are created for the container Devices []LinuxDevice `json:"devices,omitempty"` + // NetDevices are key-value pairs, keyed by network device name on the host, moved to the container's network namespace. 
+ NetDevices map[string]LinuxNetDevice `json:"netDevices,omitempty"` // Seccomp specifies the seccomp security settings for the container. Seccomp *LinuxSeccomp `json:"seccomp,omitempty"` // RootfsPropagation is the rootfs mount propagation mode for the container. @@ -237,6 +253,8 @@ type Linux struct { // IntelRdt contains Intel Resource Director Technology (RDT) information for // handling resource constraints and monitoring metrics (e.g., L3 cache, memory bandwidth) for the container IntelRdt *LinuxIntelRdt `json:"intelRdt,omitempty"` + // MemoryPolicy contains NUMA memory policy for the container. + MemoryPolicy *LinuxMemoryPolicy `json:"memoryPolicy,omitempty"` // Personality contains configuration for the Linux personality syscall Personality *LinuxPersonality `json:"personality,omitempty"` // TimeOffsets specifies the offset for supporting time namespaces. @@ -371,6 +389,12 @@ type LinuxMemory struct { // Total memory limit (memory + swap). Swap *int64 `json:"swap,omitempty"` // Kernel memory limit (in bytes). + // + // Deprecated: kernel-memory limits are not supported in cgroups v2, and + // were obsoleted in [kernel v5.4]. This field should no longer be used, + // as it may be ignored by runtimes. + // + // [kernel v5.4]: https://github.com/torvalds/linux/commit/0158115f702b0ba208ab0 Kernel *int64 `json:"kernel,omitempty"` // Kernel memory limit for tcp (in bytes) KernelTCP *int64 `json:"kernelTCP,omitempty"` @@ -412,7 +436,7 @@ type LinuxCPU struct { // LinuxPids for Linux cgroup 'pids' resource management (Linux 4.3) type LinuxPids struct { // Maximum number of PIDs. Default is "no limit". 
- Limit int64 `json:"limit"` + Limit *int64 `json:"limit,omitempty"` } // LinuxNetwork identification and priority configuration @@ -473,6 +497,12 @@ type LinuxDevice struct { GID *uint32 `json:"gid,omitempty"` } +// LinuxNetDevice represents a single network device to be added to the container's network namespace +type LinuxNetDevice struct { + // Name of the device in the container namespace + Name string `json:"name,omitempty"` +} + // LinuxDeviceCgroup represents a device rule for the devices specified to // the device controller type LinuxDeviceCgroup struct { @@ -617,6 +647,17 @@ type WindowsCPUResources struct { // cycles per 10,000 cycles. Set processor `maximum` to a percentage times // 100. Maximum *uint16 `json:"maximum,omitempty"` + // Set of CPUs to affinitize for this container. + Affinity []WindowsCPUGroupAffinity `json:"affinity,omitempty"` +} + +// Similar to _GROUP_AFFINITY struct defined in +// https://learn.microsoft.com/en-us/windows-hardware/drivers/ddi/miniport/ns-miniport-_group_affinity +type WindowsCPUGroupAffinity struct { + // CPU mask relative to this CPU group. + Mask uint64 `json:"mask,omitempty"` + // Processor group the mask refers to, as returned by GetLogicalProcessorInformationEx. + Group uint32 `json:"group,omitempty"` } // WindowsStorageResources contains storage resource management settings. @@ -649,6 +690,32 @@ type WindowsHyperV struct { UtilityVMPath string `json:"utilityVMPath,omitempty"` } +// IOMems contains information about iomem addresses that should be passed to the VM. +type IOMems struct { + // Guest Frame Number to map the iomem range. If GFN is not specified, the mapping will be done to the same Frame Number as was provided in FirstMFN. + FirstGFN *uint64 `json:"firstGFN,omitempty"` + // Physical page number of iomem regions. + FirstMFN *uint64 `json:"firstMFN"` + // Number of pages to be mapped. 
+ NrMFNs *uint64 `json:"nrMFNs"` +} + +// Hardware configuration for the VM image +type HWConfig struct { + // Path to the container device-tree file that should be passed to the VM configuration. + DeviceTree string `json:"deviceTree,omitempty"` + // Number of virtual cpus for the VM. + VCPUs *uint32 `json:"vcpus,omitempty"` + // Maximum memory in bytes allocated to the VM. + Memory *uint64 `json:"memory,omitempty"` + // Host device tree nodes to passthrough to the VM. + DtDevs []string `json:"dtdevs,omitempty"` + // Allow auto-translated domains to access specific hardware I/O memory pages. + IOMems []IOMems `json:"iomems,omitempty"` + // Allows VM to access specific physical IRQs. + Irqs []uint32 `json:"irqs,omitempty"` +} + // VM contains information for virtual-machine-based containers. type VM struct { // Hypervisor specifies hypervisor-related configuration for virtual-machine-based containers. @@ -657,6 +724,8 @@ type VM struct { Kernel VMKernel `json:"kernel"` // Image specifies guest image related configuration for virtual-machine-based containers. Image VMImage `json:"image,omitempty"` + // Hardware configuration that should be passed to the VM. + HwConfig *HWConfig `json:"hwconfig,omitempty"` } // VMHypervisor contains information about the hypervisor to use for a virtual machine. @@ -741,6 +810,10 @@ const ( ArchPARISC Arch = "SCMP_ARCH_PARISC" ArchPARISC64 Arch = "SCMP_ARCH_PARISC64" ArchRISCV64 Arch = "SCMP_ARCH_RISCV64" + ArchLOONGARCH64 Arch = "SCMP_ARCH_LOONGARCH64" + ArchM68K Arch = "SCMP_ARCH_M68K" + ArchSH Arch = "SCMP_ARCH_SH" + ArchSHEB Arch = "SCMP_ARCH_SHEB" ) // LinuxSeccompAction taken upon Seccomp rule match @@ -795,49 +868,92 @@ type LinuxSyscall struct { type LinuxIntelRdt struct { // The identity for RDT Class of Service ClosID string `json:"closID,omitempty"` + + // Schemata specifies the complete schemata to be written as is to the + // schemata file in resctrl fs. Each element represents a single line in the schemata file. 
+ // NOTE: This will overwrite schemas specified in the L3CacheSchema and/or + // MemBwSchema fields. + Schemata []string `json:"schemata,omitempty"` + // The schema for L3 cache id and capacity bitmask (CBM) // Format: "L3:=;=;..." + // NOTE: Should not be specified if Schemata is non-empty. L3CacheSchema string `json:"l3CacheSchema,omitempty"` // The schema of memory bandwidth per L3 cache id // Format: "MB:=bandwidth0;=bandwidth1;..." // The unit of memory bandwidth is specified in "percentages" by // default, and in "MBps" if MBA Software Controller is enabled. + // NOTE: Should not be specified if Schemata is non-empty. MemBwSchema string `json:"memBwSchema,omitempty"` - // EnableCMT is the flag to indicate if the Intel RDT CMT is enabled. CMT (Cache Monitoring Technology) supports monitoring of - // the last-level cache (LLC) occupancy for the container. - EnableCMT bool `json:"enableCMT,omitempty"` + // EnableMonitoring enables resctrl monitoring for the container. This will + // create a dedicated resctrl monitoring group for the container. + EnableMonitoring bool `json:"enableMonitoring,omitempty"` +} + +// LinuxMemoryPolicy represents input for the set_mempolicy syscall. +type LinuxMemoryPolicy struct { + // Mode for the set_mempolicy syscall. + Mode MemoryPolicyModeType `json:"mode"` - // EnableMBM is the flag to indicate if the Intel RDT MBM is enabled. MBM (Memory Bandwidth Monitoring) supports monitoring of - // total and local memory bandwidth for the container. - EnableMBM bool `json:"enableMBM,omitempty"` + // Nodes representing the nodemask for the set_mempolicy syscall in comma separated ranges format. + // Format: "-,,-,..." + Nodes string `json:"nodes"` + + // Flags for the set_mempolicy syscall. + Flags []MemoryPolicyFlagType `json:"flags,omitempty"` } // ZOS contains platform-specific configuration for z/OS based containers. 
type ZOS struct { - // Devices are a list of device nodes that are created for the container - Devices []ZOSDevice `json:"devices,omitempty"` + // Namespaces contains the namespaces that are created and/or joined by the container + Namespaces []ZOSNamespace `json:"namespaces,omitempty"` } -// ZOSDevice represents the mknod information for a z/OS special device file -type ZOSDevice struct { - // Path to the device. - Path string `json:"path"` - // Device type, block, char, etc. - Type string `json:"type"` - // Major is the device's major number. - Major int64 `json:"major"` - // Minor is the device's minor number. - Minor int64 `json:"minor"` - // FileMode permission bits for the device. - FileMode *os.FileMode `json:"fileMode,omitempty"` - // UID of the device. - UID *uint32 `json:"uid,omitempty"` - // Gid of the device. - GID *uint32 `json:"gid,omitempty"` +// ZOSNamespace is the configuration for a z/OS namespace +type ZOSNamespace struct { + // Type is the type of namespace + Type ZOSNamespaceType `json:"type"` + // Path is a path to an existing namespace persisted on disk that can be joined + // and is of the same type + Path string `json:"path,omitempty"` } +// ZOSNamespaceType is one of the z/OS namespaces +type ZOSNamespaceType string + +const ( + // PIDNamespace for isolating process IDs + ZOSPIDNamespace ZOSNamespaceType = "pid" + // MountNamespace for isolating mount points + ZOSMountNamespace ZOSNamespaceType = "mount" + // IPCNamespace for isolating System V IPC, POSIX message queues + ZOSIPCNamespace ZOSNamespaceType = "ipc" + // UTSNamespace for isolating hostname and NIS domain name + ZOSUTSNamespace ZOSNamespaceType = "uts" +) + +type MemoryPolicyModeType string + +const ( + MpolDefault MemoryPolicyModeType = "MPOL_DEFAULT" + MpolBind MemoryPolicyModeType = "MPOL_BIND" + MpolInterleave MemoryPolicyModeType = "MPOL_INTERLEAVE" + MpolWeightedInterleave MemoryPolicyModeType = "MPOL_WEIGHTED_INTERLEAVE" + MpolPreferred MemoryPolicyModeType = 
"MPOL_PREFERRED" + MpolPreferredMany MemoryPolicyModeType = "MPOL_PREFERRED_MANY" + MpolLocal MemoryPolicyModeType = "MPOL_LOCAL" +) + +type MemoryPolicyFlagType string + +const ( + MpolFNumaBalancing MemoryPolicyFlagType = "MPOL_F_NUMA_BALANCING" + MpolFRelativeNodes MemoryPolicyFlagType = "MPOL_F_RELATIVE_NODES" + MpolFStaticNodes MemoryPolicyFlagType = "MPOL_F_STATIC_NODES" +) + // LinuxSchedulerPolicy represents different scheduling policies used with the Linux Scheduler type LinuxSchedulerPolicy string @@ -877,3 +993,75 @@ const ( // SchedFlagUtilClampMin represents the utilization clamp maximum scheduling flag SchedFlagUtilClampMax LinuxSchedulerFlag = "SCHED_FLAG_UTIL_CLAMP_MAX" ) + +// FreeBSD contains platform-specific configuration for FreeBSD based containers. +type FreeBSD struct { + // Devices which are accessible in the container + Devices []FreeBSDDevice `json:"devices,omitempty"` + // Jail definition for this container + Jail *FreeBSDJail `json:"jail,omitempty"` +} + +type FreeBSDDevice struct { + // Path to the device, relative to /dev. + Path string `json:"path"` + // FileMode permission bits for the device. 
+ Mode *os.FileMode `json:"mode,omitempty"` +} + +// FreeBSDJail describes how to configure the container's jail +type FreeBSDJail struct { + // Parent jail name - this can be used to share a single vnet + // across several containers + Parent string `json:"parent,omitempty"` + // Whether to use parent UTS names or override in the container + Host FreeBSDSharing `json:"host,omitempty"` + // IPv4 address sharing for the container + Ip4 FreeBSDSharing `json:"ip4,omitempty"` + // IPv4 addresses for the container + Ip4Addr []string `json:"ip4Addr,omitempty"` + // IPv6 address sharing for the container + Ip6 FreeBSDSharing `json:"ip6,omitempty"` + // IPv6 addresses for the container + Ip6Addr []string `json:"ip6Addr,omitempty"` + // Which network stack to use for the container + Vnet FreeBSDSharing `json:"vnet,omitempty"` + // If set, Ip4Addr and Ip6Addr addresses will be added to this interface + Interface string `json:"interface,omitempty"` + // List interfaces to be moved to the container's vnet + VnetInterfaces []string `json:"vnetInterfaces,omitempty"` + // SystemV IPC message sharing for the container + SysVMsg FreeBSDSharing `json:"sysvmsg,omitempty"` + // SystemV semaphore message sharing for the container + SysVSem FreeBSDSharing `json:"sysvsem,omitempty"` + // SystemV memory sharing for the container + SysVShm FreeBSDSharing `json:"sysvshm,omitempty"` + // Mount visibility (see jail(8) for details) + EnforceStatfs *int `json:"enforceStatfs,omitempty"` + // Jail capabilities + Allow *FreeBSDJailAllow `json:"allow,omitempty"` +} + +// These values are used to control access to features in the container, either +// disabling the feature, sharing state with the parent or creating new private +// state in the container. 
+type FreeBSDSharing string + +const ( + FreeBSDShareDisable FreeBSDSharing = "disable" + FreeBSDShareNew FreeBSDSharing = "new" + FreeBSDShareInherit FreeBSDSharing = "inherit" +) + +// FreeBSDJailAllow describes jail capabilities +type FreeBSDJailAllow struct { + SetHostname bool `json:"setHostname,omitempty"` + RawSockets bool `json:"rawSockets,omitempty"` + Chflags bool `json:"chflags,omitempty"` + Mount []string `json:"mount,omitempty"` + Quotas bool `json:"quotas,omitempty"` + SocketAf bool `json:"socketAf,omitempty"` + Mlock bool `json:"mlock,omitempty"` + ReservedPorts bool `json:"reservedPorts,omitempty"` + Suser bool `json:"suser,omitempty"` +} diff --git a/vendor/github.com/opencontainers/runtime-spec/specs-go/version.go b/vendor/github.com/opencontainers/runtime-spec/specs-go/version.go index b3fca349..1fcae5bb 100644 --- a/vendor/github.com/opencontainers/runtime-spec/specs-go/version.go +++ b/vendor/github.com/opencontainers/runtime-spec/specs-go/version.go @@ -6,12 +6,12 @@ const ( // VersionMajor is for an API incompatible changes VersionMajor = 1 // VersionMinor is for functionality in a backwards-compatible manner - VersionMinor = 1 + VersionMinor = 3 // VersionPatch is for backwards-compatible bug fixes - VersionPatch = 0 + VersionPatch = 1 // VersionDev indicates development branch. Releases will be empty string. - VersionDev = "" + VersionDev = "+dev" ) // Version is the specification version that the package types support. diff --git a/vendor/modules.txt b/vendor/modules.txt index 0b8ddbc6..4373732b 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -25,7 +25,7 @@ github.com/moby/sys/mountinfo # github.com/mrunalp/fileutils v0.5.0 ## explicit; go 1.13 github.com/mrunalp/fileutils -# github.com/opencontainers/runtime-spec v1.1.0 +# github.com/opencontainers/runtime-spec v1.3.0 ## explicit github.com/opencontainers/runtime-spec/specs-go # github.com/opencontainers/selinux v1.9.1