From 1c7f51f6167f32edf5a415baf619b7fdc61cef4b Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Fri, 12 Sep 2025 16:21:18 -0700 Subject: [PATCH] ioctl_sniffer: use seccomp_unotify to trap ioctl-s PiperOrigin-RevId: 806465209 --- pkg/abi/linux/ptrace_amd64.go | 4 + pkg/abi/linux/ptrace_arm64.go | 4 + pkg/abi/linux/seccomp.go | 1 + pkg/seccomp/BUILD | 1 + pkg/seccomp/seccomp.go | 22 +++- pkg/seccomp/seccomp_unsafe.go | 139 +++++++++++++++++++++++++ pkg/test/dockerutil/gpu.go | 4 + runsc/boot/filter/BUILD | 1 + runsc/boot/filter/filter.go | 13 ++- tools/ioctl_sniffer/BUILD | 3 + tools/ioctl_sniffer/run_sniffer.go | 76 ++++++++++++-- tools/ioctl_sniffer/sniffer/BUILD | 1 + tools/ioctl_sniffer/sniffer/sniffer.go | 67 ++++++++++++ 13 files changed, 325 insertions(+), 11 deletions(-) diff --git a/pkg/abi/linux/ptrace_amd64.go b/pkg/abi/linux/ptrace_amd64.go index e970b5b4aa..507130d12c 100644 --- a/pkg/abi/linux/ptrace_amd64.go +++ b/pkg/abi/linux/ptrace_amd64.go @@ -67,3 +67,7 @@ func (p *PtraceRegs) StackPointer() uint64 { func (p *PtraceRegs) SetStackPointer(sp uint64) { p.Rsp = sp } + +func (p *PtraceRegs) SyscallRet() uint64 { + return p.Rax +} diff --git a/pkg/abi/linux/ptrace_arm64.go b/pkg/abi/linux/ptrace_arm64.go index 14967f738a..f9e924db09 100644 --- a/pkg/abi/linux/ptrace_arm64.go +++ b/pkg/abi/linux/ptrace_arm64.go @@ -75,3 +75,7 @@ func (p *PtraceRegs) StackPointer() uint64 { func (p *PtraceRegs) SetStackPointer(sp uint64) { p.Sp = sp } + +func (p *PtraceRegs) SyscallRet() uint64 { + return p.Regs[0] +} diff --git a/pkg/abi/linux/seccomp.go b/pkg/abi/linux/seccomp.go index 81c307f2bb..14fe92f0c5 100644 --- a/pkg/abi/linux/seccomp.go +++ b/pkg/abi/linux/seccomp.go @@ -31,6 +31,7 @@ const ( SECCOMP_FILTER_FLAG_TSYNC = 1 SECCOMP_FILTER_FLAG_NEW_LISTENER = 1 << 3 + SECCOMP_FILTER_FLAG_TSYNC_ESRCH = 1 << 4 SECCOMP_USER_NOTIF_FLAG_CONTINUE = 1 diff --git a/pkg/seccomp/BUILD b/pkg/seccomp/BUILD index 667428cc00..80d6aec233 100644 --- a/pkg/seccomp/BUILD +++ 
b/pkg/seccomp/BUILD @@ -20,6 +20,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/bpf", + "//pkg/hostsyscall", "//pkg/log", "@org_golang_x_sys//unix:go_default_library", ], diff --git a/pkg/seccomp/seccomp.go b/pkg/seccomp/seccomp.go index a26efcd6b7..22fdfcc967 100644 --- a/pkg/seccomp/seccomp.go +++ b/pkg/seccomp/seccomp.go @@ -82,8 +82,14 @@ func Install(rules SyscallRules, denyRules SyscallRules, options ProgramOptions) } // Perform the actual installation. - if err := SetFilter(instrs); err != nil { - return fmt.Errorf("failed to set filter: %v", err) + if options.LogNotifications { + if err := SetFilterAndLogNotifications(instrs, options); err != nil { + return fmt.Errorf("failed to set filter: %v", err) + } + } else { + if err := SetFilter(instrs); err != nil { + return fmt.Errorf("failed to set filter: %v", err) + } } log.Infof("Seccomp filters installed.") @@ -321,6 +327,17 @@ type ProgramOptions struct { // called >10% of the time out of all syscalls made). // It is ordered from most frequent to least frequent. HotSyscalls []uintptr + + // LogNotifications enables logging of user notifications at the + // warning level. Syscalls triggered notifications are not blocked. + LogNotifications bool + + // NotificationCallback is called when a blocked syscall is triggered. + NotificationCallback NotificationCallback + + // NotifyFDNum is a target number for the seccomp notify descriptor. + // It can be used in filters to allow ioctl-s on this file descriptor. + NotifyFDNum int } // DefaultProgramOptions returns the default program options. 
@@ -333,6 +350,7 @@ func DefaultProgramOptions() ProgramOptions { DefaultAction: action, BadArchAction: action, Optimize: true, + NotifyFDNum: -1, } } diff --git a/pkg/seccomp/seccomp_unsafe.go b/pkg/seccomp/seccomp_unsafe.go index 629ac5cca1..d621dcc252 100644 --- a/pkg/seccomp/seccomp_unsafe.go +++ b/pkg/seccomp/seccomp_unsafe.go @@ -16,14 +16,153 @@ package seccomp import ( "fmt" + "os" "runtime" + "syscall" "unsafe" "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/bpf" + "gvisor.dev/gvisor/pkg/hostsyscall" + "gvisor.dev/gvisor/pkg/log" ) +// NotificationCallback is a callback which is called when a blocked syscall is triggered. +type NotificationCallback func(f *os.File, req linux.SeccompNotif, ret int) + +// SetFilterAndLogNotifications installs the given BPF program and logs user +// notifications triggered by the seccomp filter. It allows the triggering +// syscalls to proceed without being blocked. +// +// This function is intended for debugging seccomp filter violations and should +// not be used in production environments. +// +// Note: It spawns a background goroutine to monitor a seccomp file descriptor +// and log any received notifications. +func SetFilterAndLogNotifications( + instrs []bpf.Instruction, + options ProgramOptions, +) error { + // PR_SET_NO_NEW_PRIVS is required in order to enable seccomp. See + // seccomp(2) for details. + // + // PR_SET_NO_NEW_PRIVS is specific to the calling thread, not the whole + // thread group, so between PR_SET_NO_NEW_PRIVS and seccomp() below we must + // remain on the same thread. no_new_privs will be propagated to other + // threads in the thread group by seccomp(SECCOMP_FILTER_FLAG_TSYNC), in + // kernel/seccomp.c:seccomp_sync_threads(). 
+ runtime.LockOSThread() + defer runtime.UnlockOSThread() + if _, _, errno := unix.RawSyscall6(unix.SYS_PRCTL, linux.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0, 0); errno != 0 { + return errno + } + + sockProg := linux.SockFprog{ + Len: uint16(len(instrs)), + Filter: (*linux.BPFInstruction)(unsafe.Pointer(&instrs[0])), + } + flags := linux.SECCOMP_FILTER_FLAG_TSYNC | + linux.SECCOMP_FILTER_FLAG_NEW_LISTENER | + linux.SECCOMP_FILTER_FLAG_TSYNC_ESRCH | (1 << 5) + fd, errno := seccomp(linux.SECCOMP_SET_MODE_FILTER, uint32(flags), unsafe.Pointer(&sockProg)) + if errno != 0 { + return errno + } + if options.NotifyFDNum > 0 { + if err := unix.Dup2(int(fd), options.NotifyFDNum); err != nil { + panic(fmt.Sprintf("dup2 %d -> %d: %v", fd, options.NotifyFDNum, err)) + } + unix.Close(int(fd)) + fd = uintptr(options.NotifyFDNum) + } + f := os.NewFile(fd, "seccomp_notify") + go func() { + // LockOSThread should help minimizing interactions with the scheduler. + runtime.LockOSThread() + defer runtime.UnlockOSThread() + var ( + req linux.SeccompNotif + resp linux.SeccompNotifResp + ) + for { + req = linux.SeccompNotif{} + _, _, errno := unix.Syscall(unix.SYS_IOCTL, uintptr(f.Fd()), + uintptr(linux.SECCOMP_IOCTL_NOTIF_RECV), + uintptr(unsafe.Pointer(&req))) + if errno != 0 { + if errno == unix.EINTR { + continue + } + panic(fmt.Sprintf("SECCOMP_IOCTL_NOTIF_RECV failed with %d", errno)) + } + + attached := true + if errno := hostsyscall.RawSyscallErrno(unix.SYS_PTRACE, unix.PTRACE_ATTACH, uintptr(req.Pid), 0); errno != 0 { + log.Warningf("unable to attach: %v", errno) + attached = false + } + resp = linux.SeccompNotifResp{ + ID: req.ID, + Flags: linux.SECCOMP_USER_NOTIF_FLAG_CONTINUE, + } + errno = hostsyscall.RawSyscallErrno(unix.SYS_IOCTL, uintptr(f.Fd()), + uintptr(linux.SECCOMP_IOCTL_NOTIF_SEND), + uintptr(unsafe.Pointer(&resp))) + if errno != 0 { + panic(fmt.Sprintf("SECCOMP_IOCTL_NOTIF_SEND failed with %d", errno)) + } + if !attached { + if options.NotificationCallback != nil { + 
+ options.NotificationCallback(f, req, 0) + } else { + log.Warningf("Seccomp violation: %#v", req) + } + continue + } + for { + var info unix.Siginfo + errno := unix.Waitid(unix.P_PID, int(req.Pid), &info, syscall.WALL|syscall.WEXITED, nil) + if errno == syscall.EINTR { + continue + } else if errno != nil { + log.Warningf("failed to wait for the child process: %v", errno) + } + break + } + ret := 0 + { + var regs linux.PtraceRegs + iovec := unix.Iovec{ + Base: (*byte)(unsafe.Pointer(&regs)), + Len: uint64(unsafe.Sizeof(regs)), + } + _, _, errno := unix.RawSyscall6( + unix.SYS_PTRACE, + unix.PTRACE_GETREGSET, + uintptr(req.Pid), + linux.NT_PRSTATUS, + uintptr(unsafe.Pointer(&iovec)), + 0, 0) + if errno != 0 { + log.Warningf("unable to get registers: %s", errno) + } + ret = int(regs.SyscallRet()) + } + + if options.NotificationCallback != nil { + options.NotificationCallback(f, req, ret) + } else { + log.Warningf("Seccomp violation: %#v", req) + } + if errno := hostsyscall.RawSyscallErrno(unix.SYS_PTRACE, unix.PTRACE_DETACH, uintptr(req.Pid), 0); errno != 0 { + panic(fmt.Sprintf("unable to detach: %v", errno)) + } + } + }() + return nil +} + // SetFilter installs the given BPF program. func SetFilter(instrs []bpf.Instruction) error { // PR_SET_NO_NEW_PRIVS is required in order to enable seccomp. See diff --git a/pkg/test/dockerutil/gpu.go b/pkg/test/dockerutil/gpu.go index 3b89d3f14d..7eed648c77 100644 --- a/pkg/test/dockerutil/gpu.go +++ b/pkg/test/dockerutil/gpu.go @@ -193,6 +193,10 @@ func (sgo *SniffGPUOpts) GPUCapabilities() string { // prepend prepends the sniffer arguments to the given command. func (sgo *SniffGPUOpts) prepend(argv []string) []string { + if *runtime != "" && *runtime != "runc" { + // ioctl_sniffer isn't supported in gVisor. 
+ return argv + } if sgo.DisableSnifferReason != "" { return argv } diff --git a/runsc/boot/filter/BUILD b/runsc/boot/filter/BUILD index 81642d1a6d..d96767743f 100644 --- a/runsc/boot/filter/BUILD +++ b/runsc/boot/filter/BUILD @@ -32,6 +32,7 @@ go_library( "//pkg/seccomp/precompiledseccomp", "//pkg/sync", "//runsc/boot/filter/config", + "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/runsc/boot/filter/filter.go b/runsc/boot/filter/filter.go index 8c5cdd35c0..0df512fe5b 100644 --- a/runsc/boot/filter/filter.go +++ b/runsc/boot/filter/filter.go @@ -29,7 +29,10 @@ import ( // If you suspect the Sentry is getting killed due to a seccomp violation, // change this to `true` to get a panic stack trace when there is a // violation. -const debugFilter = false +const ( + debugFilterPanic = false // Panic on seccomp violation with stack trace. + debugFilterWarn = false // Log seccomp violation, but continue program execution. +) // Options is a re-export of the config Options type under this package. 
type Options = config.Options @@ -41,7 +44,7 @@ func Install(opt Options) error { } key := opt.ConfigKey() precompiled, usePrecompiled := GetPrecompiled(key) - if usePrecompiled && !debugFilter { + if usePrecompiled && !debugFilterPanic && !debugFilterWarn { vars := opt.Vars() log.Debugf("Loaded precompiled seccomp instructions for options %v, using variables: %v", key, vars) insns, err := precompiled.RenderInstructions(vars) @@ -51,9 +54,13 @@ func Install(opt Options) error { return seccomp.SetFilter(insns) } seccompOpts := config.SeccompOptions(opt) - if debugFilter { + if debugFilterPanic { log.Infof("Seccomp filter debugging is enabled; seccomp failures will result in a panic stack trace.") seccompOpts.DefaultAction = linux.SECCOMP_RET_TRAP + } else if debugFilterWarn { + log.Infof("Seccomp filter debugging is enabled; seccomp failures will be logged") + seccompOpts.DefaultAction = linux.SECCOMP_RET_USER_NOTIF + seccompOpts.LogNotifications = true } else { log.Infof("No precompiled program found for config options %v, building seccomp program from scratch. This may slow down container startup.", key) if log.IsLogging(log.Debug) { diff --git a/tools/ioctl_sniffer/BUILD b/tools/ioctl_sniffer/BUILD index acefa4d80d..4ed3e0af3c 100644 --- a/tools/ioctl_sniffer/BUILD +++ b/tools/ioctl_sniffer/BUILD @@ -54,7 +54,10 @@ go_binary( "//:sandbox", ], deps = [ + "//pkg/abi/linux", "//pkg/log", + "//pkg/seccomp", "//tools/ioctl_sniffer/sniffer", + "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/tools/ioctl_sniffer/run_sniffer.go b/tools/ioctl_sniffer/run_sniffer.go index 54c9f8a1c5..2c3275b137 100644 --- a/tools/ioctl_sniffer/run_sniffer.go +++ b/tools/ioctl_sniffer/run_sniffer.go @@ -21,8 +21,13 @@ import ( "fmt" "os" "os/exec" + "syscall" + "time" + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/seccomp" "gvisor.dev/gvisor/tools/ioctl_sniffer/sniffer" _ "embed" // Necessary to use go:embed. 
@@ -32,6 +37,7 @@ var ( enforceCompatibility = flag.String("enforce_compatibility", "", "May be set to 'INSTANT' or 'REPORT'. If set, the sniffer will return a non-zero error code if it detects an unsupported ioctl. 'INSTANT' causes the sniffer to exit immediately when this happens. 'REPORT' causes the sniffer to report all unsupported ioctls at the end of execution.") verbose = flag.Bool("verbose", false, "If true, the sniffer will print all Nvidia ioctls it sees.") addLdPath = flag.String("add_ld_path", "", "If set, reconfigure the ld cache to include the given directory") + seccompMode = flag.Bool("seccomp", true, "If true, seccomp is used to trace ioctl system calls.") ) //go:embed libioctl_hook.so @@ -106,23 +112,80 @@ func Main(ctx context.Context) error { } }() + seccompResults := sniffer.NewResults() + if *seccompMode { + // All ioctl-s to notifyFD are not traced, because it is used to + // recv seccomp notification request via ioctl-s. That can be + // fixed if seccomp rules will be installed just for a target + // process. + const notifyFD = 888 + + options := seccomp.ProgramOptions{ + DefaultAction: linux.SECCOMP_RET_ALLOW, + LogNotifications: true, + NotificationCallback: func(f *os.File, req linux.SeccompNotif, ret int) { + sniffer.ServeSeccompRequest(seccompResults, req, ret) + }, + NotifyFDNum: notifyFD, + } + instrs, _, err := seccomp.BuildProgram([]seccomp.RuleSet{ + { + Rules: seccomp.MakeSyscallRules(map[uintptr]seccomp.SyscallRule{ + unix.SYS_IOCTL: seccomp.PerArg{ + seccomp.NotEqual(notifyFD), + }, + }), + Action: linux.SECCOMP_RET_USER_NOTIF, + }, + }, + options, + ) + if err != nil { + return err + } + if err := seccomp.SetFilterAndLogNotifications(instrs, options); err != nil { + return fmt.Errorf("failed to set filter: %v", err) + } + } + // Set up command from flags cmd := exec.Command(flag.Arg(0), flag.Args()[1:]...) 
cmd.Stdin = os.Stdin cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr - // Refer to the hook file by file descriptor here as its named file no - // longer exists. - cmd.Env = append(os.Environ(), - fmt.Sprintf("LD_PRELOAD=/proc/%d/fd/%d", os.Getpid(), hookFile.Fd()), - fmt.Sprintf("GVISOR_IOCTL_SNIFFER_SOCKET_PATH=%v", server.Addr()), - fmt.Sprintf("GVISOR_IOCTL_SNIFFER_ENFORCE_COMPATIBILITY=%s", *enforceCompatibility)) + if !*seccompMode { + // Refer to the hook file by file descriptor here as its named file no + // longer exists. + cmd.Env = append(os.Environ(), + fmt.Sprintf("LD_PRELOAD=/proc/%d/fd/%d", os.Getpid(), hookFile.Fd()), + fmt.Sprintf("GVISOR_IOCTL_SNIFFER_SOCKET_PATH=%v", server.Addr()), + fmt.Sprintf("GVISOR_IOCTL_SNIFFER_ENFORCE_COMPATIBILITY=%s", *enforceCompatibility)) + } + go func() { + time.Sleep(600 * time.Second) + panic("timeout") + }() // Run the command and start reading the output. if err := cmd.Start(); err != nil { return fmt.Errorf("failed to run command: %w", err) } +waitLoop: + for { + var info unix.Siginfo + errno := unix.Waitid(unix.P_PID, cmd.Process.Pid, &info, syscall.WEXITED|syscall.WNOWAIT, nil) + if errno == syscall.EINTR { + continue + } else if errno != nil { + return fmt.Errorf("failed to wait for the child process: %v", errno) + } + switch info.Code { + case linux.CLD_EXITED, linux.CLD_KILLED, linux.CLD_DUMPED: + break waitLoop + } + } + // Once our command is done, we can close the sniffer server and print the // results. cmdErr := cmd.Wait() @@ -130,6 +193,7 @@ func Main(ctx context.Context) error { // Merge results from each connection. 
finalResults := server.AllResults() + finalResults.Merge(seccompResults) if finalResults.HasUnsupportedIoctl() { if *enforceCompatibility != "" { return fmt.Errorf("unsupported ioctls found: %v", finalResults) diff --git a/tools/ioctl_sniffer/sniffer/BUILD b/tools/ioctl_sniffer/sniffer/BUILD index fddfcd84ea..2309feb5bb 100644 --- a/tools/ioctl_sniffer/sniffer/BUILD +++ b/tools/ioctl_sniffer/sniffer/BUILD @@ -20,5 +20,6 @@ go_library( "//pkg/sentry/devices/nvproxy/nvconf", "//tools/ioctl_sniffer:ioctl_go_proto", "@org_golang_google_protobuf//proto:go_default_library", + "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/tools/ioctl_sniffer/sniffer/sniffer.go b/tools/ioctl_sniffer/sniffer/sniffer.go index f2aff220fa..cd2a6ae2ce 100644 --- a/tools/ioctl_sniffer/sniffer/sniffer.go +++ b/tools/ioctl_sniffer/sniffer/sniffer.go @@ -23,12 +23,15 @@ import ( "os" "regexp" "strings" + "unsafe" + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/abi/nvgpu" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/devices/nvproxy" "gvisor.dev/gvisor/pkg/sentry/devices/nvproxy/nvconf" + pb "gvisor.dev/gvisor/tools/ioctl_sniffer/ioctl_go_proto" ) @@ -258,6 +261,70 @@ func (c Connection) ReadHookOutput(ctx context.Context) *Results { return res } +func ServeSeccompRequest(res *Results, req linux.SeccompNotif, ret int) { + pid := req.Pid + fd := req.Data.Args[0] + cmd := req.Data.Args[1] + ioctlPB := &pb.Ioctl{ + Request: cmd, + Ret: int32(ret), + } + + path := fmt.Sprintf("/proc/%d/fd/%d", pid, fd) + fileName, err := os.Readlink(path) + if err != nil { + log.Warningf("Error getting descriptor path for ioctl %v: %v", cmd, err) + return + } + if !strings.HasPrefix(fileName, "/dev/nvidia") { + return + } + ioctlPB.FdPath = fileName + + size := linux.IOC_SIZE(uint32(cmd)) + if size != 0 && !strings.HasPrefix(fileName, "/dev/nvidia-uvm") { + localBuffer := make([]byte, size) + + localIov := unix.Iovec{ + Base: &localBuffer[0], + Len: 
uint64(size), + } + + remoteIov := unix.Iovec{ + Base: (*byte)(unsafe.Pointer(uintptr(req.Data.Args[2]))), + Len: uint64(size), + } + + bytesRead, _, errno := unix.Syscall6( + unix.SYS_PROCESS_VM_READV, + uintptr(pid), + uintptr(unsafe.Pointer(&localIov)), + 1, // Number of local iovec structures + uintptr(unsafe.Pointer(&remoteIov)), + 1, // Number of remote iovec structures + 0, // flags + ) + if errno != 0 { + log.Warningf("Error getting request (addr %x size %x) for ioctl %v: %s", req.Data.Args[2], size, cmd, errno) + } else { + ioctlPB.ArgData = localBuffer[:bytesRead] + } + } + ioctl, err := ParseIoctlOutput(ioctlPB) + if err != nil { + log.Warningf("Error parsing ioctl %v: %v", ioctlPB, err) + return + } + + if !ioctl.IsSupported() { + res.AddUnsupportedIoctl(ioctl) + if crashOnUnsupportedIoctl { + log.Warningf("Unsupported ioctl found; crashing immediately: %v", ioctl) + os.Exit(1) + } + } +} + // ParseIoctlOutput parses an ioctl protobuf from the ioctl hook. func ParseIoctlOutput(ioctl *pb.Ioctl) (Ioctl, error) { parsedIoctl := Ioctl{pb: ioctl}