@@ -16,14 +16,153 @@ package seccomp
16
16
17
17
import (
18
18
"fmt"
19
+ "os"
19
20
"runtime"
21
+ "syscall"
20
22
"unsafe"
21
23
22
24
"golang.org/x/sys/unix"
23
25
"gvisor.dev/gvisor/pkg/abi/linux"
24
26
"gvisor.dev/gvisor/pkg/bpf"
27
+ "gvisor.dev/gvisor/pkg/hostsyscall"
28
+ "gvisor.dev/gvisor/pkg/log"
25
29
)
26
30
31
+ // NotificationCallback is a callback which is called when a blocked syscall is triggered.
32
+ type NotificationCallback func (f * os.File , req linux.SeccompNotif , ret int )
33
+
34
+ // SetFilterAndLogNotifications installs the given BPF program and logs user
35
+ // notifications triggered by the seccomp filter. It allows the triggering
36
+ // syscalls to proceed without being blocked.
37
+ //
38
+ // This function is intended for debugging seccomp filter violations and should
39
+ // not be used in production environments.
40
+ //
41
+ // Note: It spawns a background goroutine to monitor a seccomp file descriptor
42
+ // and log any received notifications.
43
+ func SetFilterAndLogNotifications (
44
+ instrs []bpf.Instruction ,
45
+ options ProgramOptions ,
46
+ ) error {
47
+ // PR_SET_NO_NEW_PRIVS is required in order to enable seccomp. See
48
+ // seccomp(2) for details.
49
+ //
50
+ // PR_SET_NO_NEW_PRIVS is specific to the calling thread, not the whole
51
+ // thread group, so between PR_SET_NO_NEW_PRIVS and seccomp() below we must
52
+ // remain on the same thread. no_new_privs will be propagated to other
53
+ // threads in the thread group by seccomp(SECCOMP_FILTER_FLAG_TSYNC), in
54
+ // kernel/seccomp.c:seccomp_sync_threads().
55
+ runtime .LockOSThread ()
56
+ defer runtime .UnlockOSThread ()
57
+ if _ , _ , errno := unix .RawSyscall6 (unix .SYS_PRCTL , linux .PR_SET_NO_NEW_PRIVS , 1 , 0 , 0 , 0 , 0 ); errno != 0 {
58
+ return errno
59
+ }
60
+
61
+ sockProg := linux.SockFprog {
62
+ Len : uint16 (len (instrs )),
63
+ Filter : (* linux .BPFInstruction )(unsafe .Pointer (& instrs [0 ])),
64
+ }
65
+ flags := linux .SECCOMP_FILTER_FLAG_TSYNC |
66
+ linux .SECCOMP_FILTER_FLAG_NEW_LISTENER |
67
+ linux .SECCOMP_FILTER_FLAG_TSYNC_ESRCH | (1 << 5 )
68
+ fd , errno := seccomp (linux .SECCOMP_SET_MODE_FILTER , uint32 (flags ), unsafe .Pointer (& sockProg ))
69
+ if errno != 0 {
70
+ return errno
71
+ }
72
+ if options .NotifyFDNum > 0 {
73
+ if err := unix .Dup2 (int (fd ), options .NotifyFDNum ); err != nil {
74
+ panic (fmt .Sprintf ("dup2 %d -> %d: %v" , fd , options .NotifyFDNum , err ))
75
+ }
76
+ unix .Close (int (fd ))
77
+ fd = uintptr (options .NotifyFDNum )
78
+ }
79
+ f := os .NewFile (fd , "seccomp_notify" )
80
+ go func () {
81
+ // LockOSThread should help minimizing interactions with the scheduler.
82
+ runtime .LockOSThread ()
83
+ defer runtime .UnlockOSThread ()
84
+ var (
85
+ req linux.SeccompNotif
86
+ resp linux.SeccompNotifResp
87
+ )
88
+ for {
89
+ req = linux.SeccompNotif {}
90
+ _ , _ , errno := unix .Syscall (unix .SYS_IOCTL , uintptr (f .Fd ()),
91
+ uintptr (linux .SECCOMP_IOCTL_NOTIF_RECV ),
92
+ uintptr (unsafe .Pointer (& req )))
93
+ if errno != 0 {
94
+ if errno == unix .EINTR {
95
+ continue
96
+ }
97
+ panic (fmt .Sprintf ("SECCOMP_IOCTL_NOTIF_RECV failed with %d" , errno ))
98
+ }
99
+
100
+ attached := true
101
+ if errno := hostsyscall .RawSyscallErrno (unix .SYS_PTRACE , unix .PTRACE_ATTACH , uintptr (req .Pid ), 0 ); errno != 0 {
102
+ log .Warningf ("unable to attach: %v" , errno )
103
+ attached = false
104
+ }
105
+ resp = linux.SeccompNotifResp {
106
+ ID : req .ID ,
107
+ Flags : linux .SECCOMP_USER_NOTIF_FLAG_CONTINUE ,
108
+ }
109
+ errno = hostsyscall .RawSyscallErrno (unix .SYS_IOCTL , uintptr (f .Fd ()),
110
+ uintptr (linux .SECCOMP_IOCTL_NOTIF_SEND ),
111
+ uintptr (unsafe .Pointer (& resp )))
112
+ if errno != 0 {
113
+ panic (fmt .Sprintf ("SECCOMP_IOCTL_NOTIF_SEND failed with %d" , errno ))
114
+ }
115
+ if ! attached {
116
+ if options .NotificationCallback != nil {
117
+ options .NotificationCallback (f , req , 0 )
118
+ } else {
119
+ log .Warningf ("Seccomp violation: %#v" , req )
120
+ }
121
+ continue
122
+ }
123
+ for {
124
+ var info unix.Siginfo
125
+ errno := unix .Waitid (unix .P_PID , int (req .Pid ), & info , syscall .WALL | syscall .WEXITED , nil )
126
+ if errno == syscall .EINTR {
127
+ continue
128
+ } else if errno != nil {
129
+ log .Warningf ("failed to wait for the child process: %v" , errno )
130
+ }
131
+ break
132
+ }
133
+ ret := 0
134
+ {
135
+ var regs linux.PtraceRegs
136
+ iovec := unix.Iovec {
137
+ Base : (* byte )(unsafe .Pointer (& regs )),
138
+ Len : uint64 (unsafe .Sizeof (regs )),
139
+ }
140
+ _ , _ , errno := unix .RawSyscall6 (
141
+ unix .SYS_PTRACE ,
142
+ unix .PTRACE_GETREGSET ,
143
+ uintptr (req .Pid ),
144
+ linux .NT_PRSTATUS ,
145
+ uintptr (unsafe .Pointer (& iovec )),
146
+ 0 , 0 )
147
+ if errno != 0 {
148
+ log .Warningf ("unable to get registers: %s" , errno )
149
+ }
150
+ ret = int (regs .SyscallRet ())
151
+ }
152
+
153
+ if options .NotificationCallback != nil {
154
+ options .NotificationCallback (f , req , ret )
155
+ } else {
156
+ log .Warningf ("Seccomp violation: %#v" , req )
157
+ }
158
+ if errno := hostsyscall .RawSyscallErrno (unix .SYS_PTRACE , unix .PTRACE_DETACH , uintptr (req .Pid ), 0 ); errno != 0 {
159
+ panic (fmt .Sprintf ("unable to detach: %v" , errno ))
160
+ }
161
+ }
162
+ }()
163
+ return nil
164
+ }
165
+
27
166
// SetFilter installs the given BPF program.
28
167
func SetFilter (instrs []bpf.Instruction ) error {
29
168
// PR_SET_NO_NEW_PRIVS is required in order to enable seccomp. See
0 commit comments