@@ -16,14 +16,158 @@ package seccomp
16
16
17
17
import (
18
18
"fmt"
19
+ "os"
19
20
"runtime"
21
+ "syscall"
22
+ "time"
20
23
"unsafe"
21
24
22
25
"golang.org/x/sys/unix"
23
26
"gvisor.dev/gvisor/pkg/abi/linux"
24
27
"gvisor.dev/gvisor/pkg/bpf"
28
+ "gvisor.dev/gvisor/pkg/hostsyscall"
29
+ "gvisor.dev/gvisor/pkg/log"
25
30
)
26
31
32
+ // NotificationCallback is a callback which is called when a blocked syscall is triggered.
33
+ type NotificationCallback func (f * os.File , req linux.SeccompNotif , ret int )
34
+
35
+ // SetFilterAndLogNotifications installs the given BPF program and logs user
36
+ // notifications triggered by the seccomp filter. It allows the triggering
37
+ // syscalls to proceed without being blocked.
38
+ //
39
+ // This function is intended for debugging seccomp filter violations and should
40
+ // not be used in production environments.
41
+ //
42
+ // Note: It spawns a background goroutine to monitor a seccomp file descriptor
43
+ // and log any received notifications.
44
+ func SetFilterAndLogNotifications (
45
+ instrs []bpf.Instruction ,
46
+ options ProgramOptions ,
47
+ ) error {
48
+ // PR_SET_NO_NEW_PRIVS is required in order to enable seccomp. See
49
+ // seccomp(2) for details.
50
+ //
51
+ // PR_SET_NO_NEW_PRIVS is specific to the calling thread, not the whole
52
+ // thread group, so between PR_SET_NO_NEW_PRIVS and seccomp() below we must
53
+ // remain on the same thread. no_new_privs will be propagated to other
54
+ // threads in the thread group by seccomp(SECCOMP_FILTER_FLAG_TSYNC), in
55
+ // kernel/seccomp.c:seccomp_sync_threads().
56
+ runtime .LockOSThread ()
57
+ defer runtime .UnlockOSThread ()
58
+ if _ , _ , errno := unix .RawSyscall6 (unix .SYS_PRCTL , linux .PR_SET_NO_NEW_PRIVS , 1 , 0 , 0 , 0 , 0 ); errno != 0 {
59
+ return errno
60
+ }
61
+
62
+ sockProg := linux.SockFprog {
63
+ Len : uint16 (len (instrs )),
64
+ Filter : (* linux .BPFInstruction )(unsafe .Pointer (& instrs [0 ])),
65
+ }
66
+ flags := linux .SECCOMP_FILTER_FLAG_TSYNC |
67
+ linux .SECCOMP_FILTER_FLAG_NEW_LISTENER |
68
+ linux .SECCOMP_FILTER_FLAG_TSYNC_ESRCH | (1 << 5 )
69
+ fd , errno := seccomp (linux .SECCOMP_SET_MODE_FILTER , uint32 (flags ), unsafe .Pointer (& sockProg ))
70
+ if errno != 0 {
71
+ return errno
72
+ }
73
+ if options .NotifyFDNum > 0 {
74
+ if err := unix .Dup2 (int (fd ), options .NotifyFDNum ); err != nil {
75
+ panic (fmt .Sprintf ("dup2 %d -> %d: %v" , fd , options .NotifyFDNum , err ))
76
+ }
77
+ unix .Close (int (fd ))
78
+ fd = uintptr (options .NotifyFDNum )
79
+ }
80
+ f := os .NewFile (fd , "seccomp_notify" )
81
+ go func () {
82
+ // LockOSThread should help minimizing interactions with the scheduler.
83
+ runtime .LockOSThread ()
84
+ defer runtime .UnlockOSThread ()
85
+ var (
86
+ req linux.SeccompNotif
87
+ resp linux.SeccompNotifResp
88
+ )
89
+ for {
90
+ req = linux.SeccompNotif {}
91
+ _ , _ , errno := unix .Syscall (unix .SYS_IOCTL , uintptr (f .Fd ()),
92
+ uintptr (linux .SECCOMP_IOCTL_NOTIF_RECV ),
93
+ uintptr (unsafe .Pointer (& req )))
94
+ if errno != 0 {
95
+ if errno == unix .EINTR {
96
+ continue
97
+ }
98
+ panic (fmt .Sprintf ("SECCOMP_IOCTL_NOTIF_RECV failed with %d" , errno ))
99
+ }
100
+
101
+ log .Warningf ("req %#v" , req )
102
+ attached := true
103
+ if errno := hostsyscall .RawSyscallErrno (unix .SYS_PTRACE , unix .PTRACE_ATTACH , uintptr (req .Pid ), 0 ); errno != 0 {
104
+ log .Warningf ("unable to attach: %v" , errno )
105
+ attached = false
106
+ }
107
+ resp = linux.SeccompNotifResp {
108
+ ID : req .ID ,
109
+ Flags : linux .SECCOMP_USER_NOTIF_FLAG_CONTINUE ,
110
+ }
111
+ errno = hostsyscall .RawSyscallErrno (unix .SYS_IOCTL , uintptr (f .Fd ()),
112
+ uintptr (linux .SECCOMP_IOCTL_NOTIF_SEND ),
113
+ uintptr (unsafe .Pointer (& resp )))
114
+ if errno != 0 {
115
+ panic (fmt .Sprintf ("SECCOMP_IOCTL_NOTIF_SEND failed with %d" , errno ))
116
+ }
117
+ if ! attached {
118
+ if options .NotificationCallback != nil {
119
+ options .NotificationCallback (f , req , 0 )
120
+ } else {
121
+ log .Warningf ("Seccomp violation: %#v" , req )
122
+ }
123
+ continue
124
+ }
125
+ time .Sleep (1 * time .Second )
126
+ for {
127
+ var info unix.Siginfo
128
+ errno := unix .Waitid (unix .P_PID , int (req .Pid ), & info , syscall .WALL | syscall .WEXITED , nil )
129
+ if errno == syscall .EINTR {
130
+ continue
131
+ } else if errno != nil {
132
+ log .Warningf ("failed to wait for the child process: %v" , errno )
133
+ }
134
+ log .Warningf ("%d: stopped -> %x" , req .Pid , info .Code )
135
+ break
136
+ }
137
+ ret := 0
138
+ {
139
+ var regs linux.PtraceRegs
140
+ iovec := unix.Iovec {
141
+ Base : (* byte )(unsafe .Pointer (& regs )),
142
+ Len : uint64 (unsafe .Sizeof (regs )),
143
+ }
144
+ _ , _ , errno := unix .RawSyscall6 (
145
+ unix .SYS_PTRACE ,
146
+ unix .PTRACE_GETREGSET ,
147
+ uintptr (req .Pid ),
148
+ linux .NT_PRSTATUS ,
149
+ uintptr (unsafe .Pointer (& iovec )),
150
+ 0 , 0 )
151
+ if errno != 0 {
152
+ log .Warningf ("unable to get registers: %s" , errno )
153
+ }
154
+ ret = int (regs .SyscallRet ())
155
+ }
156
+
157
+ if options .NotificationCallback != nil {
158
+ options .NotificationCallback (f , req , ret )
159
+ } else {
160
+ log .Warningf ("Seccomp violation: %#v" , req )
161
+ }
162
+ log .Warningf ("detach from %d" , req .Pid )
163
+ if errno := hostsyscall .RawSyscallErrno (unix .SYS_PTRACE , unix .PTRACE_DETACH , uintptr (req .Pid ), 0 ); errno != 0 {
164
+ panic (fmt .Sprintf ("unable to detach: %v" , errno ))
165
+ }
166
+ }
167
+ }()
168
+ return nil
169
+ }
170
+
27
171
// SetFilter installs the given BPF program.
28
172
func SetFilter (instrs []bpf.Instruction ) error {
29
173
// PR_SET_NO_NEW_PRIVS is required in order to enable seccomp. See
0 commit comments