@@ -24,12 +24,12 @@ use futures::never::Never;
24
24
use std:: {
25
25
any:: Any ,
26
26
fmt,
27
- os:: unix:: net:: UnixStream as StdUnixStream ,
27
+ os:: unix:: net:: UnixStream ,
28
28
path:: PathBuf ,
29
29
sync:: mpsc:: { Receiver , RecvTimeoutError } ,
30
30
time:: Duration ,
31
31
} ;
32
- use tokio:: { io, net :: UnixStream , runtime:: Runtime } ;
32
+ use tokio:: { io, runtime:: Runtime } ;
33
33
34
34
/// Use this macro to declare a `fn main() {}` that will create an executable that can be used for
35
35
/// spawning the desired worker.
@@ -50,6 +50,8 @@ macro_rules! decl_worker_main {
50
50
// See <https://github.com/paritytech/polkadot/issues/7117>.
51
51
$crate:: sp_tracing:: try_init_simple( ) ;
52
52
53
+ let worker_pid = std:: process:: id( ) ;
54
+
53
55
let args = std:: env:: args( ) . collect:: <Vec <_>>( ) ;
54
56
if args. len( ) == 1 {
55
57
print_help( $expected_command) ;
@@ -75,20 +77,25 @@ macro_rules! decl_worker_main {
75
77
} ,
76
78
"--check-can-unshare-user-namespace-and-change-root" => {
77
79
#[ cfg( target_os = "linux" ) ]
78
- let status = if security:: unshare_user_namespace_and_change_root(
79
- $crate:: worker:: WorkerKind :: Execute ,
80
+ let status = if let Err ( err) = security:: unshare_user_namespace_and_change_root(
81
+ $crate:: worker:: WorkerKind :: CheckPivotRoot ,
82
+ worker_pid,
80
83
// We're not accessing any files, so we can try to pivot_root in the temp
81
84
// dir without conflicts with other processes.
82
85
& std:: env:: temp_dir( ) ,
83
- )
84
- . is_ok( )
85
- {
86
- 0
87
- } else {
86
+ ) {
87
+ // Write the error to stderr, log it on the host-side.
88
+ eprintln!( "{}" , err) ;
88
89
-1
90
+ } else {
91
+ 0
89
92
} ;
90
93
#[ cfg( not( target_os = "linux" ) ) ]
91
- let status = -1 ;
94
+ let status = {
95
+ // Write the error to stderr, log it on the host-side.
96
+ eprintln!( "not available on macos" ) ;
97
+ -1
98
+ } ;
92
99
std:: process:: exit( status)
93
100
} ,
94
101
@@ -153,13 +160,15 @@ pub const JOB_TIMEOUT_OVERHEAD: Duration = Duration::from_millis(50);
153
160
/// The kind of worker a process is acting as.
///
/// The [`fmt::Display`] implementation below yields the short lowercase name that is
/// interpolated into log messages such as `"starting pvf worker ({})"`.
pub enum WorkerKind {
    /// Displayed as `prepare`.
    Prepare,
    /// Displayed as `execute`.
    Execute,
    /// Displayed as `check pivot root`.
    ///
    /// Passed to `security::unshare_user_namespace_and_change_root` by the
    /// `--check-can-unshare-user-namespace-and-change-root` command.
    CheckPivotRoot,
}

impl fmt::Display for WorkerKind {
    /// Writes the human-readable name of the worker kind to `f`.
    ///
    /// The match is intentionally exhaustive (no `_` arm) so that adding a variant
    /// without giving it a name is a compile error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Self::Prepare => "prepare",
            Self::Execute => "execute",
            Self::CheckPivotRoot => "check pivot root",
        };
        // `write_str` emits the name verbatim, exactly like an argument-less `write!`.
        f.write_str(name)
    }
}
@@ -178,13 +187,61 @@ pub fn worker_event_loop<F, Fut>(
178
187
Fut : futures:: Future < Output = io:: Result < Never > > ,
179
188
{
180
189
let worker_pid = std:: process:: id ( ) ;
181
- gum:: debug!( target: LOG_TARGET , %worker_pid, ?worker_dir_path, "starting pvf worker ({})" , worker_kind) ;
190
+ gum:: debug!(
191
+ target: LOG_TARGET ,
192
+ %worker_pid,
193
+ ?worker_dir_path,
194
+ ?security_status,
195
+ "starting pvf worker ({})" ,
196
+ worker_kind
197
+ ) ;
198
+
199
+ // Check for a mismatch between the node and worker versions.
200
+ if let ( Some ( node_version) , Some ( worker_version) ) = ( node_version, worker_version) {
201
+ if node_version != worker_version {
202
+ gum:: error!(
203
+ target: LOG_TARGET ,
204
+ %worker_kind,
205
+ %worker_pid,
206
+ %node_version,
207
+ %worker_version,
208
+ "Node and worker version mismatch, node needs restarting, forcing shutdown" ,
209
+ ) ;
210
+ kill_parent_node_in_emergency ( ) ;
211
+ worker_shutdown_message ( worker_kind, worker_pid, "Version mismatch" ) ;
212
+ return
213
+ }
214
+ }
215
+
216
+ // Make sure that we can read the worker dir path, and log its contents.
217
+ let entries = || -> Result < Vec < _ > , io:: Error > {
218
+ std:: fs:: read_dir ( & worker_dir_path) ?
219
+ . map ( |res| res. map ( |e| e. file_name ( ) ) )
220
+ . collect ( )
221
+ } ( ) ;
222
+ match entries {
223
+ Ok ( entries) =>
224
+ gum:: trace!( target: LOG_TARGET , %worker_pid, ?worker_dir_path, "content of worker dir: {:?}" , entries) ,
225
+ Err ( err) => {
226
+ gum:: error!(
227
+ target: LOG_TARGET ,
228
+ %worker_kind,
229
+ %worker_pid,
230
+ ?worker_dir_path,
231
+ "Could not read worker dir: {}" ,
232
+ err. to_string( )
233
+ ) ;
234
+ worker_shutdown_message ( worker_kind, worker_pid, & err. to_string ( ) ) ;
235
+ return
236
+ } ,
237
+ }
182
238
183
239
// Connect to the socket.
184
- let stream = || -> std:: io:: Result < StdUnixStream > {
185
- let socket_path = worker_dir:: socket ( & worker_dir_path) ;
186
- let stream = StdUnixStream :: connect ( & socket_path) ?;
187
- stream. set_nonblocking ( true ) ?; // See note for `from_std`.
240
+ let socket_path = worker_dir:: socket ( & worker_dir_path) ;
241
+ let stream = || -> std:: io:: Result < UnixStream > {
242
+ let stream = UnixStream :: connect ( & socket_path) ?;
243
+ // Remove the socket here. We don't also need to do this on the host-side; on failed
244
+ // rendezvous, the host will delete the whole worker dir.
188
245
std:: fs:: remove_file ( & socket_path) ?;
189
246
Ok ( stream)
190
247
} ( ) ;
@@ -203,23 +260,6 @@ pub fn worker_event_loop<F, Fut>(
203
260
} ,
204
261
} ;
205
262
206
- // Check for a mismatch between the node and worker versions.
207
- if let ( Some ( node_version) , Some ( worker_version) ) = ( node_version, worker_version) {
208
- if node_version != worker_version {
209
- gum:: error!(
210
- target: LOG_TARGET ,
211
- %worker_kind,
212
- %worker_pid,
213
- %node_version,
214
- %worker_version,
215
- "Node and worker version mismatch, node needs restarting, forcing shutdown" ,
216
- ) ;
217
- kill_parent_node_in_emergency ( ) ;
218
- worker_shutdown_message ( worker_kind, worker_pid, "Version mismatch" ) ;
219
- return
220
- }
221
- }
222
-
223
263
// Enable some security features.
224
264
{
225
265
// Call based on whether we can change root. Error out if it should work but fails.
@@ -230,9 +270,11 @@ pub fn worker_event_loop<F, Fut>(
230
270
// > CLONE_NEWUSER requires that the calling process is not threaded.
231
271
#[ cfg( target_os = "linux" ) ]
232
272
if security_status. can_unshare_user_namespace_and_change_root {
233
- if let Err ( err) =
234
- security:: unshare_user_namespace_and_change_root ( worker_kind, & worker_dir_path)
235
- {
273
+ if let Err ( err) = security:: unshare_user_namespace_and_change_root (
274
+ worker_kind,
275
+ worker_pid,
276
+ & worker_dir_path,
277
+ ) {
236
278
// The filesystem may be in an inconsistent state, bail out.
237
279
gum:: error!(
238
280
target: LOG_TARGET ,
@@ -251,7 +293,7 @@ pub fn worker_event_loop<F, Fut>(
251
293
#[ cfg( target_os = "linux" ) ]
252
294
if security_status. can_enable_landlock {
253
295
let landlock_status =
254
- security:: landlock:: enable_for_worker ( worker_kind, & worker_dir_path) ;
296
+ security:: landlock:: enable_for_worker ( worker_kind, worker_pid , & worker_dir_path) ;
255
297
if !matches ! ( landlock_status, Ok ( landlock:: RulesetStatus :: FullyEnforced ) ) {
256
298
// We previously were able to enable, so this should never happen.
257
299
//
@@ -284,10 +326,7 @@ pub fn worker_event_loop<F, Fut>(
284
326
// Run the main worker loop.
285
327
let rt = Runtime :: new ( ) . expect ( "Creates tokio runtime. If this panics the worker will die and the host will detect that and deal with it." ) ;
286
328
let err = rt
287
- . block_on ( async move {
288
- let stream = UnixStream :: from_std ( stream) ?;
289
- event_loop ( stream, worker_dir_path) . await
290
- } )
329
+ . block_on ( event_loop ( stream, worker_dir_path) )
291
330
// It's never `Ok` because it's `Ok(Never)`.
292
331
. unwrap_err ( ) ;
293
332
0 commit comments