@@ -51,20 +51,89 @@ pub fn start_master_listener_unix(master_pid: i32) -> io::Result<()> {
5151 let handle = thread:: Builder :: new ( )
5252 . name ( "dd-sidecar" . into ( ) )
5353 . spawn ( move || {
54- let acquire_listener = move || -> io:: Result < _ > {
55- std_listener. set_nonblocking ( true ) ?;
56- let listener = UnixListener :: from_std ( std_listener. try_clone ( ) ?) ?;
57- let cancel = {
58- let fd = listener. as_raw_fd ( ) ;
59- move || stop_listening ( fd)
60- } ;
61- Ok ( ( move |handler| accept_socket_loop ( listener, handler) , cancel) )
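54+ // Accept connections with blocking I/O - no tokio Runtime is shared between connections
55+ // (the runtime built below is only used to initialize the server). This makes the code fork-safe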
56+ use crate :: service:: sidecar_server:: SidecarServer ;
57+ let runtime = match tokio:: runtime:: Builder :: new_current_thread ( )
58+ . enable_all ( )
59+ . build ( )
60+ {
61+ Ok ( rt) => rt,
62+ Err ( e) => {
63+ error ! ( "Failed to create runtime for server initialization: {}" , e) ;
64+ return ;
65+ }
6266 } ;
6367
64- let _ = enter_listener_loop ( acquire_listener) . map_err ( |e| {
65- error ! ( "enter_listener_loop failed: {}" , e) ;
66- e
67- } ) ;
68+ let server = runtime. block_on ( async { SidecarServer :: default ( ) } ) ;
69+
70+ loop {
71+ match std_listener. accept ( ) {
72+ Ok ( ( stream, _addr) ) => {
73+ let server = server. clone ( ) ;
74+ // Spawn a detached thread for each connection
75+ // Threads are not joined during shutdown to avoid blocking on active connections
76+ // Each thread will exit naturally when its connection closes
77+ if let Err ( e) = thread:: Builder :: new ( ) . name ( "dd-conn-handler" . into ( ) ) . spawn (
78+ move || {
79+ // Create a minimal single-threaded runtime for this connection only
80+ // This runtime will be dropped when the connection closes
81+ let runtime = match tokio:: runtime:: Builder :: new_current_thread ( )
82+ . enable_all ( )
83+ . build ( )
84+ {
85+ Ok ( rt) => rt,
86+ Err ( e) => {
87+ error ! ( "Failed to create runtime for connection: {}" , e) ;
88+ return ;
89+ }
90+ } ;
91+
92+ runtime. block_on ( async move {
93+ // Convert std UnixStream to tokio UnixStream
94+ if let Err ( e) = stream. set_nonblocking ( true ) {
95+ error ! ( "Failed to set nonblocking: {}" , e) ;
96+ return ;
97+ }
98+
99+ let tokio_stream = match UnixStream :: from_std ( stream) {
100+ Ok ( s) => s,
101+ Err ( e) => {
102+ error ! ( "Failed to convert stream: {}" , e) ;
103+ return ;
104+ }
105+ } ;
106+
107+ // Handle the connection using existing async infrastructure
108+ use datadog_ipc:: platform:: AsyncChannel ;
109+
110+ // Use the cloned shared server
111+ server
112+ . accept_connection ( AsyncChannel :: from ( tokio_stream) )
113+ . await ;
114+ } ) ;
115+ } ,
116+ ) {
117+ error ! ( "Failed to spawn handler thread: {}" , e) ;
118+ }
119+ }
120+ Err ( e) => {
121+ match e. kind ( ) {
122+ io:: ErrorKind :: Interrupted => continue ,
123+ io:: ErrorKind :: InvalidInput => break , // Socket shut down
124+ _ => {
125+ error ! ( "Accept error: {}" , e) ;
126+ thread:: sleep ( Duration :: from_millis ( 100 ) ) ;
127+ }
128+ }
129+ }
130+ }
131+ }
132+
133+ info ! ( "Master listener stopped accepting connections" ) ;
134+
135+ // Shut down the server - connection threads will finish naturally
136+ server. shutdown ( ) ;
68137 } )
69138 . map_err ( io:: Error :: other) ?;
70139
@@ -95,6 +164,7 @@ pub fn connect_worker_unix(master_pid: i32) -> io::Result<SidecarTransport> {
95164 }
96165 }
97166
167+ error ! ( "Worker failed to connect after 10 attempts" ) ;
98168 Err ( last_error. unwrap_or_else ( || io:: Error :: other ( "Connection failed" ) ) )
99169}
100170
@@ -112,28 +182,35 @@ pub fn shutdown_master_listener_unix() -> io::Result<()> {
112182
113183 if let Some ( ( handle, fd) ) = listener_data {
114184 stop_listening ( fd) ;
185+ let _ = handle. join ( ) ;
186+ }
115187
116- // Try to join with a timeout to avoid hanging the shutdown
117- // We spawn a helper thread to do the join so we can implement a timeout
118- let ( tx, rx) = std:: sync:: mpsc:: channel ( ) ;
119- std:: thread:: spawn ( move || {
120- let result = handle. join ( ) ;
121- let _ = tx. send ( result) ;
122- } ) ;
123-
124- // Wait up to 2 seconds for clean shutdown (including time for tokio runtime shutdown)
125- match rx. recv_timeout ( Duration :: from_millis ( 2000 ) ) {
126- Ok ( Ok ( ( ) ) ) => {
127- // Clean shutdown
128- }
129- Ok ( Err ( _) ) => {
130- error ! ( "Listener thread panicked during shutdown" ) ;
131- }
132- Err ( _) => {
133- // Timeout - thread didn't exit in time
134- // This is acceptable as the OS will clean up when the process exits
188+ Ok ( ( ) )
189+ }
190+
191+ /// Clears listener state inherited by a child process after fork().
192+ /// With the blocking I/O approach, the child only needs to forget the listener thread handle (never join it).
193+ /// Each connection creates its own short-lived runtime, so there's no global runtime to inherit.
194+ pub fn clear_inherited_listener_unix ( ) -> io:: Result < ( ) > {
195+ info ! ( "Child process clearing inherited listener state" ) ;
196+ match MASTER_LISTENER . lock ( ) {
197+ Ok ( mut guard) => {
198+ if let Some ( ( handle, _fd) ) = guard. take ( ) {
199+ info ! ( "Child forgetting inherited listener thread handle" ) ;
200+ // Forget the handle without joining - parent owns the thread
201+ std:: mem:: forget ( handle) ;
202+ info ! ( "Child successfully forgot listener handle" ) ;
203+ } else {
204+ info ! ( "Child found no listener to clear" ) ;
135205 }
136206 }
207+ Err ( e) => {
208+ error ! (
209+ "Failed to acquire lock for clearing inherited listener: {}" ,
210+ e
211+ ) ;
212+ return Err ( io:: Error :: other ( "Mutex poisoned" ) ) ;
213+ }
137214 }
138215
139216 Ok ( ( ) )
0 commit comments