diff --git a/packages/edge/infra/client/manager/src/actor/mod.rs b/packages/edge/infra/client/manager/src/actor/mod.rs index 80766a0102..05ee6a5719 100644 --- a/packages/edge/infra/client/manager/src/actor/mod.rs +++ b/packages/edge/infra/client/manager/src/actor/mod.rs @@ -22,7 +22,7 @@ mod setup; /// How often to check for a PID when one is not present and a stop command was received. const STOP_PID_INTERVAL: Duration = std::time::Duration::from_millis(250); /// How many times to check for a PID when a stop command was received. -const STOP_PID_RETRIES: usize = 32; +const STOP_PID_RETRIES: usize = 1024; pub struct Actor { actor_id: Uuid, @@ -377,16 +377,31 @@ impl Actor { // Signal command might be sent before the actor has a runner. This loop waits for the runner to start let runner_guard = loop { - let runner_guard = self.runner.lock().await; - if runner_guard.is_some() { - break Some(runner_guard); + { + let runner_guard = self.runner.lock().await; + if runner_guard.is_some() { + break Some(runner_guard); + } } - tracing::warn!( - actor_id=?self.actor_id, - generation=?self.generation, - "waiting for pid to signal actor", - ); + if *self.exited.lock().await { + tracing::warn!( + actor_id=?self.actor_id, + generation=?self.generation, + "actor exited before PID was set, ignoring signal", + ); + + break None; + } + + // Progress log + if i % 10 == 0 { + tracing::warn!( + actor_id=?self.actor_id, + generation=?self.generation, + "waiting for PID to signal actor", + ); + } if i > STOP_PID_RETRIES { tracing::error!( @@ -396,6 +411,7 @@ impl Actor { break None; } + i += 1; tokio::time::sleep(STOP_PID_INTERVAL).await; diff --git a/packages/edge/infra/client/manager/src/ctx.rs b/packages/edge/infra/client/manager/src/ctx.rs index 8c4bebbc6e..f8e1d4a770 100644 --- a/packages/edge/infra/client/manager/src/ctx.rs +++ b/packages/edge/infra/client/manager/src/ctx.rs @@ -412,13 +412,7 @@ impl Ctx { protocol::ToClient::PrewarmImage { image_id, image_artifact_url_stub, - } => { - let self2 = self.clone(); - - tokio::spawn(async move { - utils::prewarm_image(&self2, image_id, &image_artifact_url_stub).await - }); - } + } => utils::prewarm_image(&self, image_id, &image_artifact_url_stub), } Ok(()) diff --git a/packages/edge/infra/client/manager/src/utils/mod.rs b/packages/edge/infra/client/manager/src/utils/mod.rs index 1907360122..edbae38096 100644 --- a/packages/edge/infra/client/manager/src/utils/mod.rs +++ b/packages/edge/infra/client/manager/src/utils/mod.rs @@ -2,6 +2,7 @@ use std::{ hash::{DefaultHasher, Hasher}, path::Path, result::Result::{Err, Ok}, + sync::Arc, time::{self, Duration}, }; @@ -395,17 +396,22 @@ pub async fn fetch_image_stream( bail!("artifact url could not be resolved"); } -pub async fn prewarm_image(ctx: &Ctx, image_id: Uuid, image_artifact_url_stub: &str) { +pub fn prewarm_image(ctx: &Arc, image_id: Uuid, image_artifact_url_stub: &str) { // Log full URL for prewarm operation let prewarm_url = format!("{}/{}", image_artifact_url_stub, image_id); tracing::info!(?image_id, %prewarm_url, "prewarming image"); - match fetch_image_stream(ctx, image_id, image_artifact_url_stub, None).await { - Ok(_) => tracing::info!(?image_id, %prewarm_url, "prewarm complete"), - Err(_) => tracing::warn!( - ?image_id, - %prewarm_url, - "prewarm failed, artifact url could not be resolved" - ), - } + let ctx = ctx.clone(); + let image_artifact_url_stub = image_artifact_url_stub.to_string(); + + tokio::spawn(async move { + match fetch_image_stream(&ctx, image_id, &image_artifact_url_stub, None).await { + Ok(_) => tracing::info!(?image_id, %prewarm_url, "prewarm complete"), + Err(_) => tracing::warn!( + ?image_id, + %prewarm_url, + "prewarm failed, artifact url could not be resolved" + ), + } + }); }