Skip to content

Commit ad8ea6b

Browse files
committed
fix(pb): fix actor reschedule with wrong image (#2428)
<!-- Please make sure there is an issue that this PR is correlated to. -->

Fixes RVT-4721

## Changes

<!-- If there are frontend changes, please include screenshots. -->
1 parent 631965b commit ad8ea6b

File tree

4 files changed

+42
-27
lines changed

4 files changed

+42
-27
lines changed

packages/edge/infra/client/manager/src/actor/setup.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -841,7 +841,7 @@ impl Actor {
841841
Path::new("/proc/1/ns/net").to_path_buf()
842842
} else {
843843
// CNI network that will be created
844-
Path::new("/var/run/netns").join(self.actor_id.to_string())
844+
Path::new("/var/run/netns").join(format!("{}-{}", self.actor_id, self.generation))
845845
}
846846
}
847847

packages/edge/services/pegboard/src/workflows/actor/mod.rs

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ pub async fn pegboard_actor(ctx: &mut WorkflowCtx, input: &Input) -> GlobalResul
147147

148148
let state_res = ctx
149149
.loope(
150-
runtime::State::new(client_id, client_workflow_id),
150+
runtime::State::new(client_id, client_workflow_id, input.image_id),
151151
|ctx, state| {
152152
let input = input.clone();
153153

@@ -173,8 +173,13 @@ pub async fn pegboard_actor(ctx: &mut WorkflowCtx, input: &Input) -> GlobalResul
173173
)
174174
.await?;
175175

176-
if let Some(sig) =
177-
runtime::reschedule_actor(ctx, &input, state, None).await?
176+
if let Some(sig) = runtime::reschedule_actor(
177+
ctx,
178+
&input,
179+
state,
180+
state.image_id.unwrap_or(input.image_id),
181+
)
182+
.await?
178183
{
179184
// Destroyed early
180185
return Ok(Loop::Break(runtime::StateRes {
@@ -307,9 +312,14 @@ pub async fn pegboard_actor(ctx: &mut WorkflowCtx, input: &Input) -> GlobalResul
307312
.await?;
308313
}
309314

310-
if runtime::reschedule_actor(ctx, &input, state, None)
311-
.await?
312-
.is_some()
315+
if runtime::reschedule_actor(
316+
ctx,
317+
&input,
318+
state,
319+
state.image_id.unwrap_or(input.image_id),
320+
)
321+
.await?
322+
.is_some()
313323
{
314324
// Destroyed early
315325
return Ok(Loop::Break(runtime::StateRes {
@@ -364,9 +374,19 @@ pub async fn pegboard_actor(ctx: &mut WorkflowCtx, input: &Input) -> GlobalResul
364374
)
365375
.await?;
366376

367-
if let Some(sig) =
368-
runtime::reschedule_actor(ctx, &input, state, Some(sig.image_id))
369-
.await?
377+
ctx.activity(runtime::UpdateImageInput {
378+
image_id: sig.image_id,
379+
})
380+
.await?;
381+
state.image_id = Some(sig.image_id);
382+
383+
if let Some(sig) = runtime::reschedule_actor(
384+
ctx,
385+
&input,
386+
state,
387+
state.image_id.unwrap_or(input.image_id),
388+
)
389+
.await?
370390
{
371391
// Destroyed early
372392
return Ok(Loop::Break(runtime::StateRes {

packages/edge/services/pegboard/src/workflows/actor/runtime.rs

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,16 +28,18 @@ pub struct State {
2828
pub generation: u32,
2929
pub client_id: Uuid,
3030
pub client_workflow_id: Uuid,
31+
pub image_id: Option<Uuid>,
3132
pub drain_timeout_ts: Option<i64>,
3233
pub gc_timeout_ts: Option<i64>,
3334
}
3435

3536
impl State {
36-
pub fn new(client_id: Uuid, client_workflow_id: Uuid) -> Self {
37+
pub fn new(client_id: Uuid, client_workflow_id: Uuid, image_id: Uuid) -> Self {
3738
State {
3839
generation: 0,
3940
client_id,
4041
client_workflow_id,
42+
image_id: Some(image_id),
4143
drain_timeout_ts: None,
4244
gc_timeout_ts: Some(util::timestamp::now() + ACTOR_START_THRESHOLD_MS),
4345
}
@@ -655,7 +657,7 @@ pub async fn spawn_actor(
655657
generation,
656658
config: Box::new(protocol::ActorConfig {
657659
image: protocol::Image {
658-
id: input.image_id,
660+
id: actor_setup.image_id,
659661
artifact_url_stub: actor_setup.artifact_url_stub.clone(),
660662
fallback_artifact_url: actor_setup.fallback_artifact_url.clone(),
661663
kind: match actor_setup.meta.build_kind {
@@ -749,20 +751,19 @@ pub async fn reschedule_actor(
749751
ctx: &mut WorkflowCtx,
750752
input: &Input,
751753
state: &mut State,
752-
new_image_id: Option<Uuid>,
754+
image_id: Uuid,
753755
) -> GlobalResult<Option<Destroy>> {
754756
tracing::debug!(actor_id=?input.actor_id, "rescheduling actor");
755757

756758
ctx.activity(ClearPortsAndResourcesInput {
757759
actor_id: input.actor_id,
758-
image_id: new_image_id.unwrap_or(input.image_id),
760+
image_id,
759761
client_id: state.client_id,
760762
client_workflow_id: state.client_workflow_id,
761763
})
762764
.await?;
763765

764-
let actor_setup =
765-
setup::setup(ctx, &input, setup::SetupCtx::Reschedule { new_image_id }).await?;
766+
let actor_setup = setup::setup(ctx, &input, setup::SetupCtx::Reschedule { image_id }).await?;
766767

767768
let next_generation = state.generation + 1;
768769

packages/edge/services/pegboard/src/workflows/actor/setup.rs

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use foundationdb as fdb;
66
use sqlx::Acquire;
77
use util::serde::AsHashableExt;
88

9-
use super::{runtime, Input, Port};
9+
use super::{Input, Port};
1010
use crate::{
1111
keys, protocol,
1212
types::{ActorLifecycle, ActorResources, GameGuardProtocol, NetworkMode, Routing},
@@ -519,12 +519,13 @@ pub enum SetupCtx {
519519
network_ports: util::serde::HashableMap<String, Port>,
520520
},
521521
Reschedule {
522-
new_image_id: Option<Uuid>,
522+
image_id: Uuid,
523523
},
524524
}
525525

526526
#[derive(Clone)]
527527
pub struct ActorSetupCtx {
528+
pub image_id: Uuid,
528529
pub meta: GetMetaOutput,
529530
pub resources: protocol::Resources,
530531
pub artifact_url_stub: String,
@@ -564,15 +565,7 @@ pub async fn setup(
564565

565566
input.image_id
566567
}
567-
SetupCtx::Reschedule { new_image_id } => {
568-
if let Some(image_id) = new_image_id {
569-
ctx.activity(runtime::UpdateImageInput { image_id }).await?;
570-
571-
image_id
572-
} else {
573-
input.image_id
574-
}
575-
}
568+
SetupCtx::Reschedule { image_id } => image_id,
576569
};
577570

578571
let meta = ctx
@@ -596,6 +589,7 @@ pub async fn setup(
596589
.await?;
597590

598591
Ok(ActorSetupCtx {
592+
image_id,
599593
meta,
600594
resources,
601595
artifact_url_stub: artifacts_res.artifact_url_stub,

0 commit comments

Comments
 (0)