Skip to content

Commit b8bd54c

Browse files
davepacheco authored and charliepark committed
add live test for Nexus handoff (#9024)
1 parent 615a8e9 commit b8bd54c

File tree

4 files changed

+653
-87
lines changed

4 files changed

+653
-87
lines changed

live-tests/tests/common/mod.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ impl LiveTestContext {
4141
let datastore = create_datastore(&log, &resolver).await?;
4242
let opctx = OpContext::for_tests(log.clone(), datastore.clone());
4343
check_hardware_environment(&opctx, &datastore).await?;
44+
check_configuration(&opctx, &datastore).await?;
4445
Ok(LiveTestContext { logctx, opctx, resolver, datastore })
4546
}
4647

@@ -255,3 +256,25 @@ async fn check_hardware_environment(
255256
))
256257
}
257258
}
259+
260+
/// Performs checks on the system configuration to determine if it's appropriate
261+
/// for live tests
262+
///
263+
/// Currently, this just verifies that the planner is off.
264+
async fn check_configuration(
265+
opctx: &OpContext,
266+
datastore: &DataStore,
267+
) -> Result<(), anyhow::Error> {
268+
let reconfigurator_config = datastore
269+
.reconfigurator_config_get_latest(opctx)
270+
.await
271+
.expect("obtained latest reconfigurator config")
272+
.unwrap_or_default();
273+
if reconfigurator_config.config.planner_enabled {
274+
Err(anyhow!(
275+
"refusing to operate on a system with blueprint planning enabled"
276+
))
277+
} else {
278+
Ok(())
279+
}
280+
}

live-tests/tests/common/reconfigurator.rs

Lines changed: 147 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,55 @@
44

55
//! Helpers common to Reconfigurator tests
66
7-
use anyhow::{Context, ensure};
8-
use nexus_client::types::BlueprintTargetSet;
7+
use anyhow::{Context, anyhow, bail, ensure};
8+
use nexus_client::types::{BackgroundTasksActivateRequest, BlueprintTargetSet};
9+
use nexus_db_queries::context::OpContext;
10+
use nexus_db_queries::db::DataStore;
911
use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder;
1012
use nexus_reconfigurator_planning::planner::PlannerRng;
1113
use nexus_types::deployment::{Blueprint, PlanningInput};
14+
use nexus_types::external_api::views::SledState;
1215
use nexus_types::inventory::Collection;
16+
use omicron_test_utils::dev::poll::{CondCheckError, wait_for_condition};
1317
use omicron_uuid_kinds::GenericUuid;
1418
use slog::{debug, info};
19+
use slog_error_chain::InlineErrorChain;
20+
use std::time::Duration;
21+
22+
/// Return the current target blueprint
23+
///
24+
/// Also validates that it's enabled. If an operator has disabled execution, we
25+
/// don't want to proceed with tests.
26+
pub async fn blueprint_load_target_enabled(
27+
log: &slog::Logger,
28+
nexus: &nexus_client::Client,
29+
) -> Result<Blueprint, anyhow::Error> {
30+
// Fetch the current target configuration.
31+
info!(log, "editing current target blueprint");
32+
let target_blueprint = nexus
33+
.blueprint_target_view()
34+
.await
35+
.context("fetch current target config")?
36+
.into_inner();
37+
38+
debug!(log, "found current target blueprint";
39+
"blueprint_id" => %target_blueprint.target_id
40+
);
41+
ensure!(
42+
target_blueprint.enabled,
43+
"refusing to operate on a system with target blueprint disabled"
44+
);
45+
46+
let blueprint = nexus
47+
.blueprint_view(target_blueprint.target_id.as_untyped_uuid())
48+
.await
49+
.context("fetch current target blueprint")?
50+
.into_inner();
51+
debug!(log, "fetched current target blueprint";
52+
"blueprint_id" => %target_blueprint.target_id
53+
);
54+
Ok(blueprint)
55+
}
1556

1657
/// Modify the system by editing the current target blueprint
1758
///
@@ -44,28 +85,7 @@ pub async fn blueprint_edit_current_target(
4485
) -> Result<(Blueprint, Blueprint), anyhow::Error> {
4586
// Fetch the current target configuration.
4687
info!(log, "editing current target blueprint");
47-
let target_blueprint = nexus
48-
.blueprint_target_view()
49-
.await
50-
.context("fetch current target config")?
51-
.into_inner();
52-
debug!(log, "found current target blueprint";
53-
"blueprint_id" => %target_blueprint.target_id
54-
);
55-
ensure!(
56-
target_blueprint.enabled,
57-
"refusing to modify a system with target blueprint disabled"
58-
);
59-
60-
// Fetch the actual blueprint.
61-
let blueprint1 = nexus
62-
.blueprint_view(target_blueprint.target_id.as_untyped_uuid())
63-
.await
64-
.context("fetch current target blueprint")?
65-
.into_inner();
66-
debug!(log, "fetched current target blueprint";
67-
"blueprint_id" => %target_blueprint.target_id
68-
);
88+
let blueprint1 = blueprint_load_target_enabled(log, nexus).await?;
6989

7090
// Make a new builder based on that blueprint and use `edit_fn` to edit it.
7191
let mut builder = BlueprintBuilder::new_based_on(
@@ -83,7 +103,7 @@ pub async fn blueprint_edit_current_target(
83103
// Assemble the new blueprint, import it, and make it the new target.
84104
let blueprint2 = builder.build();
85105
info!(log, "assembled new blueprint based on target";
86-
"current_target_id" => %target_blueprint.target_id,
106+
"current_target_id" => %blueprint1.id,
87107
"new_blueprint_id" => %blueprint2.id,
88108
);
89109
nexus
@@ -107,3 +127,105 @@ pub async fn blueprint_edit_current_target(
107127

108128
Ok((blueprint1, blueprint2))
109129
}
130+
131+
/// Checks whether the given blueprint's sled configurations appear to be
132+
/// propagated to all sleds.
133+
///
134+
/// If so, returns the inventory collection so that the caller can check
135+
/// additional details if wanted. If not or if we failed to determine the
136+
/// answer, returns an error.
137+
pub async fn blueprint_sled_configs_propagated(
138+
opctx: &OpContext,
139+
datastore: &DataStore,
140+
blueprint: &Blueprint,
141+
) -> Result<Collection, anyhow::Error> {
142+
let log = &opctx.log;
143+
let latest_collection = datastore
144+
.inventory_get_latest_collection(opctx)
145+
.await
146+
.context("fetching latest collection")?
147+
.ok_or_else(|| anyhow!("have no inventory collections"))?;
148+
debug!(log, "got inventory"; "id" => %latest_collection.id);
149+
for (sled_id, sled_config) in &blueprint.sleds {
150+
if sled_config.state != SledState::Active {
151+
continue;
152+
}
153+
154+
let agent = latest_collection
155+
.sled_agents
156+
.get(sled_id)
157+
.ok_or_else(|| anyhow!("sled {sled_id}: missing inventory"))?;
158+
let reconciled_config = &agent
159+
.last_reconciliation
160+
.as_ref()
161+
.ok_or_else(|| {
162+
anyhow!("sled {sled_id}: missing last_reconciliation")
163+
})?
164+
.last_reconciled_config;
165+
if reconciled_config.generation < sled_config.sled_agent_generation {
166+
bail!(
167+
"sled {sled_id}: last reconciled generation {}, waiting for {}",
168+
reconciled_config.generation,
169+
sled_config.sled_agent_generation
170+
);
171+
}
172+
}
173+
174+
Ok(latest_collection)
175+
}
176+
177+
/// Waits for the given blueprint's sled configurations to appear to be
178+
/// propagated to all sleds.
179+
///
180+
/// Returns the inventory collection so that the caller can check additional
181+
/// details if wanted.
182+
pub async fn blueprint_wait_sled_configs_propagated(
183+
opctx: &OpContext,
184+
datastore: &DataStore,
185+
blueprint: &Blueprint,
186+
nexus: &nexus_client::Client,
187+
timeout: Duration,
188+
) -> Result<Collection, anyhow::Error> {
189+
wait_for_condition(
190+
|| async {
191+
match blueprint_sled_configs_propagated(opctx, datastore, blueprint)
192+
.await
193+
{
194+
Ok(collection) => Ok(collection),
195+
Err(error) => {
196+
debug!(
197+
opctx.log,
198+
"blueprint_wait_sled_configs_propagated";
199+
InlineErrorChain::new(&*error)
200+
);
201+
202+
// Activate the inventory collector.
203+
info!(opctx.log, "activating inventory collector");
204+
nexus
205+
.bgtask_activate(&BackgroundTasksActivateRequest {
206+
bgtask_names: vec![String::from(
207+
"inventory_collection",
208+
)],
209+
})
210+
.await
211+
.expect("activating inventory background task");
212+
213+
// We don't use the variant of `CondCheckError` that carries
214+
// a permanent error, so we need to put a type here. We
215+
// want the resulting error to impl `ToString`, so we need a
216+
// type that impls that. We pick `String`.
217+
Err(CondCheckError::<String>::NotYet)
218+
}
219+
}
220+
},
221+
&Duration::from_millis(3000),
222+
&timeout,
223+
)
224+
.await
225+
.map_err(|error| {
226+
anyhow!(
227+
"waiting for blueprint {}'s sled configs to be propagated: {error}",
228+
blueprint.id
229+
)
230+
})
231+
}

0 commit comments

Comments
 (0)