Reduce test flakes #8989
base: main
@@ -203,7 +203,7 @@ pub(crate) async fn wait_for_all_replacements(
             }
         },
         &std::time::Duration::from_millis(50),
-        &std::time::Duration::from_secs(180),
+        &std::time::Duration::from_secs(260),
     )
     .await
     .expect("all replacements finished");
@@ -481,7 +481,7 @@ mod region_replacement {
             }
         },
         &std::time::Duration::from_millis(50),
-        &std::time::Duration::from_secs(60),
+        &std::time::Duration::from_secs(260),
     )
     .await
     .expect("request transitioned to expected state");
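Both timeout bumps leave the 50 ms poll interval untouched and extend only the overall deadline passed to wait_for_condition (180 s and 60 s both become 260 s). The sketch below illustrates how a poll-until helper of this shape bounds its retries; `poll_until` is a hypothetical stand-in written for this note, not omicron's actual wait_for_condition API, and it assumes tokio is available. At 50 ms per poll, a 260 s deadline allows roughly 5200 checks where 180 s allowed about 3600.

```rust
use std::future::Future;
use std::time::{Duration, Instant};

/// Hypothetical stand-in for a wait_for_condition-style helper:
/// re-run `cond` every `interval` until it yields Some(value) or
/// `deadline` elapses.
async fn poll_until<T, F, Fut>(
    mut cond: F,
    interval: Duration,
    deadline: Duration,
) -> Result<T, ()>
where
    F: FnMut() -> Fut,
    Fut: Future<Output = Option<T>>,
{
    let start = Instant::now();
    loop {
        if let Some(value) = cond().await {
            return Ok(value);
        }
        if start.elapsed() >= deadline {
            // Out of time: the caller's .expect() fires.
            return Err(());
        }
        tokio::time::sleep(interval).await;
    }
}

#[tokio::main]
async fn main() {
    // With a 50 ms interval, a 260 s deadline permits ~5200 checks
    // where the old 180 s deadline permitted ~3600.
    let n = poll_until(
        || async { Some(42) },
        Duration::from_millis(50),
        Duration::from_secs(260),
    )
    .await
    .expect("condition never became true");
    assert_eq!(n, 42);
}
```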
@@ -1071,24 +1071,34 @@ async fn test_racing_replacements_for_soft_deleted_disk_volume(
     activate_background_task(&internal_client, "region_replacement_driver")
         .await;

-    assert!(match last_background_task.last {
+    let res = match last_background_task.last {
         LastResult::Completed(last_result_completed) => {
             match serde_json::from_value::<RegionReplacementDriverStatus>(
                 last_result_completed.details,
             ) {
                 Err(e) => {
                     eprintln!("Json not what we expected");
                     eprintln!("{e}");
                     false
                 }

-                Ok(v) => !v.drive_invoked_ok.is_empty(),
+                Ok(v) => {
+                    if !v.drive_invoked_ok.is_empty() {
+                        eprintln!("v.drive_ok: {:?}", v.drive_invoked_ok);
+                        true
+                    } else {
+                        eprintln!("v.drive_ok: {:?} empty", v.drive_invoked_ok);
+                        false
+                    }
+                }
             }
         }

-        _ => {
+        x => {
+            eprintln!("Unexpected result here: {:?}", x);
             false
         }
-    });
+    };
+    assert!(res);

     // wait for the drive saga to complete here
     wait_for_condition(
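The key change in this hunk is mechanical: instead of wrapping the whole match in `assert!(...)`, the match result is bound to `res` and asserted afterwards, which gives each arm room to eprintln! its reasoning before the assertion can fire. A minimal self-contained sketch of that pattern (the `Outcome` enum and its values are made up for illustration):

```rust
#[derive(Debug)]
enum Outcome {
    Completed(Vec<u32>),
    Pending,
}

fn main() {
    let outcome = Outcome::Completed(vec![1, 2]);

    // Bind the match result instead of wrapping the match in
    // assert!(...), so each arm can print context before the
    // assertion fires on failure.
    let res = match outcome {
        Outcome::Completed(ids) => {
            if !ids.is_empty() {
                eprintln!("completed ids: {:?}", ids);
                true
            } else {
                eprintln!("completed ids: {:?} empty", ids);
                false
            }
        }
        // Name the catch-all binding so the unexpected variant is
        // visible in the test output, rather than discarded by `_`.
        x => {
            eprintln!("unexpected result here: {:?}", x);
            false
        }
    };
    assert!(res);
}
```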
@@ -1485,20 +1495,33 @@ mod region_snapshot_replacement {

         // Assert no volumes are referencing the snapshot address

-        let volumes = self
-            .datastore
-            .find_volumes_referencing_socket_addr(
-                &self.opctx(),
-                self.snapshot_socket_addr,
-            )
-            .await
-            .unwrap();
-
-        if !volumes.is_empty() {
-            eprintln!("{:?}", volumes);
-        }
-
-        assert!(volumes.is_empty());
+        let mut counter = 1;
+        loop {
+            let volumes = self
+                .datastore
+                .find_volumes_referencing_socket_addr(
+                    &self.opctx(),
+                    self.snapshot_socket_addr,
+                )
+                .await
+                .unwrap();
+
+            if !volumes.is_empty() {
+                eprintln!(
+                    "Volume should be gone, try {counter} {:?}",
+                    volumes
+                );
+                tokio::time::sleep(std::time::Duration::from_secs(5)).await;
+                counter += 1;
+                if counter > 200 {
+                    panic!(
+                        "Tried 200 times, and still this did not finish"
+                    );
+                }
+            } else {
+                break;
+            }
+        }
     }

     /// Assert no Crucible resources are leaked
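Rather than checking once and racing the background cleanup task, the test now polls: re-query, log the leftovers, sleep 5 s, and give up with a panic after 200 attempts (about 1000 s). A generic sketch of that bounded-retry shape follows; `fetch_leftovers` is a hypothetical stand-in for the datastore query, simulating cleanup that finishes after a few polls.

```rust
use std::time::Duration;

/// Hypothetical stand-in for the datastore query: returns whatever
/// items still reference the resource under test.
async fn fetch_leftovers(remaining: &mut u32) -> Vec<u32> {
    // Simulate background cleanup making progress on each poll.
    *remaining = remaining.saturating_sub(1);
    (0..*remaining).collect()
}

#[tokio::main]
async fn main() {
    let mut remaining = 3;
    let mut counter = 1;
    loop {
        let leftovers = fetch_leftovers(&mut remaining).await;
        if leftovers.is_empty() {
            break; // cleanup finished; the invariant holds
        }
        eprintln!("should be gone, try {counter}: {:?}", leftovers);
        tokio::time::sleep(Duration::from_secs(5)).await;
        counter += 1;
        if counter > 200 {
            // ~1000 s total; fail loudly instead of hanging forever
            panic!("tried 200 times, and still this did not finish");
        }
    }
}
```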
@@ -1649,32 +1672,59 @@ mod region_snapshot_replacement {
         match result {
             InsertStepResult::Inserted { .. } => {}

-            _ => {
-                assert!(
-                    false,
-                    "bad result from create_region_snapshot_replacement_step"
-                );
+            InsertStepResult::AlreadyHandled { existing_step_id } => {
+                let region_snapshot_replace_request = self
+                    .datastore
+                    .get_region_snapshot_replacement_request_by_id(
+                        &self.opctx(),
+                        existing_step_id,
+                    )
+                    .await;
+                eprintln!(
+                    "we were supposed to create this: {:?} but found it AlreadyHandled, then got {:?}",
+                    self.replacement_request_id,
+                    region_snapshot_replace_request
+                );
+                panic!("Something else created our replacement");
             }
         }
     }

     pub async fn assert_read_only_target_gone(&self) {
-        let region_snapshot_replace_request = self
-            .datastore
-            .get_region_snapshot_replacement_request_by_id(
-                &self.opctx(),
-                self.replacement_request_id,
-            )
-            .await
-            .unwrap();
-
-        assert!(
-            self.datastore
-                .read_only_target_addr(&region_snapshot_replace_request)
-                .await
-                .unwrap()
-                .is_none()
-        );
+        eprintln!(
+            "Starting, replace_request_id: {:?}",
+            self.replacement_request_id
+        );
+        let mut i = 1;
+        loop {
+            let region_snapshot_replace_request = self
+                .datastore
+                .get_region_snapshot_replacement_request_by_id(
+                    &self.opctx(),
+                    self.replacement_request_id,
+                )
+                .await
+                .unwrap();
+            eprintln!(
+                "In loop {i} with rs_replace_request: {:?}",
+                region_snapshot_replace_request
+            );
+
+            let res = self
+                .datastore
+                .read_only_target_addr(&region_snapshot_replace_request)
+                .await
+                .unwrap();
+
+            eprintln!("In loop {i} target that should be gone: {:?}", res);
+            if res.is_none() {
+                // test pass, move on
+                break;
+            }
+            eprintln!("loop {i}, snapshot that should be gone: {:?}", res);
+            tokio::time::sleep(std::time::Duration::from_secs(5)).await;
+            i += 1;
+        }
     }

     pub async fn remove_disk_from_snapshot_rop(&self) {

Inline review thread on the eprintln message above:

Reviewer: I like this error message better, but: did you see this?

Author: I did see it in the beginning, yes.
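The first half of this hunk replaces a `_ => assert!(false, ...)` catch-all with an explicit `AlreadyHandled` arm that looks up and prints the conflicting request before panicking, so a flake leaves evidence behind. A small sketch of the same idea, with a made-up `InsertResult` enum standing in for InsertStepResult:

```rust
#[derive(Debug)]
enum InsertResult {
    Inserted { id: u64 },
    AlreadyHandled { existing_id: u64 },
}

fn handle(result: InsertResult) {
    match result {
        InsertResult::Inserted { .. } => {}

        // Destructure the conflicting variant explicitly so the
        // failure output says *what* already handled the work,
        // instead of a bare assert!(false, "bad result").
        InsertResult::AlreadyHandled { existing_id } => {
            eprintln!(
                "expected to insert, but step {existing_id} already existed"
            );
            panic!("something else created our replacement");
        }
    }
}

fn main() {
    handle(InsertResult::Inserted { id: 1 });
}
```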
Review thread:

Reviewer: What state was this in when you saw this? This seems like a failure to properly unwind the saga, as it should never have reached this check without going through rsrss_set_saga_id_undo. Likewise, the assert_eq above checking operating_saga_id should have fired if the request was in an intermediate state.

Author: It does eventually get to the requested state. When I saw the panic, it would be in Allocating state. At least that's what I had written down in an earlier comment.