Skip to content

Commit 13ae1ad

Browse files
authored
nvme_test: cc.enable() delay capability during servicing for the NvmeFaultController (#1922)
This PR adds fault-injection support to the NVMe fault controller: a configurable delay can now be applied when the cc.en() (controller enable) bit is set.
1 parent 7b4b6ba commit 13ae1ad

File tree

6 files changed

+65
-2
lines changed

6 files changed

+65
-2
lines changed

vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ use mesh::CellUpdater;
1212
use nvme::NvmeControllerCaps;
1313
use nvme_resources::fault::AdminQueueFaultConfig;
1414
use nvme_resources::fault::FaultConfiguration;
15+
use nvme_resources::fault::PciFaultConfig;
1516
use nvme_resources::fault::QueueFaultBehavior;
1617
use nvme_spec::AdminOpcode;
1718
use nvme_spec::Cap;
@@ -50,6 +51,7 @@ async fn test_nvme_command_fault(driver: DefaultDriver) {
5051
AdminOpcode::CREATE_IO_COMPLETION_QUEUE.0,
5152
QueueFaultBehavior::Update(output_cmd),
5253
),
54+
pci_fault: PciFaultConfig::new(),
5355
},
5456
)
5557
.await;

vm/devices/storage/nvme_resources/src/fault.rs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,22 @@ pub enum QueueFaultBehavior<T> {
2323
Panic(String),
2424
}
2525

26+
#[derive(Clone, MeshPayload)]
27+
/// Supported fault behaviors for PCI faults.
28+
pub enum PciFaultBehavior {
29+
/// Introduce a delay of the given duration to the PCI operation.
30+
Delay(Duration),
31+
/// Inject no fault; the operation proceeds unmodified.
32+
Default,
33+
}
34+
35+
#[derive(MeshPayload, Clone)]
36+
/// A buildable fault configuration for the controller management interface (cc.en(), csts.rdy(), ...).
37+
pub struct PciFaultConfig {
38+
/// Fault to apply when the cc.en() bit is set while enabling the controller.
39+
pub controller_management_fault_enable: PciFaultBehavior,
40+
}
41+
2642
#[derive(MeshPayload, Clone)]
2743
/// A buildable fault configuration
2844
pub struct AdminQueueFaultConfig {
@@ -37,6 +53,23 @@ pub struct FaultConfiguration {
3753
pub fault_active: Cell<bool>,
3854
/// Fault to apply to the admin queues
3955
pub admin_fault: AdminQueueFaultConfig,
56+
/// Fault to apply to the management layer of the controller
57+
pub pci_fault: PciFaultConfig,
58+
}
59+
60+
impl PciFaultConfig {
61+
/// Create a new fault configuration that injects no faults
/// (the cc.en() fault is set to `PciFaultBehavior::Default`).
62+
pub fn new() -> Self {
63+
Self {
64+
controller_management_fault_enable: PciFaultBehavior::Default,
65+
}
66+
}
67+
68+
/// Set the fault behavior to apply to cc.en(), replacing any previously
/// configured one, and return the updated configuration (builder style).
69+
pub fn with_cc_enable_fault(mut self, behaviour: PciFaultBehavior) -> Self {
70+
self.controller_management_fault_enable = behaviour;
71+
self
72+
}
4073
}
4174

4275
impl AdminQueueFaultConfig {

vm/devices/storage/nvme_test/src/pci.rs

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ use guid::Guid;
3131
use inspect::Inspect;
3232
use inspect::InspectMut;
3333
use nvme_resources::fault::FaultConfiguration;
34+
use nvme_resources::fault::PciFaultBehavior;
3435
use parking_lot::Mutex;
3536
use pci_core::capabilities::msix::MsixEmulator;
3637
use pci_core::cfg_space_emu::BarMemoryKind;
@@ -59,6 +60,8 @@ pub struct NvmeFaultController {
5960
qe_sizes: Arc<Mutex<IoQueueEntrySizes>>,
6061
#[inspect(flatten, mut)]
6162
workers: NvmeWorkers,
63+
#[inspect(skip)]
64+
fault_configuration: FaultConfiguration,
6265
}
6366

6467
#[derive(Inspect)]
@@ -155,7 +158,7 @@ impl NvmeFaultController {
155158
max_cqs: caps.max_io_queues,
156159
qe_sizes: Arc::clone(&qe_sizes),
157160
subsystem_id: caps.subsystem_id,
158-
fault_configuration,
161+
fault_configuration: fault_configuration.clone(),
159162
});
160163

161164
Self {
@@ -164,6 +167,7 @@ impl NvmeFaultController {
164167
registers: RegState::new(),
165168
workers: admin,
166169
qe_sizes,
170+
fault_configuration,
167171
}
168172
}
169173

@@ -341,6 +345,18 @@ impl NvmeFaultController {
341345

342346
if cc.en() != self.registers.cc.en() {
343347
if cc.en() {
348+
// If a fault was configured for cc.en(), apply it here before enabling; a Delay blocks the calling thread.
349+
match self
350+
.fault_configuration
351+
.pci_fault
352+
.controller_management_fault_enable
353+
{
354+
PciFaultBehavior::Delay(duration) => {
355+
std::thread::sleep(duration);
356+
}
357+
PciFaultBehavior::Default => {}
358+
}
359+
344360
// Some drivers will write zeros to IOSQES and IOCQES, assuming that the defaults will work.
345361
if cc.iocqes() == 0 {
346362
cc.set_iocqes(IOCQES);
@@ -430,6 +446,7 @@ impl ChangeDeviceState for NvmeFaultController {
430446
registers,
431447
qe_sizes,
432448
workers,
449+
fault_configuration: _,
433450
} = self;
434451
workers.reset().await;
435452
cfg_space.reset();

vm/devices/storage/nvme_test/src/tests/controller_tests.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ use guid::Guid;
1818
use mesh::CellUpdater;
1919
use nvme_resources::fault::AdminQueueFaultConfig;
2020
use nvme_resources::fault::FaultConfiguration;
21+
use nvme_resources::fault::PciFaultConfig;
2122
use nvme_resources::fault::QueueFaultBehavior;
2223
use nvme_spec::Command;
2324
use nvme_spec::Completion;
@@ -208,6 +209,7 @@ async fn test_basic_registers(driver: DefaultDriver) {
208209
let fault_configuration = FaultConfiguration {
209210
fault_active: CellUpdater::new(false).cell(),
210211
admin_fault: AdminQueueFaultConfig::new(),
212+
pci_fault: PciFaultConfig::new(),
211213
};
212214
let mut nvmec = instantiate_controller(driver, &gm, None, fault_configuration);
213215
let mut dword = 0u32;
@@ -236,6 +238,7 @@ async fn test_invalid_configuration(driver: DefaultDriver) {
236238
let fault_configuration = FaultConfiguration {
237239
fault_active: CellUpdater::new(false).cell(),
238240
admin_fault: AdminQueueFaultConfig::new(),
241+
pci_fault: PciFaultConfig::new(),
239242
};
240243
let mut nvmec = instantiate_controller(driver, &gm, None, fault_configuration);
241244
let mut dword = 0u32;
@@ -254,6 +257,7 @@ async fn test_enable_controller(driver: DefaultDriver) {
254257
let fault_configuration = FaultConfiguration {
255258
fault_active: CellUpdater::new(false).cell(),
256259
admin_fault: AdminQueueFaultConfig::new(),
260+
pci_fault: PciFaultConfig::new(),
257261
};
258262
let mut nvmec = instantiate_controller(driver, &gm, None, fault_configuration);
259263

@@ -285,6 +289,7 @@ async fn test_multi_page_admin_queues(driver: DefaultDriver) {
285289
let fault_configuration = FaultConfiguration {
286290
fault_active: CellUpdater::new(false).cell(),
287291
admin_fault: AdminQueueFaultConfig::new(),
292+
pci_fault: PciFaultConfig::new(),
288293
};
289294
let mut nvmec = instantiate_controller(driver, &gm, None, fault_configuration);
290295

@@ -359,6 +364,7 @@ async fn test_send_identify_no_fault(driver: DefaultDriver) {
359364
let fault_configuration = FaultConfiguration {
360365
fault_active: CellUpdater::new(false).cell(),
361366
admin_fault: AdminQueueFaultConfig::new(),
367+
pci_fault: PciFaultConfig::new(),
362368
};
363369
let cqe = send_identify(driver, fault_configuration).await;
364370

@@ -376,6 +382,7 @@ async fn test_send_identify_with_sq_fault(driver: DefaultDriver) {
376382
nvme_spec::AdminOpcode::IDENTIFY.0,
377383
QueueFaultBehavior::Update(faulty_identify),
378384
),
385+
pci_fault: PciFaultConfig::new(),
379386
};
380387
let cqe = send_identify(driver, fault_configuration).await;
381388

vm/devices/storage/nvme_test/src/tests/shadow_doorbell_tests.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ use guestmem::GuestMemory;
1515
use mesh::CellUpdater;
1616
use nvme_resources::fault::AdminQueueFaultConfig;
1717
use nvme_resources::fault::FaultConfiguration;
18+
use nvme_resources::fault::PciFaultConfig;
1819
use pal_async::DefaultDriver;
1920
use pal_async::async_test;
2021
use pci_core::test_helpers::TestPciInterruptController;
@@ -42,6 +43,7 @@ async fn setup_shadow_doorbells(
4243
let fault_configuration = FaultConfiguration {
4344
fault_active: CellUpdater::new(false).cell(),
4445
admin_fault: AdminQueueFaultConfig::new(),
46+
pci_fault: PciFaultConfig::new(),
4547
}; // Build a controller with 64 entries in the admin queue (just so that the ASQ fits in one page).
4648
let mut nvmec = instantiate_and_build_admin_queue(
4749
cq_buf,

vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ use nvme_resources::NamespaceDefinition;
1616
use nvme_resources::NvmeFaultControllerHandle;
1717
use nvme_resources::fault::AdminQueueFaultConfig;
1818
use nvme_resources::fault::FaultConfiguration;
19+
use nvme_resources::fault::PciFaultConfig;
1920
use nvme_resources::fault::QueueFaultBehavior;
2021
use petri::OpenHclServicingFlags;
2122
use petri::PetriVmBuilder;
@@ -255,8 +256,9 @@ async fn keepalive_with_nvme_fault(
255256
fault_active: fault_start_updater.cell(),
256257
admin_fault: AdminQueueFaultConfig::new().with_submission_queue_fault(
257258
nvme_spec::AdminOpcode::CREATE_IO_COMPLETION_QUEUE.0,
258-
QueueFaultBehavior::Panic("Received a CREATE_IO_COMPLETION_QUEUE command during servicing with keepalive enabled. This should never happen.".to_string()),
259+
QueueFaultBehavior::Panic("Received a CREATE_IO_COMPLETION_QUEUE command during servicing with keepalive enabled. THERE IS A BUG SOMEWHERE.".to_string()),
259260
),
261+
pci_fault: PciFaultConfig::new(),
260262
};
261263

262264
let (mut vm, agent) = config

0 commit comments

Comments
 (0)