From 3d713a3799198f463c1ed91be502c42739fd803d Mon Sep 17 00:00:00 2001
From: Guramrit Singh
Date: Tue, 26 Aug 2025 20:36:09 -0700
Subject: [PATCH 1/4] Pausing work to create a PR

---
 .../disk_nvme/nvme_driver/src/tests.rs        |  4 +-
 .../storage/nvme_resources/src/fault.rs       | 38 ++++++++++++++++++-
 vm/devices/storage/nvme_test/src/pci.rs       |  7 ++++
 .../nvme_test/src/tests/controller_tests.rs   | 11 +++++-
 .../src/tests/shadow_doorbell_tests.rs        |  2 +
 .../storage/nvme_test/src/workers/admin.rs    |  8 ++--
 .../tests/multiarch/openhcl_servicing.rs      |  2 +-
 7 files changed, 61 insertions(+), 11 deletions(-)

diff --git a/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs b/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs
index 64aaf243ea..4fc98b0a46 100644
--- a/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs
+++ b/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs
@@ -12,7 +12,7 @@ use mesh::CellUpdater;
 use nvme::NvmeControllerCaps;
 use nvme_resources::fault::AdminQueueFaultConfig;
 use nvme_resources::fault::FaultConfiguration;
-use nvme_resources::fault::QueueFaultBehavior;
+use nvme_resources::fault::FaultBehaviour;
 use nvme_spec::AdminOpcode;
 use nvme_spec::Cap;
 use nvme_spec::Command;
@@ -48,7 +48,7 @@ async fn test_nvme_command_fault(driver: DefaultDriver) {
             fault_active: CellUpdater::new(true).cell(),
             admin_fault: AdminQueueFaultConfig::new().with_submission_queue_fault(
                 AdminOpcode::CREATE_IO_COMPLETION_QUEUE.0,
-                QueueFaultBehavior::Update(output_cmd),
+                FaultBehaviour::Update(output_cmd),
             ),
         },
     )
diff --git a/vm/devices/storage/nvme_resources/src/fault.rs b/vm/devices/storage/nvme_resources/src/fault.rs
index 9a93894efd..d44abba9df 100644
--- a/vm/devices/storage/nvme_resources/src/fault.rs
+++ b/vm/devices/storage/nvme_resources/src/fault.rs
@@ -23,11 +23,25 @@ pub enum QueueFaultBehavior {
     Panic(String),
 }
 
+#[derive(MeshPayload)]
+/// A buildable fault configuration for the controller management interface (cc.en(), csts.rdy(), ... )
+pub struct ControllerManagementFaultConfig {
+    /// Fault to apply to cc.en() bit during enablement
+    pub controller_management_fault_enable: FaultBehaviour,
+}
+
+#[derive(MeshPayload)]
+/// A buildable fault configuration for the controller management interface (cc.en(), csts.rdy(), ... )
+pub struct ControllerManagementFaultConfig {
+    /// Fault to apply to cc.en() bit during enablement
+    pub controller_management_fault_enable: FaultBehaviour,
+}
+
 #[derive(MeshPayload, Clone)]
 /// A buildable fault configuration
 pub struct AdminQueueFaultConfig {
     /// A map of NVME opcodes to the fault behavior for each. (This would ideally be a `HashMap`, but `mesh` doesn't support that type. Given that this is not performance sensitive, the lookup is okay)
-    pub admin_submission_queue_faults: Vec<(u8, QueueFaultBehavior)>,
+    pub admin_submission_queue_faults: Vec<(u8, FaultBehaviour)>,
 }
 
 #[derive(MeshPayload, Clone)]
@@ -37,6 +51,26 @@ pub struct FaultConfiguration {
     pub fault_active: Cell<bool>,
     /// Fault to apply to the admin queues
     pub admin_fault: AdminQueueFaultConfig,
+    /// Fault to apply to management layer of the controller
+    pub controller_management_fault: ControllerManagementFaultConfig,
+}
+
+impl ControllerManagementFaultConfig {
+    /// Create a new no-op fault configuration
+    pub fn new() -> Self {
+        Self {
+            controller_management_fault_enable: FaultBehaviour::Default,
+        }
+    }
+
+    /// Create a new fault configuration
+    pub fn with_controller_management_enable_fault(
+        mut self,
+        behaviour: FaultBehaviour,
+    ) -> Self {
+        self.controller_management_fault_enable = behaviour;
+        self
+    }
 }
 
 impl AdminQueueFaultConfig {
@@ -51,7 +85,7 @@ impl AdminQueueFaultConfig {
     pub fn with_submission_queue_fault(
         mut self,
         opcode: u8,
-        behaviour: QueueFaultBehavior,
+        behaviour: FaultBehaviour,
     ) -> Self {
         if self
             .admin_submission_queue_faults
diff --git a/vm/devices/storage/nvme_test/src/pci.rs b/vm/devices/storage/nvme_test/src/pci.rs
index 4837b74028..ab68490242 100644
--- a/vm/devices/storage/nvme_test/src/pci.rs
+++ b/vm/devices/storage/nvme_test/src/pci.rs
@@ -59,6 +59,8 @@ pub struct NvmeFaultController {
     qe_sizes: Arc<Mutex<IoQueueEntrySizes>>,
     #[inspect(flatten, mut)]
     workers: NvmeWorkers,
+    #[inspect(skip)]
+    fault_configuration: FaultConfiguration,
 }
 
 #[derive(Inspect)]
@@ -341,6 +343,9 @@ impl NvmeFaultController {
 
         if cc.en() != self.registers.cc.en() {
             if cc.en() {
+                // If any, perform the fault at the beginning of the enable sequence.
+                if
+
                 // Some drivers will write zeros to IOSQES and IOCQES, assuming that the defaults will work.
                 if cc.iocqes() == 0 {
                     cc.set_iocqes(IOCQES);
@@ -384,6 +389,8 @@ impl NvmeFaultController {
             }
         }
 
+
+
         self.registers.cc = cc;
         *self.qe_sizes.lock() = IoQueueEntrySizes {
             sqe_bits: cc.iosqes(),
diff --git a/vm/devices/storage/nvme_test/src/tests/controller_tests.rs b/vm/devices/storage/nvme_test/src/tests/controller_tests.rs
index 830614f6aa..60121f0fd3 100644
--- a/vm/devices/storage/nvme_test/src/tests/controller_tests.rs
+++ b/vm/devices/storage/nvme_test/src/tests/controller_tests.rs
@@ -17,8 +17,9 @@ use guestmem::GuestMemory;
 use guid::Guid;
 use mesh::CellUpdater;
 use nvme_resources::fault::AdminQueueFaultConfig;
+use nvme_resources::fault::ControllerManagementFaultConfig;
+use nvme_resources::fault::FaultBehaviour;
 use nvme_resources::fault::FaultConfiguration;
-use nvme_resources::fault::QueueFaultBehavior;
 use nvme_spec::Command;
 use nvme_spec::Completion;
 use pal_async::DefaultDriver;
@@ -208,6 +209,7 @@ async fn test_basic_registers(driver: DefaultDriver) {
     let fault_configuration = FaultConfiguration {
         fault_active: CellUpdater::new(false).cell(),
         admin_fault: AdminQueueFaultConfig::new(),
+        controller_management_fault: ControllerManagementFaultConfig::new(),
     };
     let mut nvmec = instantiate_controller(driver, &gm, None, fault_configuration);
     let mut dword = 0u32;
@@ -236,6 +238,7 @@ async fn test_invalid_configuration(driver: DefaultDriver) {
     let fault_configuration = FaultConfiguration {
         fault_active: CellUpdater::new(false).cell(),
         admin_fault: AdminQueueFaultConfig::new(),
+        controller_management_fault: ControllerManagementFaultConfig::new(),
     };
     let mut nvmec = instantiate_controller(driver, &gm, None, fault_configuration);
     let mut dword = 0u32;
@@ -254,6 +257,7 @@ async fn test_enable_controller(driver: DefaultDriver) {
     let fault_configuration = FaultConfiguration {
         fault_active: CellUpdater::new(false).cell(),
         admin_fault: AdminQueueFaultConfig::new(),
+        controller_management_fault: ControllerManagementFaultConfig::new(),
     };
     let mut nvmec = instantiate_controller(driver, &gm, None, fault_configuration);
 
@@ -285,6 +289,7 @@ async fn test_multi_page_admin_queues(driver: DefaultDriver) {
     let fault_configuration = FaultConfiguration {
         fault_active: CellUpdater::new(false).cell(),
         admin_fault: AdminQueueFaultConfig::new(),
+        controller_management_fault: ControllerManagementFaultConfig::new(),
     };
     let mut nvmec = instantiate_controller(driver, &gm, None, fault_configuration);
 
@@ -359,6 +364,7 @@ async fn test_send_identify_no_fault(driver: DefaultDriver) {
     let fault_configuration = FaultConfiguration {
         fault_active: CellUpdater::new(false).cell(),
         admin_fault: AdminQueueFaultConfig::new(),
+        controller_management_fault: ControllerManagementFaultConfig::new(),
     };
 
     let cqe = send_identify(driver, fault_configuration).await;
@@ -374,8 +380,9 @@ async fn test_send_identify_with_sq_fault(driver: DefaultDriver) {
         fault_active: CellUpdater::new(true).cell(),
         admin_fault: AdminQueueFaultConfig::new().with_submission_queue_fault(
             nvme_spec::AdminOpcode::IDENTIFY.0,
-            QueueFaultBehavior::Update(faulty_identify),
+            FaultBehaviour::Update(faulty_identify),
         ),
+        controller_management_fault: ControllerManagementFaultConfig::new(),
     };
 
     let cqe = send_identify(driver, fault_configuration).await;
diff --git a/vm/devices/storage/nvme_test/src/tests/shadow_doorbell_tests.rs b/vm/devices/storage/nvme_test/src/tests/shadow_doorbell_tests.rs
index 76ab90e370..f027879654 100644
--- a/vm/devices/storage/nvme_test/src/tests/shadow_doorbell_tests.rs
+++ b/vm/devices/storage/nvme_test/src/tests/shadow_doorbell_tests.rs
@@ -14,6 +14,7 @@ use crate::tests::test_helpers::write_command_to_queue;
 use guestmem::GuestMemory;
 use mesh::CellUpdater;
 use nvme_resources::fault::AdminQueueFaultConfig;
+use nvme_resources::fault::ControllerManagementFaultConfig;
 use nvme_resources::fault::FaultConfiguration;
 use pal_async::DefaultDriver;
 use pal_async::async_test;
@@ -42,6 +43,7 @@ async fn setup_shadow_doorbells(
     let fault_configuration = FaultConfiguration {
         fault_active: CellUpdater::new(false).cell(),
         admin_fault: AdminQueueFaultConfig::new(),
+        controller_management_fault: ControllerManagementFaultConfig::new(),
     };
     // Build a controller with 64 entries in the admin queue (just so that the ASQ fits in one page).
     let mut nvmec = instantiate_and_build_admin_queue(
         cq_buf,
diff --git a/vm/devices/storage/nvme_test/src/workers/admin.rs b/vm/devices/storage/nvme_test/src/workers/admin.rs
index 5ef4c76e17..18ad6ecd12 100644
--- a/vm/devices/storage/nvme_test/src/workers/admin.rs
+++ b/vm/devices/storage/nvme_test/src/workers/admin.rs
@@ -31,8 +31,8 @@ use futures_concurrency::future::Race;
 use guestmem::GuestMemory;
 use guid::Guid;
 use inspect::Inspect;
+use nvme_resources::fault::FaultBehaviour;
 use nvme_resources::fault::FaultConfiguration;
-use nvme_resources::fault::QueueFaultBehavior;
 use pal_async::task::Spawn;
 use pal_async::task::Task;
 use pal_async::timer::PolledTimer;
@@ -481,7 +481,7 @@ impl AdminHandler {
             .unwrap_or_else(|| QueueFaultBehavior::Default);
 
         match fault {
-            QueueFaultBehavior::Update(command_updated) => {
+            FaultBehaviour::Update(command_updated) => {
                 tracing::warn!(
                     "configured fault: admin command updated in sq. original: {:?},\n new: {:?}",
                     &command,
@@ -489,14 +489,14 @@ impl AdminHandler {
                 );
                 command = command_updated;
             }
-            QueueFaultBehavior::Drop => {
+            FaultBehaviour::Drop => {
                 tracing::warn!(
                     "configured fault: admin command dropped from sq {:?}",
                     &command
                 );
                 return Ok(());
             }
-            QueueFaultBehavior::Delay(duration) => {
+            FaultBehaviour::Delay(duration) => {
                 self.timer.sleep(duration).await;
             }
             QueueFaultBehavior::Panic(message) => {
diff --git a/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs b/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs
index cdc5bb2c49..39c8f63616 100644
--- a/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs
+++ b/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs
@@ -15,8 +15,8 @@ use mesh::CellUpdater;
 use nvme_resources::NamespaceDefinition;
 use nvme_resources::NvmeFaultControllerHandle;
 use nvme_resources::fault::AdminQueueFaultConfig;
+use nvme_resources::fault::FaultBehaviour;
 use nvme_resources::fault::FaultConfiguration;
-use nvme_resources::fault::QueueFaultBehavior;
 use petri::OpenHclServicingFlags;
 use petri::PetriVmBuilder;
 use petri::PetriVmmBackend;

From 6b5816bf5684ead196c3d02e5e23aa8a6b9f182c Mon Sep 17 00:00:00 2001
From: Guramrit Singh
Date: Wed, 27 Aug 2025 10:27:15 -0700
Subject: [PATCH 2/4] Adding the ControllerManagementFaultConfiguration to the mix

---
 .../disk_nvme/nvme_driver/src/tests.rs  |  4 ++-
 .../storage/nvme_resources/src/fault.rs |  2 +-
 vm/devices/storage/nvme_test/src/pci.rs | 26 +++++++++++++++----
 3 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs b/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs
index 4fc98b0a46..35cf2e7277 100644
--- a/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs
+++ b/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs
@@ -11,8 +11,9 @@ use inspect::InspectMut;
 use mesh::CellUpdater;
 use nvme::NvmeControllerCaps;
 use nvme_resources::fault::AdminQueueFaultConfig;
-use nvme_resources::fault::FaultConfiguration;
+use nvme_resources::fault::ControllerManagementFaultConfig;
 use nvme_resources::fault::FaultBehaviour;
+use nvme_resources::fault::FaultConfiguration;
 use nvme_spec::AdminOpcode;
 use nvme_spec::Cap;
 use nvme_spec::Command;
@@ -50,6 +51,7 @@ async fn test_nvme_command_fault(driver: DefaultDriver) {
                 AdminOpcode::CREATE_IO_COMPLETION_QUEUE.0,
                 FaultBehaviour::Update(output_cmd),
             ),
+            controller_management_fault: ControllerManagementFaultConfig::new(),
         },
     )
     .await;
diff --git a/vm/devices/storage/nvme_resources/src/fault.rs b/vm/devices/storage/nvme_resources/src/fault.rs
index d44abba9df..de5f433ba8 100644
--- a/vm/devices/storage/nvme_resources/src/fault.rs
+++ b/vm/devices/storage/nvme_resources/src/fault.rs
@@ -30,7 +30,7 @@ pub struct ControllerManagementFaultConfig {
     pub controller_management_fault_enable: FaultBehaviour,
 }
 
-#[derive(MeshPayload)]
+#[derive(MeshPayload, Clone)]
 /// A buildable fault configuration for the controller management interface (cc.en(), csts.rdy(), ... )
 pub struct ControllerManagementFaultConfig {
     /// Fault to apply to cc.en() bit during enablement
diff --git a/vm/devices/storage/nvme_test/src/pci.rs b/vm/devices/storage/nvme_test/src/pci.rs
index ab68490242..144cad1c97 100644
--- a/vm/devices/storage/nvme_test/src/pci.rs
+++ b/vm/devices/storage/nvme_test/src/pci.rs
@@ -30,6 +30,7 @@ use guestmem::GuestMemory;
 use guid::Guid;
 use inspect::Inspect;
 use inspect::InspectMut;
+use nvme_resources::fault::FaultBehaviour;
 use nvme_resources::fault::FaultConfiguration;
 use parking_lot::Mutex;
 use pci_core::capabilities::msix::MsixEmulator;
@@ -157,7 +158,7 @@ impl NvmeFaultController {
             max_cqs: caps.max_io_queues,
             qe_sizes: Arc::clone(&qe_sizes),
             subsystem_id: caps.subsystem_id,
-            fault_configuration,
+            fault_configuration: fault_configuration.clone(),
         });
 
         Self {
@@ -166,6 +167,7 @@ impl NvmeFaultController {
             registers: RegState::new(),
             workers: admin,
             qe_sizes,
+            fault_configuration,
         }
     }
 
@@ -343,8 +345,23 @@ impl NvmeFaultController {
 
         if cc.en() != self.registers.cc.en() {
             if cc.en() {
-                // If any, perform the fault at the beginning of the enable sequence.
-                if
-
+                // If any fault was configured for cc.en() process it here
+                match self
+                    .fault_configuration
+                    .controller_management_fault
+                    .controller_management_fault_enable
+                {
+                    FaultBehaviour::Delay(duration) => {
+                        std::thread::sleep(duration);
+                    }
+                    FaultBehaviour::Drop => {
+                        tracelimit::warn_ratelimited!(
+                            "Dropping enable command due to fault injection"
+                        );
+                        return;
+                    }
+                    _ => {} // Update is not yet configured for this fault. Treat that as a default action for now
+                }
                 // Some drivers will write zeros to IOSQES and IOCQES, assuming that the defaults will work.
                 if cc.iocqes() == 0 {
@@ -389,8 +406,6 @@ impl NvmeFaultController {
             }
         }
 
-
-
         self.registers.cc = cc;
         *self.qe_sizes.lock() = IoQueueEntrySizes {
             sqe_bits: cc.iosqes(),
@@ -437,6 +452,7 @@ impl ChangeDeviceState for NvmeFaultController {
             registers,
             qe_sizes,
             workers,
+            fault_configuration: _,
         } = self;
         workers.reset().await;
         cfg_space.reset();

From e004a932a449df621d1e59e9b64c7e035af53c02 Mon Sep 17 00:00:00 2001
From: Guramrit Singh
Date: Wed, 27 Aug 2025 17:18:52 -0700
Subject: [PATCH 3/4] Created a whole new struct for Pci related faults

---
 .../disk_nvme/nvme_driver/src/tests.rs        |  8 ++---
 .../storage/nvme_resources/src/fault.rs       | 31 +++++++++----------
 vm/devices/storage/nvme_test/src/pci.rs       | 14 +++------
 .../nvme_test/src/tests/controller_tests.rs   | 18 +++++------
 .../src/tests/shadow_doorbell_tests.rs        |  4 +--
 .../storage/nvme_test/src/workers/admin.rs    |  8 ++---
 .../tests/multiarch/openhcl_servicing.rs      |  6 ++--
 7 files changed, 42 insertions(+), 47 deletions(-)

diff --git a/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs b/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs
index 35cf2e7277..8de0da836c 100644
--- a/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs
+++ b/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs
@@ -11,9 +11,9 @@ use inspect::InspectMut;
 use mesh::CellUpdater;
 use nvme::NvmeControllerCaps;
 use nvme_resources::fault::AdminQueueFaultConfig;
-use nvme_resources::fault::ControllerManagementFaultConfig;
-use nvme_resources::fault::FaultBehaviour;
 use nvme_resources::fault::FaultConfiguration;
+use nvme_resources::fault::PciFaultConfig;
+use nvme_resources::fault::QueueFaultBehavior;
 use nvme_spec::AdminOpcode;
 use nvme_spec::Cap;
 use nvme_spec::Command;
@@ -49,9 +49,9 @@ async fn test_nvme_command_fault(driver: DefaultDriver) {
             fault_active: CellUpdater::new(true).cell(),
             admin_fault: AdminQueueFaultConfig::new().with_submission_queue_fault(
                 AdminOpcode::CREATE_IO_COMPLETION_QUEUE.0,
-                FaultBehaviour::Update(output_cmd),
+                QueueFaultBehavior::Update(output_cmd),
             ),
-            controller_management_fault: ControllerManagementFaultConfig::new(),
+            pci_fault: PciFaultConfig::new(),
         },
     )
     .await;
diff --git a/vm/devices/storage/nvme_resources/src/fault.rs b/vm/devices/storage/nvme_resources/src/fault.rs
index de5f433ba8..4555fd9aa2 100644
--- a/vm/devices/storage/nvme_resources/src/fault.rs
+++ b/vm/devices/storage/nvme_resources/src/fault.rs
@@ -23,25 +23,27 @@ pub enum QueueFaultBehavior {
     Panic(String),
 }
 
-#[derive(MeshPayload)]
-/// A buildable fault configuration for the controller management interface (cc.en(), csts.rdy(), ... )
-pub struct ControllerManagementFaultConfig {
-    /// Fault to apply to cc.en() bit during enablement
-    pub controller_management_fault_enable: FaultBehaviour,
+#[derive(Clone, MeshPayload)]
+/// Supported fault behaviour for PCI faults
+pub enum PciFaultBehavior {
+    /// Introduce a delay to the PCI operation
+    Delay(Duration),
+    /// Do nothing
+    Default,
 }
 
 #[derive(MeshPayload, Clone)]
 /// A buildable fault configuration for the controller management interface (cc.en(), csts.rdy(), ... )
-pub struct ControllerManagementFaultConfig {
+pub struct PciFaultConfig {
     /// Fault to apply to cc.en() bit during enablement
-    pub controller_management_fault_enable: FaultBehaviour,
+    pub controller_management_fault_enable: PciFaultBehavior,
 }
 
 #[derive(MeshPayload, Clone)]
 /// A buildable fault configuration
 pub struct AdminQueueFaultConfig {
     /// A map of NVME opcodes to the fault behavior for each. (This would ideally be a `HashMap`, but `mesh` doesn't support that type. Given that this is not performance sensitive, the lookup is okay)
-    pub admin_submission_queue_faults: Vec<(u8, FaultBehaviour)>,
+    pub admin_submission_queue_faults: Vec<(u8, QueueFaultBehavior)>,
 }
 
 #[derive(MeshPayload, Clone)]
@@ -52,22 +54,19 @@ pub struct FaultConfiguration {
     /// Fault to apply to the admin queues
     pub admin_fault: AdminQueueFaultConfig,
     /// Fault to apply to management layer of the controller
-    pub controller_management_fault: ControllerManagementFaultConfig,
+    pub pci_fault: PciFaultConfig,
 }
 
-impl ControllerManagementFaultConfig {
+impl PciFaultConfig {
     /// Create a new no-op fault configuration
     pub fn new() -> Self {
         Self {
-            controller_management_fault_enable: FaultBehaviour::Default,
+            controller_management_fault_enable: PciFaultBehavior::Default,
         }
     }
 
     /// Create a new fault configuration
-    pub fn with_controller_management_enable_fault(
-        mut self,
-        behaviour: FaultBehaviour,
-    ) -> Self {
+    pub fn with_cc_enable_fault(mut self, behaviour: PciFaultBehavior) -> Self {
         self.controller_management_fault_enable = behaviour;
         self
     }
@@ -85,7 +84,7 @@ impl AdminQueueFaultConfig {
     pub fn with_submission_queue_fault(
         mut self,
         opcode: u8,
-        behaviour: FaultBehaviour,
+        behaviour: QueueFaultBehavior,
     ) -> Self {
         if self
             .admin_submission_queue_faults
diff --git a/vm/devices/storage/nvme_test/src/pci.rs b/vm/devices/storage/nvme_test/src/pci.rs
index 144cad1c97..c1d4815195 100644
--- a/vm/devices/storage/nvme_test/src/pci.rs
+++ b/vm/devices/storage/nvme_test/src/pci.rs
@@ -30,8 +30,8 @@ use guestmem::GuestMemory;
 use guid::Guid;
 use inspect::Inspect;
 use inspect::InspectMut;
-use nvme_resources::fault::FaultBehaviour;
 use nvme_resources::fault::FaultConfiguration;
+use nvme_resources::fault::PciFaultBehavior;
 use parking_lot::Mutex;
 use pci_core::capabilities::msix::MsixEmulator;
 use pci_core::cfg_space_emu::BarMemoryKind;
@@ -348,19 +348,13 @@ impl NvmeFaultController {
                 // If any fault was configured for cc.en() process it here
                 match self
                     .fault_configuration
-                    .controller_management_fault
+                    .pci_fault
                     .controller_management_fault_enable
                 {
-                    FaultBehaviour::Delay(duration) => {
+                    PciFaultBehavior::Delay(duration) => {
                         std::thread::sleep(duration);
                     }
-                    FaultBehaviour::Drop => {
-                        tracelimit::warn_ratelimited!(
-                            "Dropping enable command due to fault injection"
-                        );
-                        return;
-                    }
-                    _ => {} // Update is not yet configured for this fault. Treat that as a default action for now
+                    PciFaultBehavior::Default => {}
                 }
                 // Some drivers will write zeros to IOSQES and IOCQES, assuming that the defaults will work.
                 if cc.iocqes() == 0 {
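// A minimal usage sketch (not part of the patch itself): the PciFaultBehavior::Delay
// arm above blocks the register write with std::thread::sleep, so a test that wants
// csts.rdy() to lag cc.en() could configure the fault as below; the 100 ms duration
// is an illustrative assumption only.
//
//     let pci_fault = PciFaultConfig::new()
//         .with_cc_enable_fault(PciFaultBehavior::Delay(Duration::from_millis(100)));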
diff --git a/vm/devices/storage/nvme_test/src/tests/controller_tests.rs b/vm/devices/storage/nvme_test/src/tests/controller_tests.rs
index 60121f0fd3..81bcd1e15c 100644
--- a/vm/devices/storage/nvme_test/src/tests/controller_tests.rs
+++ b/vm/devices/storage/nvme_test/src/tests/controller_tests.rs
@@ -17,9 +17,9 @@ use guestmem::GuestMemory;
 use guid::Guid;
 use mesh::CellUpdater;
 use nvme_resources::fault::AdminQueueFaultConfig;
-use nvme_resources::fault::ControllerManagementFaultConfig;
-use nvme_resources::fault::FaultBehaviour;
 use nvme_resources::fault::FaultConfiguration;
+use nvme_resources::fault::PciFaultConfig;
+use nvme_resources::fault::QueueFaultBehavior;
 use nvme_spec::Command;
 use nvme_spec::Completion;
 use pal_async::DefaultDriver;
@@ -209,7 +209,7 @@ async fn test_basic_registers(driver: DefaultDriver) {
     let fault_configuration = FaultConfiguration {
         fault_active: CellUpdater::new(false).cell(),
         admin_fault: AdminQueueFaultConfig::new(),
-        controller_management_fault: ControllerManagementFaultConfig::new(),
+        pci_fault: PciFaultConfig::new(),
     };
     let mut nvmec = instantiate_controller(driver, &gm, None, fault_configuration);
     let mut dword = 0u32;
@@ -238,7 +238,7 @@ async fn test_invalid_configuration(driver: DefaultDriver) {
     let fault_configuration = FaultConfiguration {
         fault_active: CellUpdater::new(false).cell(),
         admin_fault: AdminQueueFaultConfig::new(),
-        controller_management_fault: ControllerManagementFaultConfig::new(),
+        pci_fault: PciFaultConfig::new(),
     };
     let mut nvmec = instantiate_controller(driver, &gm, None, fault_configuration);
     let mut dword = 0u32;
@@ -257,7 +257,7 @@ async fn test_enable_controller(driver: DefaultDriver) {
     let fault_configuration = FaultConfiguration {
         fault_active: CellUpdater::new(false).cell(),
         admin_fault: AdminQueueFaultConfig::new(),
-        controller_management_fault: ControllerManagementFaultConfig::new(),
+        pci_fault: PciFaultConfig::new(),
     };
     let mut nvmec = instantiate_controller(driver, &gm, None, fault_configuration);
 
@@ -289,7 +289,7 @@ async fn test_multi_page_admin_queues(driver: DefaultDriver) {
     let fault_configuration = FaultConfiguration {
         fault_active: CellUpdater::new(false).cell(),
         admin_fault: AdminQueueFaultConfig::new(),
-        controller_management_fault: ControllerManagementFaultConfig::new(),
+        pci_fault: PciFaultConfig::new(),
     };
     let mut nvmec = instantiate_controller(driver, &gm, None, fault_configuration);
 
@@ -364,7 +364,7 @@ async fn test_send_identify_no_fault(driver: DefaultDriver) {
     let fault_configuration = FaultConfiguration {
         fault_active: CellUpdater::new(false).cell(),
         admin_fault: AdminQueueFaultConfig::new(),
-        controller_management_fault: ControllerManagementFaultConfig::new(),
+        pci_fault: PciFaultConfig::new(),
     };
 
     let cqe = send_identify(driver, fault_configuration).await;
@@ -380,9 +380,9 @@ async fn test_send_identify_with_sq_fault(driver: DefaultDriver) {
         fault_active: CellUpdater::new(true).cell(),
         admin_fault: AdminQueueFaultConfig::new().with_submission_queue_fault(
             nvme_spec::AdminOpcode::IDENTIFY.0,
-            FaultBehaviour::Update(faulty_identify),
+            QueueFaultBehavior::Update(faulty_identify),
         ),
-        controller_management_fault: ControllerManagementFaultConfig::new(),
+        pci_fault: PciFaultConfig::new(),
     };
 
     let cqe = send_identify(driver, fault_configuration).await;
diff --git a/vm/devices/storage/nvme_test/src/tests/shadow_doorbell_tests.rs b/vm/devices/storage/nvme_test/src/tests/shadow_doorbell_tests.rs
index f027879654..6c6bac3f1c 100644
--- a/vm/devices/storage/nvme_test/src/tests/shadow_doorbell_tests.rs
+++ b/vm/devices/storage/nvme_test/src/tests/shadow_doorbell_tests.rs
@@ -14,8 +14,8 @@ use crate::tests::test_helpers::write_command_to_queue;
 use guestmem::GuestMemory;
 use mesh::CellUpdater;
 use nvme_resources::fault::AdminQueueFaultConfig;
-use nvme_resources::fault::ControllerManagementFaultConfig;
 use nvme_resources::fault::FaultConfiguration;
+use nvme_resources::fault::PciFaultConfig;
 use pal_async::DefaultDriver;
 use pal_async::async_test;
 use pci_core::test_helpers::TestPciInterruptController;
@@ -43,7 +43,7 @@ async fn setup_shadow_doorbells(
     let fault_configuration = FaultConfiguration {
         fault_active: CellUpdater::new(false).cell(),
         admin_fault: AdminQueueFaultConfig::new(),
-        controller_management_fault: ControllerManagementFaultConfig::new(),
+        pci_fault: PciFaultConfig::new(),
     };
     // Build a controller with 64 entries in the admin queue (just so that the ASQ fits in one page).
     let mut nvmec = instantiate_and_build_admin_queue(
         cq_buf,
diff --git a/vm/devices/storage/nvme_test/src/workers/admin.rs b/vm/devices/storage/nvme_test/src/workers/admin.rs
index 18ad6ecd12..5ef4c76e17 100644
--- a/vm/devices/storage/nvme_test/src/workers/admin.rs
+++ b/vm/devices/storage/nvme_test/src/workers/admin.rs
@@ -31,8 +31,8 @@ use futures_concurrency::future::Race;
 use guestmem::GuestMemory;
 use guid::Guid;
 use inspect::Inspect;
-use nvme_resources::fault::FaultBehaviour;
 use nvme_resources::fault::FaultConfiguration;
+use nvme_resources::fault::QueueFaultBehavior;
 use pal_async::task::Spawn;
 use pal_async::task::Task;
 use pal_async::timer::PolledTimer;
@@ -481,7 +481,7 @@ impl AdminHandler {
             .unwrap_or_else(|| QueueFaultBehavior::Default);
 
         match fault {
-            FaultBehaviour::Update(command_updated) => {
+            QueueFaultBehavior::Update(command_updated) => {
                 tracing::warn!(
                     "configured fault: admin command updated in sq. original: {:?},\n new: {:?}",
                     &command,
@@ -489,14 +489,14 @@ impl AdminHandler {
                 );
                 command = command_updated;
             }
-            FaultBehaviour::Drop => {
+            QueueFaultBehavior::Drop => {
                 tracing::warn!(
                     "configured fault: admin command dropped from sq {:?}",
                     &command
                 );
                 return Ok(());
             }
-            FaultBehaviour::Delay(duration) => {
+            QueueFaultBehavior::Delay(duration) => {
                 self.timer.sleep(duration).await;
             }
             QueueFaultBehavior::Panic(message) => {
diff --git a/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs b/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs
index 39c8f63616..988ef009b4 100644
--- a/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs
+++ b/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs
@@ -15,8 +15,9 @@ use mesh::CellUpdater;
 use nvme_resources::NamespaceDefinition;
 use nvme_resources::NvmeFaultControllerHandle;
 use nvme_resources::fault::AdminQueueFaultConfig;
-use nvme_resources::fault::FaultBehaviour;
 use nvme_resources::fault::FaultConfiguration;
+use nvme_resources::fault::PciFaultConfig;
+use nvme_resources::fault::QueueFaultBehavior;
 use petri::OpenHclServicingFlags;
 use petri::PetriVmBuilder;
 use petri::PetriVmmBackend;
@@ -256,8 +257,9 @@ async fn keepalive_with_nvme_fault(
         fault_active: fault_start_updater.cell(),
         admin_fault: AdminQueueFaultConfig::new().with_submission_queue_fault(
             nvme_spec::AdminOpcode::CREATE_IO_COMPLETION_QUEUE.0,
-            QueueFaultBehavior::Panic("Received a CREATE_IO_COMPLETION_QUEUE command during servicing with keepalive enabled. This should never happen.".to_string()),
+            QueueFaultBehavior::Panic("Received a CREATE_IO_COMPLETION_QUEUE command during servicing with keepalive enabled. THERE IS A BUG SOMEWHERE.".to_string()),
         ),
+        pci_fault: PciFaultConfig::new(),
     };
 
     let (mut vm, agent) = config

From ff82b9195637772a18b5b39b9c4adb3965d2eb11 Mon Sep 17 00:00:00 2001
From: Guramrit Singh
Date: Wed, 27 Aug 2025 17:22:43 -0700
Subject: [PATCH 4/4] Added a minor comment

---
 vm/devices/storage/nvme_resources/src/fault.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vm/devices/storage/nvme_resources/src/fault.rs b/vm/devices/storage/nvme_resources/src/fault.rs
index 4555fd9aa2..a7716b8535 100644
--- a/vm/devices/storage/nvme_resources/src/fault.rs
+++ b/vm/devices/storage/nvme_resources/src/fault.rs
@@ -65,7 +65,7 @@ impl PciFaultConfig {
         }
     }
 
-    /// Create a new fault configuration
+    /// Add a cc.en() fault
     pub fn with_cc_enable_fault(mut self, behaviour: PciFaultBehavior) -> Self {
         self.controller_management_fault_enable = behaviour;
         self
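With the series applied, the composed fault surface can be exercised end to end. Below is
a minimal sketch (not part of any patch above; the 50 ms delay and the choice of IDENTIFY
as the dropped opcode are illustrative assumptions) of a FaultConfiguration that both
drops an admin command and stalls cc.en():

    use std::time::Duration;

    use mesh::CellUpdater;
    use nvme_resources::fault::AdminQueueFaultConfig;
    use nvme_resources::fault::FaultConfiguration;
    use nvme_resources::fault::PciFaultBehavior;
    use nvme_resources::fault::PciFaultConfig;
    use nvme_resources::fault::QueueFaultBehavior;

    // Drop IDENTIFY commands from the admin submission queue and delay cc.en()
    // by 50 ms; both behaviors are gated at runtime by the `fault_active` cell.
    let fault_configuration = FaultConfiguration {
        fault_active: CellUpdater::new(true).cell(),
        admin_fault: AdminQueueFaultConfig::new().with_submission_queue_fault(
            nvme_spec::AdminOpcode::IDENTIFY.0,
            QueueFaultBehavior::Drop,
        ),
        pci_fault: PciFaultConfig::new()
            .with_cc_enable_fault(PciFaultBehavior::Delay(Duration::from_millis(50))),
    };

Note that, as wired up in patch 3, a Delay fault blocks the register-write path with
std::thread::sleep rather than yielding to the async executor, which is acceptable for a
test controller but worth keeping in mind when choosing durations.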