Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use mesh::CellUpdater;
use nvme::NvmeControllerCaps;
use nvme_resources::fault::AdminQueueFaultConfig;
use nvme_resources::fault::FaultConfiguration;
use nvme_resources::fault::PciFaultConfig;
use nvme_resources::fault::QueueFaultBehavior;
use nvme_spec::AdminOpcode;
use nvme_spec::Cap;
Expand Down Expand Up @@ -50,6 +51,7 @@ async fn test_nvme_command_fault(driver: DefaultDriver) {
AdminOpcode::CREATE_IO_COMPLETION_QUEUE.0,
QueueFaultBehavior::Update(output_cmd),
),
pci_fault: PciFaultConfig::new(),
},
)
.await;
Expand Down
33 changes: 33 additions & 0 deletions vm/devices/storage/nvme_resources/src/fault.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,22 @@ pub enum QueueFaultBehavior<T> {
Panic(String),
}

/// Fault behaviours that can be injected into a PCI operation.
#[derive(MeshPayload, Clone)]
pub enum PciFaultBehavior {
    /// Stall the PCI operation for the given duration.
    Delay(Duration),
    /// Leave the PCI operation untouched (no fault).
    Default,
}

/// Buildable fault configuration for the controller management interface
/// (cc.en(), csts.rdy(), ...).
#[derive(Clone, MeshPayload)]
pub struct PciFaultConfig {
    /// Fault applied to the cc.en() bit while the controller is being enabled.
    pub controller_management_fault_enable: PciFaultBehavior,
}

#[derive(MeshPayload, Clone)]
/// A buildable fault configuration
pub struct AdminQueueFaultConfig {
Expand All @@ -37,6 +53,23 @@ pub struct FaultConfiguration {
pub fault_active: Cell<bool>,
/// Fault to apply to the admin queues
pub admin_fault: AdminQueueFaultConfig,
/// Fault to apply to management layer of the controller
pub pci_fault: PciFaultConfig,
}

impl PciFaultConfig {
    /// Create a new no-op fault configuration.
    ///
    /// The cc.en() fault starts out as [`PciFaultBehavior::Default`], so a
    /// freshly created configuration injects nothing until a `with_*`
    /// builder method is applied.
    pub fn new() -> Self {
        Self {
            controller_management_fault_enable: PciFaultBehavior::Default,
        }
    }

    /// Add a cc.en() fault.
    ///
    /// Consumes the configuration, replaces the cc.en() fault with
    /// `behaviour`, and returns the updated configuration so calls can be
    /// chained builder-style.
    pub fn with_cc_enable_fault(mut self, behaviour: PciFaultBehavior) -> Self {
        self.controller_management_fault_enable = behaviour;
        self
    }
}

/// `Default` mirrors [`PciFaultConfig::new`]: a public zero-argument
/// constructor should also be reachable through `Default::default()` so the
/// type works with struct-update syntax and generic code.
impl Default for PciFaultConfig {
    fn default() -> Self {
        Self::new()
    }
}

impl AdminQueueFaultConfig {
Expand Down
19 changes: 18 additions & 1 deletion vm/devices/storage/nvme_test/src/pci.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ use guid::Guid;
use inspect::Inspect;
use inspect::InspectMut;
use nvme_resources::fault::FaultConfiguration;
use nvme_resources::fault::PciFaultBehavior;
use parking_lot::Mutex;
use pci_core::capabilities::msix::MsixEmulator;
use pci_core::cfg_space_emu::BarMemoryKind;
Expand Down Expand Up @@ -59,6 +60,8 @@ pub struct NvmeFaultController {
qe_sizes: Arc<Mutex<IoQueueEntrySizes>>,
#[inspect(flatten, mut)]
workers: NvmeWorkers,
#[inspect(skip)]
fault_configuration: FaultConfiguration,
}

#[derive(Inspect)]
Expand Down Expand Up @@ -155,7 +158,7 @@ impl NvmeFaultController {
max_cqs: caps.max_io_queues,
qe_sizes: Arc::clone(&qe_sizes),
subsystem_id: caps.subsystem_id,
fault_configuration,
fault_configuration: fault_configuration.clone(),
});

Self {
Expand All @@ -164,6 +167,7 @@ impl NvmeFaultController {
registers: RegState::new(),
workers: admin,
qe_sizes,
fault_configuration,
}
}

Expand Down Expand Up @@ -341,6 +345,18 @@ impl NvmeFaultController {

if cc.en() != self.registers.cc.en() {
if cc.en() {
// If any fault was configured for cc.en() process it here
match self
.fault_configuration
.pci_fault
.controller_management_fault_enable
{
PciFaultBehavior::Delay(duration) => {
std::thread::sleep(duration);
}
PciFaultBehavior::Default => {}
}

// Some drivers will write zeros to IOSQES and IOCQES, assuming that the defaults will work.
if cc.iocqes() == 0 {
cc.set_iocqes(IOCQES);
Expand Down Expand Up @@ -430,6 +446,7 @@ impl ChangeDeviceState for NvmeFaultController {
registers,
qe_sizes,
workers,
fault_configuration: _,
} = self;
workers.reset().await;
cfg_space.reset();
Expand Down
7 changes: 7 additions & 0 deletions vm/devices/storage/nvme_test/src/tests/controller_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ use guid::Guid;
use mesh::CellUpdater;
use nvme_resources::fault::AdminQueueFaultConfig;
use nvme_resources::fault::FaultConfiguration;
use nvme_resources::fault::PciFaultConfig;
use nvme_resources::fault::QueueFaultBehavior;
use nvme_spec::Command;
use nvme_spec::Completion;
Expand Down Expand Up @@ -208,6 +209,7 @@ async fn test_basic_registers(driver: DefaultDriver) {
let fault_configuration = FaultConfiguration {
fault_active: CellUpdater::new(false).cell(),
admin_fault: AdminQueueFaultConfig::new(),
pci_fault: PciFaultConfig::new(),
};
let mut nvmec = instantiate_controller(driver, &gm, None, fault_configuration);
let mut dword = 0u32;
Expand Down Expand Up @@ -236,6 +238,7 @@ async fn test_invalid_configuration(driver: DefaultDriver) {
let fault_configuration = FaultConfiguration {
fault_active: CellUpdater::new(false).cell(),
admin_fault: AdminQueueFaultConfig::new(),
pci_fault: PciFaultConfig::new(),
};
let mut nvmec = instantiate_controller(driver, &gm, None, fault_configuration);
let mut dword = 0u32;
Expand All @@ -254,6 +257,7 @@ async fn test_enable_controller(driver: DefaultDriver) {
let fault_configuration = FaultConfiguration {
fault_active: CellUpdater::new(false).cell(),
admin_fault: AdminQueueFaultConfig::new(),
pci_fault: PciFaultConfig::new(),
};
let mut nvmec = instantiate_controller(driver, &gm, None, fault_configuration);

Expand Down Expand Up @@ -285,6 +289,7 @@ async fn test_multi_page_admin_queues(driver: DefaultDriver) {
let fault_configuration = FaultConfiguration {
fault_active: CellUpdater::new(false).cell(),
admin_fault: AdminQueueFaultConfig::new(),
pci_fault: PciFaultConfig::new(),
};
let mut nvmec = instantiate_controller(driver, &gm, None, fault_configuration);

Expand Down Expand Up @@ -359,6 +364,7 @@ async fn test_send_identify_no_fault(driver: DefaultDriver) {
let fault_configuration = FaultConfiguration {
fault_active: CellUpdater::new(false).cell(),
admin_fault: AdminQueueFaultConfig::new(),
pci_fault: PciFaultConfig::new(),
};
let cqe = send_identify(driver, fault_configuration).await;

Expand All @@ -376,6 +382,7 @@ async fn test_send_identify_with_sq_fault(driver: DefaultDriver) {
nvme_spec::AdminOpcode::IDENTIFY.0,
QueueFaultBehavior::Update(faulty_identify),
),
pci_fault: PciFaultConfig::new(),
};
let cqe = send_identify(driver, fault_configuration).await;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ use guestmem::GuestMemory;
use mesh::CellUpdater;
use nvme_resources::fault::AdminQueueFaultConfig;
use nvme_resources::fault::FaultConfiguration;
use nvme_resources::fault::PciFaultConfig;
use pal_async::DefaultDriver;
use pal_async::async_test;
use pci_core::test_helpers::TestPciInterruptController;
Expand Down Expand Up @@ -42,6 +43,7 @@ async fn setup_shadow_doorbells(
let fault_configuration = FaultConfiguration {
fault_active: CellUpdater::new(false).cell(),
admin_fault: AdminQueueFaultConfig::new(),
pci_fault: PciFaultConfig::new(),
}; // Build a controller with 64 entries in the admin queue (just so that the ASQ fits in one page).
let mut nvmec = instantiate_and_build_admin_queue(
cq_buf,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use nvme_resources::NamespaceDefinition;
use nvme_resources::NvmeFaultControllerHandle;
use nvme_resources::fault::AdminQueueFaultConfig;
use nvme_resources::fault::FaultConfiguration;
use nvme_resources::fault::PciFaultConfig;
use nvme_resources::fault::QueueFaultBehavior;
use petri::OpenHclServicingFlags;
use petri::PetriVmBuilder;
Expand Down Expand Up @@ -255,8 +256,9 @@ async fn keepalive_with_nvme_fault(
fault_active: fault_start_updater.cell(),
admin_fault: AdminQueueFaultConfig::new().with_submission_queue_fault(
nvme_spec::AdminOpcode::CREATE_IO_COMPLETION_QUEUE.0,
QueueFaultBehavior::Panic("Received a CREATE_IO_COMPLETION_QUEUE command during servicing with keepalive enabled. This should never happen.".to_string()),
QueueFaultBehavior::Panic("Received a CREATE_IO_COMPLETION_QUEUE command during servicing with keepalive enabled. THERE IS A BUG SOMEWHERE.".to_string()),
),
pci_fault: PciFaultConfig::new(),
};

let (mut vm, agent) = config
Expand Down
Loading