diff --git a/propolis/Cargo.toml b/propolis/Cargo.toml index d2404d5fa..0e1ca1b52 100644 --- a/propolis/Cargo.toml +++ b/propolis/Cargo.toml @@ -23,4 +23,6 @@ tokio = { version = "1", features = ["full"] } futures = "0.3" [dev-dependencies] -tempfile = "3.2" +crossbeam-channel = "0.5" +rand = "0.8" +tempfile = "3.2" \ No newline at end of file diff --git a/propolis/src/block/mod.rs b/propolis/src/block/mod.rs index 6bdec99aa..4c7aec61c 100644 --- a/propolis/src/block/mod.rs +++ b/propolis/src/block/mod.rs @@ -33,7 +33,8 @@ pub enum Result { Unsupported, } -pub type CompleteFn = dyn FnOnce(Result, &DispCtx) + Send + Sync + 'static; +pub type CompleteFn = + dyn FnOnce(Operation, Result, &DispCtx) + Send + Sync + 'static; /// Block device operation request pub struct Request { @@ -100,7 +101,7 @@ impl Request { /// Indiciate disposition of completed request pub fn complete(mut self, res: Result, ctx: &DispCtx) { let func = self.donef.take().unwrap(); - func(res, ctx); + func(self.op, res, ctx); } } impl Drop for Request { diff --git a/propolis/src/hw/mod.rs b/propolis/src/hw/mod.rs index 471965195..9e57162b3 100644 --- a/propolis/src/hw/mod.rs +++ b/propolis/src/hw/mod.rs @@ -1,7 +1,6 @@ pub mod chipset; pub mod ibmpc; -// XXX: skip nvme for now -//pub mod nvme; +pub mod nvme; pub mod pci; pub mod ps2ctrl; pub mod qemu; diff --git a/propolis/src/hw/nvme/admin.rs b/propolis/src/hw/nvme/admin.rs index b171323c1..f8d208e64 100644 --- a/propolis/src/hw/nvme/admin.rs +++ b/propolis/src/hw/nvme/admin.rs @@ -33,7 +33,7 @@ impl NvmeCtrl { cmd.qid, cmd.intr_vector, GuestAddr(cmd.prp), - cmd.qsize as u32, + cmd.qsize, ctx, ) { Ok(_) => cmds::Completion::success(), @@ -67,7 +67,7 @@ impl NvmeCtrl { cmd.qid, cmd.cqid, GuestAddr(cmd.prp), - cmd.qsize as u32, + cmd.qsize, ctx, ) { Ok(_) => cmds::Completion::success(), @@ -117,18 +117,14 @@ impl NvmeCtrl { ) -> cmds::Completion { match cmd.cns { IDENT_CNS_NAMESPACE => match cmd.nsid { - n if n > 0 && n <= super::ns::MAX_NUM_NAMESPACES as u32 => { + 1 => { assert!(size_of::() <= PAGE_SIZE); let buf = cmd .data(ctx.mctx.memctx()) .next() .expect("missing prp entry for ident response"); - if let Ok(ns) = self.get_ns(n) { - assert!(ctx.mctx.memctx().write(buf.0, &ns.ident)); - cmds::Completion::success() - } else { - cmds::Completion::generic_err(STS_INVALID_NS) - } + assert!(ctx.mctx.memctx().write(buf.0, &self.ns_ident)); + cmds::Completion::success() } // 0 is not a valid NSID (See NVMe 1.0e, Section 6.1 Namespaces) // We also don't currently support namespace management @@ -143,7 +139,7 @@ impl NvmeCtrl { .data(ctx.mctx.memctx()) .next() .expect("missing prp entry for ident response"); - assert!(ctx.mctx.memctx().write(buf.0, &self.ident)); + assert!(ctx.mctx.memctx().write(buf.0, &self.ctrl_ident)); cmds::Completion::success() } // We currently present NVMe version 1.0 in which CNS is a 1-bit field diff --git a/propolis/src/hw/nvme/cmds.rs b/propolis/src/hw/nvme/cmds.rs index f779fa5f3..0d6fefec3 100644 --- a/propolis/src/hw/nvme/cmds.rs +++ b/propolis/src/hw/nvme/cmds.rs @@ -61,7 +61,7 @@ impl AdminCmd { }; AdminCmd::CreateIOSubQ(CreateIOSQCmd { prp: raw.prp1, - qsize: (raw.cdw10 >> 16) as u16 + 1, // Convert from 0's based + qsize: (raw.cdw10 >> 16) + 1, // Convert from 0's based qid: raw.cdw10 as u16, cqid: (raw.cdw11 >> 16) as u16, queue_prio, @@ -84,7 +84,7 @@ impl AdminCmd { bits::ADMIN_OPC_CREATE_IO_CQ => { AdminCmd::CreateIOCompQ(CreateIOCQCmd { prp: raw.prp1, - qsize: (raw.cdw10 >> 16) as u16 + 1, // Convert from 0's based + qsize: (raw.cdw10 >> 16) + 1, // Convert from 0's based qid: raw.cdw10 as u16, intr_vector: (raw.cdw11 >> 16) as u16, intr_enable: (raw.cdw11 & 0b10) != 0, @@ -132,7 +132,8 @@ pub struct CreateIOCQCmd { /// /// The size of the Completion Queue to be created. /// See NVMe 1.0e Section 4.1.3 Queue Size - pub qsize: u16, + /// NOTE: This has already been converted from a 0's based value. + pub qsize: u32, /// Queue Identifier (QID) /// @@ -171,7 +172,8 @@ pub struct CreateIOSQCmd { /// /// The size of the Completion Queue to be created. /// See NVMe 1.0e Section 4.1.3 Queue Size - pub qsize: u16, + /// NOTE: This has already been converted from a 0's based value. + pub qsize: u32, /// Queue Identifier (QID) /// diff --git a/propolis/src/hw/nvme/mod.rs b/propolis/src/hw/nvme/mod.rs index f988936ec..2e0ddaecf 100644 --- a/propolis/src/hw/nvme/mod.rs +++ b/propolis/src/hw/nvme/mod.rs @@ -1,31 +1,31 @@ -use std::collections::BTreeMap; use std::convert::TryInto; use std::mem::size_of; use std::sync::{Arc, Mutex, MutexGuard}; -use crate::common::*; use crate::dispatch::DispCtx; use crate::hw::pci; use crate::util::regmap::RegMap; +use crate::{block, common::*}; use lazy_static::lazy_static; use thiserror::Error; -pub use ns::{NvmeNs, Request}; - mod admin; mod bits; mod cmds; -mod ns; mod queue; +mod requests; use bits::*; -use ns::MAX_NUM_NAMESPACES; use queue::{CompQueue, QueueId, SubQueue}; /// The max number of MSI-X interrupts we support const NVME_MSIX_COUNT: u16 = 1024; +/// Supported block size. +/// TODO: Support more +const BLOCK_SZ: u64 = 512; + /// NVMe errors #[derive(Debug, Error)] pub enum NvmeError { @@ -49,6 +49,9 @@ pub enum NvmeError { #[error("failed to create queue: {0}")] QueueCreateErr(#[from] queue::QueueCreateErr), + #[error("failed to update queue: {0}")] + QueueUpdateError(#[from] queue::QueueUpdateError), + /// MSI-X Interrupt handle is unavailable #[error("the MSI-X interrupt handle is unavailable")] MsixHdlUnavailable, @@ -111,16 +114,19 @@ struct NvmeCtrl { msix_hdl: Option, /// The list of Completion Queues handled by the controller - cqs: [Option>>; MAX_NUM_QUEUES], + cqs: [Option>; MAX_NUM_QUEUES], /// The list of Submission Queues handled by the controller - sqs: [Option>>; MAX_NUM_QUEUES], - - /// The list of namespaces handled by the controller - nss: [Option; MAX_NUM_NAMESPACES], + sqs: [Option>; MAX_NUM_QUEUES], /// The Identify structure returned for Identify controller commands - ident: IdentifyController, + ctrl_ident: IdentifyController, + + /// The Identify structure returned for Identify namespace commands + ns_ident: IdentifyNamespace, + + /// Underlying Block Device info + binfo: block::DeviceInfo, } impl NvmeCtrl { @@ -171,7 +177,7 @@ impl NvmeCtrl { .ok_or(NvmeError::MsixHdlUnavailable)? .clone(); let cq = CompQueue::new(cqid, iv, size, base, ctx, msix_hdl)?; - self.cqs[cqid as usize] = Some(Arc::new(Mutex::new(cq))); + self.cqs[cqid as usize] = Some(Arc::new(cq)); Ok(()) } @@ -190,25 +196,20 @@ impl NvmeCtrl { if (sqid as usize) >= MAX_NUM_QUEUES { return Err(NvmeError::InvalidSubQueue(sqid)); } - if (cqid as usize) >= MAX_NUM_QUEUES - || self.cqs[cqid as usize].is_none() - { - return Err(NvmeError::InvalidCompQueue(cqid)); - } if self.sqs[sqid as usize].is_some() { return Err(NvmeError::SubQueueAlreadyExists(sqid)); } - let sq = SubQueue::new(sqid, cqid, size, base, ctx)?; - self.sqs[sqid as usize] = Some(Arc::new(Mutex::new(sq))); + let cq = self.get_cq(cqid)?; + let sq = SubQueue::new(sqid, cq, size, base, ctx)?; + self.sqs[sqid as usize] = Some(Arc::new(sq)); Ok(()) } /// Returns a reference to the [`CompQueue`] which corresponds to the given completion queue id (`cqid`). - fn get_cq( - &self, - cqid: QueueId, - ) -> Result>, NvmeError> { - debug_assert!((cqid as usize) < MAX_NUM_QUEUES); + fn get_cq(&self, cqid: QueueId) -> Result, NvmeError> { + if (cqid as usize) >= MAX_NUM_QUEUES { + return Err(NvmeError::InvalidCompQueue(cqid)); + } self.cqs[cqid as usize] .as_ref() .map(Arc::clone) @@ -216,8 +217,10 @@ impl NvmeCtrl { } /// Returns a reference to the [`SubQueue`] which corresponds to the given submission queue id (`cqid`). - fn get_sq(&self, sqid: QueueId) -> Result>, NvmeError> { - debug_assert!((sqid as usize) < MAX_NUM_QUEUES); + fn get_sq(&self, sqid: QueueId) -> Result, NvmeError> { + if (sqid as usize) >= MAX_NUM_QUEUES { + return Err(NvmeError::InvalidSubQueue(sqid)); + } self.sqs[sqid as usize] .as_ref() .map(Arc::clone) @@ -229,7 +232,7 @@ impl NvmeCtrl { /// # Panics /// /// Panics if the Admin Completion Queue hasn't been created yet. - fn get_admin_cq(&self) -> Arc> { + fn get_admin_cq(&self) -> Arc { self.get_cq(queue::ADMIN_QUEUE_ID).unwrap() } @@ -238,30 +241,10 @@ impl NvmeCtrl { /// # Panics /// /// Panics if the Admin Submission Queue hasn't been created yet. - fn get_admin_sq(&self) -> Arc> { + fn get_admin_sq(&self) -> Arc { self.get_sq(queue::ADMIN_QUEUE_ID).unwrap() } - /// Add a new namespace to the controller - fn add_ns(&mut self, ns: NvmeNs) -> Result<(), NvmeError> { - // Find the first empty spot - if let Some(spot) = self.nss.iter_mut().find(|n| n.is_none()) { - *spot = Some(ns); - self.ident.nn += 1; - Ok(()) - } else { - Err(NvmeError::TooManyNamespaces) - } - } - - /// Returns a reference to the [`NvmeNs`] which corresponds to the given namespace id (`nsid`). - fn get_ns(&self, nsid: u32) -> Result<&NvmeNs, NvmeError> { - debug_assert!((nsid as usize) <= MAX_NUM_NAMESPACES); - self.nss[nsid as usize - 1] - .as_ref() - .ok_or(NvmeError::InvalidNamespace(nsid)) - } - /// Performs a Controller Reset. /// /// The reset deletes all I/O Submission & Completion Queues, resets @@ -283,17 +266,29 @@ impl NvmeCtrl { *cq = None; } } + + /// Convert some number of logical blocks to bytes with the currently active LBA data size + fn nlb_to_size(&self, b: usize) -> usize { + b << (self.ns_ident.lbaf[(self.ns_ident.flbas & 0xF) as usize]).lbads + } } /// NVMe over PCIe pub struct PciNvme { /// NVMe Controller state: Mutex, + + /// Underlying Block Device notifier + notifier: block::Notifier, } impl PciNvme { /// Create a new pci-nvme device with the given values - pub fn create(vendor: u16, device: u16) -> Arc { + pub fn create( + vendor: u16, + device: u16, + binfo: block::DeviceInfo, + ) -> Arc { let builder = pci::Builder::new(pci::Ident { vendor_id: vendor, device_id: device, @@ -314,7 +309,7 @@ impl PciNvme { // Initialize the Identify structure returned when the host issues // an Identify Controller command. - let ident = bits::IdentifyController { + let ctrl_ident = bits::IdentifyController { vid: vendor, ssvid: vendor, // TODO: fill out serial number @@ -323,13 +318,36 @@ impl PciNvme { // data, so required (minimum) == maximum sqes: NvmQueueEntrySize(0).with_maximum(sqes).with_required(sqes), cqes: NvmQueueEntrySize(0).with_maximum(cqes).with_required(cqes), - // No namespaces initially - nn: 0, + // Supporting multiple namespaces complicates I/O dispatching, + // so for now we limit the device to a single namespace. + nn: 1, // bit 0 indicates volatile write cache is present vwc: 1, ..Default::default() }; + // Initialize the Identify structure returned when the host issues + // an Identify Namespace command. + let total_bytes = binfo.total_size * binfo.block_size as u64; + let nsze = total_bytes / BLOCK_SZ; + let mut ns_ident = bits::IdentifyNamespace { + // No thin provisioning so nsze == ncap == nuse + nsze, + ncap: nsze, + nuse: nsze, + nlbaf: 0, // We only support a single LBA format (1 but 0-based) + flbas: 0, // And it is at index 0 in the lbaf array + ..Default::default() + }; + + // Update the block format we support + debug_assert!( + BLOCK_SZ.is_power_of_two(), + "BLOCK_SZ must be a power of 2" + ); + debug_assert!(BLOCK_SZ >= 512, "BLOCK_SZ must be at least 512 bytes"); + ns_ident.lbaf[0].lbads = BLOCK_SZ.trailing_zeros() as u8; + // Initialize the CAP "register" leaving most values // at their defaults (0): // TO = 0 => 0ms to wait for controller to be ready @@ -370,11 +388,13 @@ impl PciNvme { msix_hdl: None, cqs: Default::default(), sqs: Default::default(), - nss: Default::default(), - ident, + ctrl_ident, + ns_ident, + binfo, }; - let nvme = PciNvme { state: Mutex::new(state) }; + let notifier = block::Notifier::new(); + let nvme = PciNvme { state: Mutex::new(state), notifier }; builder // XXX: add room for doorbells @@ -386,12 +406,6 @@ impl PciNvme { .finish(Arc::new(nvme)) } - /// Add a new namespace to the controller - pub fn add_ns(&self, ns: NvmeNs) -> Result<(), NvmeError> { - let mut state = self.state.lock().unwrap(); - state.add_ns(ns) - } - /// Service a write to the NVMe Controller Configuration from the VM fn ctrlr_cfg_write( &self, @@ -533,25 +547,24 @@ impl PciNvme { let val = wo.read_u32().try_into().unwrap(); let state = self.state.lock().unwrap(); let admin_sq = state.get_admin_sq(); - let mut sq = admin_sq.lock().unwrap(); - match sq.notify_tail(val) { - Ok(_) => {} - Err(_) => todo!("set controller error state"), - } + admin_sq.notify_tail(val)?; // Process any new SQ entries - self.process_admin_queue(state, sq, ctx)?; + self.process_admin_queue(state, admin_sq, ctx)?; } CtrlrReg::DoorBellAdminCQ => { let val = wo.read_u32().try_into().unwrap(); let state = self.state.lock().unwrap(); let admin_cq = state.get_admin_cq(); - let mut cq = admin_cq.lock().unwrap(); - match cq.notify_head(val) { - Ok(_) => {} - Err(_) => todo!("set controller error state"), + admin_cq.notify_head(val)?; + + // We may have skipped pulling entries off the admin sq + // due to no available completion entry permit, so just + // kick it here again in case. + if admin_cq.kick() { + let admin_sq = state.get_admin_sq(); + self.process_admin_queue(state, admin_sq, ctx)?; } - // TODO: post any entries to the CQ now that it has more space } CtrlrReg::IOQueueDoorBells => { @@ -571,24 +584,25 @@ impl PciNvme { if (off >> 2) & 0b1 == 0b1 { // Completion Queue y Head Doorbell let y = (off - 4) >> 3; - let io_cq = state.get_cq(y as u16)?; - let mut cq = io_cq.lock().unwrap(); - match cq.notify_head(val) { - Ok(_) => {} - Err(_) => todo!("set controller error state"), + let cq = state.get_cq(y as u16)?; + cq.notify_head(val)?; + + // We may have skipped pulling entries off some SQ due to this + // CQ having no available entry slots. Since we've just free'd + // up some slots, kick the SQs (excl. admin) here just in case. + // TODO: worth kicking only the SQs specifically associated + // with this CQ? + if cq.kick() { + self.notifier.notify(self, ctx); } - // TODO: post any entries to the CQ now that it has more space } else { // Submission Queue y Tail Doorbell let y = off >> 3; - let io_sq = state.get_sq(y as u16)?; - let mut sq = io_sq.lock().unwrap(); - match sq.notify_tail(val) { - Ok(_) => {} - Err(_) => todo!("set controller error state"), - } - drop(sq); - self.process_io_queue(state, io_sq, ctx)?; + let sq = state.get_sq(y as u16)?; + sq.notify_tail(val)?; + + // Poke block device to service new requests + self.notifier.notify(self, ctx); } } } @@ -600,14 +614,13 @@ impl PciNvme { fn process_admin_queue( &self, mut state: MutexGuard, - mut sq: MutexGuard, + sq: Arc, ctx: &DispCtx, ) -> Result<(), NvmeError> { // Grab the Admin CQ too - let admin_cq = state.get_admin_cq(); - let mut cq = admin_cq.lock().unwrap(); + let cq = state.get_admin_cq(); - while let Some(sub) = sq.pop(ctx) { + while let Some((sub, cqe_permit)) = sq.pop(ctx) { use cmds::AdminCmd; let parsed = AdminCmd::parse(sub); @@ -638,55 +651,10 @@ impl PciNvme { } }; - let completion = RawCompletion { - dw0: comp.dw0, - rsvd: 0, - sqhd: sq.head(), - sqid: sq.id(), - cid: sub.cid(), - status_phase: comp.status | cq.phase(), - }; - cq.push(completion, ctx); - } - - // Notify for any newly added completions - cq.fire_interrupt(ctx); - - Ok(()) - } - - /// Process any new entries in an I/O Submission Queue - fn process_io_queue( - &self, - state: MutexGuard, - io_sq: Arc>, - ctx: &DispCtx, - ) -> Result<(), NvmeError> { - let mut sq = io_sq.lock().unwrap(); - - // Grab the corresponding CQ - let io_cq = state.get_cq(sq.cqid())?; - - // Collect all the IO SQ entries, per namespace - let mut io_cmds = BTreeMap::new(); - while let Some(sub) = sq.pop(ctx) { - io_cmds.entry(sub.nsid).or_insert_with(Vec::new).push(sub); - } - - drop(sq); - - // Queue up said IO entries to the underlying block device, per namespace - for (nsid, io_cmds) in io_cmds { - state.get_ns(nsid)?.queue_io_cmds( - io_cmds, - io_cq.clone(), - io_sq.clone(), - ctx, - )?; + cqe_permit.push_completion(sub.cid(), comp, ctx); } // Notify for any newly added completions - let cq = io_cq.lock().unwrap(); cq.fire_interrupt(ctx); Ok(()) diff --git a/propolis/src/hw/nvme/ns.rs b/propolis/src/hw/nvme/ns.rs deleted file mode 100644 index 95d86ad58..000000000 --- a/propolis/src/hw/nvme/ns.rs +++ /dev/null @@ -1,289 +0,0 @@ -use std::collections::VecDeque; -use std::sync::{Arc, Mutex}; - -use crate::block::*; -use crate::hw::nvme::bits::RawCompletion; -use crate::hw::nvme::cmds::{self, NvmCmd}; -use crate::{common, dispatch::DispCtx}; - -use super::bits::{self, RawSubmission}; -use super::cmds::{Completion, ReadCmd, WriteCmd}; -use super::queue::{CompQueue, SubQueue}; -use super::NvmeError; - -/// Max number of namespaces we support -pub const MAX_NUM_NAMESPACES: usize = 16; - -/// Supported block size. -/// TODO: Support more -const BLOCK_SZ: u64 = 512; - -/// NVMe Namespace with underlying block device -pub struct NvmeNs { - /// The Identify structure returned for Identify namespace commands - pub ident: bits::IdentifyNamespace, - - /// The underlying block device to service read/write requests - bdev: Arc>, - - /// Whether the underlying block device readonly - is_ro: bool, -} - -impl NvmeNs { - /// Create a new NVMe namespace with the given block device - pub fn create(bdev: Arc>) -> Self { - let binfo = bdev.inquire(); - let total_bytes = binfo.total_size * binfo.block_size as u64; - let nsze = total_bytes / BLOCK_SZ; - - let mut ident = bits::IdentifyNamespace { - // No thin provisioning so nsze == ncap == nuse - nsze, - ncap: nsze, - nuse: nsze, - nlbaf: 0, // We only support a single LBA format (1 but 0-based) - flbas: 0, // And it is at index 0 in the lbaf array - ..Default::default() - }; - - debug_assert_eq!( - BLOCK_SZ.count_ones(), - 1, - "BLOCK_SZ must be a power of 2" - ); - debug_assert!( - BLOCK_SZ.trailing_zeros() >= 9, - "BLOCK_SZ must be at least 512 bytes" - ); - ident.lbaf[0].lbads = BLOCK_SZ.trailing_zeros() as u8; - - NvmeNs { ident, bdev, is_ro: !binfo.writable } - } - - /// Convert some number of logical blocks to bytes with the currently active LBA data size - fn nlb_to_size(&self, b: usize) -> usize { - b << (self.ident.lbaf[(self.ident.flbas & 0xF) as usize].lbads) - } - - /// Takes the given list of raw IO commands and queues up reads and writes to the underlying - /// block device as appropriate. - pub(super) fn queue_io_cmds( - &self, - cmds: Vec, - cq: Arc>, - sq: Arc>, - ctx: &DispCtx, - ) -> Result<(), NvmeError> { - for sub in cmds { - let cmd = NvmCmd::parse(sub)?; - match cmd { - NvmCmd::Write(_) if self.is_ro => { - let mut cq = cq.lock().unwrap(); - let sq = sq.lock().unwrap(); - let comp = Completion::specific_err( - bits::StatusCodeType::CmdSpecific, - bits::STS_WRITE_READ_ONLY_RANGE, - ); - let completion = RawCompletion { - dw0: comp.dw0, - rsvd: 0, - sqhd: sq.head(), - sqid: sq.id(), - cid: sub.cid(), - status_phase: comp.status | cq.phase(), - }; - - cq.push(completion, ctx); - } - NvmCmd::Write(cmd) => { - self.write_cmd(sub.cid(), cmd, ctx, cq.clone(), sq.clone()) - } - NvmCmd::Read(cmd) => { - self.read_cmd(sub.cid(), cmd, ctx, cq.clone(), sq.clone()) - } - NvmCmd::Flush => { - self.flush_cmd(sub.cid(), cq.clone(), sq.clone()) - } - NvmCmd::Unknown(_) => { - // For any other command, just immediately complete it - let mut cq = cq.lock().unwrap(); - let sq = sq.lock().unwrap(); - - let comp = Completion::generic_err(bits::STS_INTERNAL_ERR); - let completion = RawCompletion { - dw0: comp.dw0, - rsvd: 0, - sqhd: sq.head(), - sqid: sq.id(), - cid: sub.cid(), - status_phase: comp.status | cq.phase(), - }; - - cq.push(completion, ctx); - } - } - } - - Ok(()) - } - - /// Enqueues a flush to the underlying block device - fn flush_cmd( - &self, - cid: u16, - cq: Arc>, - sq: Arc>, - ) { - // TODO: handles if it gets unmapped? - self.bdev.enqueue(Request { - op: BlockOp::Flush, - off: 0, - xfer_left: 0, - bufs: VecDeque::new(), - cid, - cq, - sq, - }); - } - - /// Enqueues a read to the underlying block device - fn read_cmd( - &self, - cid: u16, - cmd: ReadCmd, - ctx: &DispCtx, - cq: Arc>, - sq: Arc>, - ) { - probe_nvme_read_enqueue!(|| (cid, cmd.slba, cmd.nlb)); - let off = self.nlb_to_size(cmd.slba as usize); - let size = self.nlb_to_size(cmd.nlb as usize); - // TODO: handles if it gets unmapped? - let bufs = cmd.data(size as u64, ctx.mctx.memctx()).collect(); - self.bdev.enqueue(Request { - op: BlockOp::Read, - off, - xfer_left: size, - bufs, - cid, - cq, - sq, - }); - } - - /// Enqueues a write to the underlying block device - fn write_cmd( - &self, - cid: u16, - cmd: WriteCmd, - ctx: &DispCtx, - cq: Arc>, - sq: Arc>, - ) { - probe_nvme_write_enqueue!(|| (cid, cmd.slba, cmd.nlb)); - let off = self.nlb_to_size(cmd.slba as usize); - let size = self.nlb_to_size(cmd.nlb as usize); - // TODO: handles if it gets unmapped? - let bufs = cmd.data(size as u64, ctx.mctx.memctx()).collect(); - self.bdev.enqueue(Request { - op: BlockOp::Write, - off, - xfer_left: size, - bufs, - cid, - cq, - sq, - }); - } -} - -/// I/O Request to block device -pub struct Request { - /// The operation type - op: BlockOp, - - /// The offset at which to begin reading/writing - off: usize, - - /// How many bytes to read/write - xfer_left: usize, - - /// The buffers to read/write from/to - bufs: VecDeque, - - /// The associated command id - cid: u16, - - /// The associated Completion Queue - cq: Arc>, - - /// The associated Submission Queue - sq: Arc>, -} - -impl BlockReq for Request { - fn oper(&self) -> BlockOp { - self.op - } - - fn offset(&self) -> usize { - self.off - } - - fn next_buf(&mut self) -> Option { - if self.xfer_left == 0 { - return None; - } - - if let Some(mut region) = self.bufs.pop_front() { - if region.1 > self.xfer_left { - region.1 = self.xfer_left; - } - self.xfer_left -= region.1; - Some(region) - } else { - None - } - } - - fn complete(self, res: BlockResult, ctx: &DispCtx) { - let comp = match res { - BlockResult::Success => cmds::Completion::success(), - BlockResult::Failure => { - cmds::Completion::generic_err(bits::STS_DATA_XFER_ERR) - } - BlockResult::Unsupported => cmds::Completion::specific_err( - bits::StatusCodeType::CmdSpecific, - bits::STS_READ_CONFLICTING_ATTRS, - ), - }; - - match self.op { - BlockOp::Read => { - probe_nvme_read_complete!(|| (self.cid)); - } - BlockOp::Write => { - probe_nvme_write_complete!(|| (self.cid)); - } - _ => {} - } - - let sq = self.sq.lock().unwrap(); - let mut cq = self.cq.lock().unwrap(); - - let completion = bits::RawCompletion { - dw0: comp.dw0, - rsvd: 0, - sqhd: sq.head(), - sqid: sq.id(), - cid: self.cid, - status_phase: comp.status | cq.phase(), - }; - - cq.push(completion, ctx); - - // TODO: should this be done here? - cq.fire_interrupt(ctx); - } -} diff --git a/propolis/src/hw/nvme/queue.rs b/propolis/src/hw/nvme/queue.rs index 5ddebe3cc..96def2905 100644 --- a/propolis/src/hw/nvme/queue.rs +++ b/propolis/src/hw/nvme/queue.rs @@ -1,6 +1,7 @@ -use std::marker::PhantomData; +use std::sync::{Arc, Mutex}; use super::bits::{self, RawCompletion, RawSubmission}; +use super::cmds::Completion; use crate::common::*; use crate::dispatch::DispCtx; use crate::hw::pci; @@ -14,7 +15,8 @@ pub type QueueId = u16; /// The minimum number of entries in either a Completion or Submission Queue. /// -/// Note: One entry will always be unavailable for use due to Head and Tail entry pointer defition. +/// Note: One entry will always be unavailable for use due to the Head and Tail +/// entry pointer definitions. /// See NVMe 1.0e Section 4.1.3 Queue Size const MIN_QUEUE_SIZE: u32 = 2; @@ -33,25 +35,50 @@ const MAX_ADMIN_QUEUE_SIZE: u32 = 1 << 12; /// See NVMe 1.0e Section 1.6.1 Admin Queue pub const ADMIN_QUEUE_ID: QueueId = 0; -/// Marker type to indicate a Completion Queue. -enum CompletionQueueType {} +/// Completion Queue State +#[derive(Debug)] +struct CompQueueState { + /// The Queue Head entry pointer. + /// + /// The consumer of entries on a queue uses the current Head entry pointer + /// to identify the next entry to be pulled off the queue. + /// + /// See NVMe 1.0e Section 4.1 Submission Queue & Completion Queue Definition + head: u16, -/// Marker type to indicate a Submission Queue. -enum SubmissionQueueType {} + /// The Queue Tail entry pointer. + /// + /// The submitter of entries to a queue uses the current Tail entry pointer + /// to identify the next open queue entry space. + /// + /// See NVMe 1.0e Section 4.1 Submission Queue & Completion Queue Definition + tail: u16, -/// Helper for manipulating Completion/Submission Queues -/// -/// The type parameter `QT` is used to constrain the set of -/// methods exposed based on whether the queue in question -/// is a Completion or Submission queue. Use either -/// `CompletionQueueType` or `SubmissionQueueType`. -struct QueueState { - /// The size of the queue in question. + /// Number of entries that are available for use. /// - /// See NVMe 1.0e Section 4.1.3 Queue Size - size: u32, + /// Starts off as queue size - 1 and gets decremented for each corresponding + /// Submission Queue entry we begin servicing. + avail: u16, - /// The current Head entry pointer. + /// The current phase tag. + /// + /// The Phase Tag is used to identify to the host (VM) that a Completion + /// entry is new. Flips every time the Tail entry pointer wraps around. + /// + /// See NVMe 1.0e Section 4.5 Completion Queue Entry - Phase Tag (P) + phase: bool, + + /// Whether the CQ should kick its SQs due to no permits being available previously. + /// + /// One may only pop something off the SQ if there's at least one space available in + /// the corresponding CQ. If there isn't, we set the kick flag. + kick: bool, +} + +/// Submission Queue State +#[derive(Debug)] +struct SubQueueState { + /// The Queue Head entry pointer. /// /// The consumer of entries on a queue uses the current Head entry pointer /// to identify the next entry to be pulled off the queue. @@ -59,44 +86,55 @@ struct QueueState { /// See NVMe 1.0e Section 4.1 Submission Queue & Completion Queue Definition head: u16, - /// The current Tail entry pointer. + /// The Queue Tail entry pointer. /// /// The submitter of entries to a queue uses the current Tail entry pointer /// to identify the next open queue entry space. /// /// See NVMe 1.0e Section 4.1 Submission Queue & Completion Queue Definition tail: u16, - - /// Marker type to indicate what type of Queue we're modeling. - _qt: PhantomData, } -impl QueueState { - /// Create a new `QueueState` - fn new(size: u32, head: u16, tail: u16) -> Self { - assert!(size >= MIN_QUEUE_SIZE && size <= MAX_QUEUE_SIZE); - Self { size, head, tail, _qt: PhantomData } - } +/// Helper for manipulating Completion/Submission Queues +/// +/// The type parameter `QT` is used to constrain the set of +/// methods exposed based on whether the queue in question +/// is a Completion or Submission queue. Use either +/// `CompletionQueueType` or `SubmissionQueueType`. +#[derive(Debug)] +struct QueueState { + /// The size of the queue in question. + /// + /// See NVMe 1.0e Section 4.1.3 Queue Size + size: u32, + + /// The actual queue state that gets updated during the normal course of operation. + /// + /// Either `CompQueueState` for a Completion Queue or + /// a `SubQueueState` for a Submission Queue. + inner: Mutex, +} - /// Returns if the queue is currently empty. +impl QueueState { + /// Returns if the queue is currently empty with the given head and tail pointers. /// /// A queue is empty when the Head entry pointer equals the Tail entry pointer. /// /// See: NVMe 1.0e Section 4.1.1 Empty Queue - fn is_empty(&self) -> bool { - self.head == self.tail + fn is_empty(&self, head: u16, tail: u16) -> bool { + head == tail } - /// Returns if the queue is currently full. + /// Returns if the queue is currently full with the given head and tail pointers. /// /// The queue is full when the Head entry pointer equals one more than the Tail /// entry pointer. The number of entries in a queue will always be 1 less than /// the queue size. /// /// See: NVMe 1.0e Section 4.1.2 Full Queue - fn is_full(&self) -> bool { - (self.head > 0 && self.tail == (self.head - 1)) - || (self.head == 0 && self.tail == (self.size - 1) as u16) + fn is_full(&self, head: u16, tail: u16) -> bool { + (head > 0 && tail == (head - 1)) + || (head == 0 && tail == (self.size - 1) as u16) } /// Helper method to calculate a positive offset for a given index, wrapping at @@ -125,35 +163,48 @@ impl QueueState { idx - off } } +} - /// How many slots are empty between the tail and the head i.e., how many - /// entries can we write to the queue currently. - fn avail_empty(&self) -> u16 { - self.wrap_sub(self.wrap_sub(self.head, 1), self.tail) - } - - /// How many slots are occupied between the head and the tail i.e., how - /// many entries can we read from the queue currently. - fn avail_occupied(&self) -> u16 { - self.wrap_sub(self.tail, self.head) +impl QueueState { + /// Create a new `QueueState` for a Completion Queue + fn new_completion_state(size: u32) -> QueueState { + assert!(size >= MIN_QUEUE_SIZE && size <= MAX_QUEUE_SIZE); + // As the device side, we start with our phase tag as asserted (1) + // as the host side (VM) will create all the Completion Queue entries + // with the phase initially zeroed out. + let inner = CompQueueState { + head: 0, + tail: 0, + avail: (size - 1) as u16, + phase: true, + kick: false, + }; + Self { size, inner: Mutex::new(inner) } } -} -impl QueueState { /// Attempt to return the Tail entry pointer and then move it forward by 1. /// /// If the queue is full this method returns [`None`]. /// Otherwise, this method returns the current Tail entry pointer and then - /// increments the Tail entry pointer by 1 (wrapping if necessary). It will - /// also return whether the Tail entry pointer wrapped after incrementing. - fn push_tail(&mut self) -> Option<(u16, bool)> { - if self.is_full() { - None - } else { - let result = Some(self.tail); - self.tail = self.wrap_add(self.tail, 1); - result.map(|r| (r, (r + 1) as u32 >= self.size)) + /// increments the Tail entry pointer by 1 (wrapping if necessary). + fn push_tail(&self) -> Option { + let mut state = self.inner.lock().unwrap(); + if self.is_full(state.head, state.tail) { + return None; + } + if state.tail as u32 + 1 >= self.size { + // We wrapped so flip phase + state.phase = !state.phase; } + let old_tail = state.tail; + state.tail = self.wrap_add(old_tail, 1); + Some(old_tail) + } + + /// How many slots are occupied between the head and the tail i.e., how + /// many entries can we read from the queue currently. + fn avail_occupied(&self, head: u16, tail: u16) -> u16 { + self.wrap_sub(tail, head) } /// Attempt to move the Head entry pointer forward to the given index. @@ -162,33 +213,50 @@ impl QueueState { /// must have enough occupied slots otherwise we return an error. /// Conceptually this method indicates some entries have been consumed /// from the queue. - fn pop_head_to(&mut self, idx: u16) -> Result<(), &'static str> { + fn pop_head_to(&self, idx: u16) -> Result<(), QueueUpdateError> { if idx as u32 >= self.size { - return Err("invalid index"); + return Err(QueueUpdateError::InvalidEntry); } - let pop_count = self.wrap_sub(idx, self.head); - if pop_count > self.avail_occupied() { - return Err("index too far"); + let mut state = self.inner.lock().unwrap(); + let pop_count = self.wrap_sub(idx, state.head); + if pop_count > self.avail_occupied(state.head, state.tail) { + return Err(QueueUpdateError::TooManyEntries); } - self.head = idx; + // Replace head with given idx and update the number of available slots + state.head = idx; + state.avail += pop_count; + Ok(()) } } -impl QueueState { +impl QueueState { + /// Create a new `QueueState` for a Submission Queue + fn new_submission_state(size: u32) -> QueueState { + assert!(size >= MIN_QUEUE_SIZE && size <= MAX_QUEUE_SIZE); + let inner = SubQueueState { head: 0, tail: 0 }; + Self { size, inner: Mutex::new(inner) } + } + /// Attempt to return the Head entry pointer and then move it forward by 1. /// /// If the queue is empty this method returns [`None`]. /// Otherwise, this method returns the current Head entry pointer and then /// increments the Head entry pointer by 1 (wrapping if necessary). - fn pop_head(&mut self) -> Option { - if self.is_empty() { - None - } else { - let result = Some(self.head); - self.head = self.wrap_add(self.head, 1); - result + fn pop_head(&self) -> Option { + let mut state = self.inner.lock().unwrap(); + if self.is_empty(state.head, state.tail) { + return None; } + let old_head = state.head; + state.head = self.wrap_add(old_head, 1); + Some(old_head) + } + + /// How many slots are empty between the tail and the head i.e., how many + /// entries can we write to the queue currently. + fn avail_empty(&self, head: u16, tail: u16) -> u16 { + self.wrap_sub(self.wrap_sub(head, 1), tail) } /// Attempt to move the Tail entry pointer forward to the given index. @@ -197,15 +265,18 @@ impl QueueState { /// must have enough empty slots available otherwise we return an error. /// Conceptually this method indicates new entries have been added to the /// queue. - fn push_tail_to(&mut self, idx: u16) -> Result<(), &'static str> { + fn push_tail_to(&self, idx: u16) -> Result<(), QueueUpdateError> { if idx as u32 >= self.size { - return Err("invalid index"); + return Err(QueueUpdateError::InvalidEntry); } - let push_count = self.wrap_sub(idx, self.tail); - if push_count > self.avail_empty() { - return Err("index too far"); + let mut state = self.inner.lock().unwrap(); + let push_count = self.wrap_sub(idx, state.tail); + if push_count > self.avail_empty(state.head, state.tail) { + return Err(QueueUpdateError::TooManyEntries); } - self.tail = idx; + // Replace tail with given idx + state.tail = idx; + Ok(()) } } @@ -222,16 +293,29 @@ pub enum QueueCreateErr { InvalidSize, } +/// Errors that may be encountered while adjusting Queue head/tail pointers. +#[derive(Error, Debug)] +pub enum QueueUpdateError { + #[error("tried to move head or tail pointer to an invalid index")] + InvalidEntry, + + #[error( + "tried to push or pop too many entries given the current head/tail" + )] + TooManyEntries, +} + /// Type for manipulating Submission Queues. +#[derive(Debug)] pub struct SubQueue { - /// The ID of queue in question. + /// The ID of this Submission Queue. id: QueueId, - /// The ID of the corresponding Completion Queue. - cqid: QueueId, + /// The corresponding Completion Queue. + cq: Arc, /// Queue state such as the size and current head/tail entry pointers. - state: QueueState, + state: QueueState, /// The [`GuestAddr`] at which the Queue is mapped. base: GuestAddr, @@ -242,48 +326,50 @@ impl SubQueue { /// given base address. pub fn new( id: QueueId, - cqid: QueueId, + cq: Arc, size: u32, base: GuestAddr, ctx: &DispCtx, ) -> Result { Self::validate(id, base, size, ctx)?; - Ok(Self { id, cqid, state: QueueState::new(size, 0, 0), base }) + Ok(Self { id, cq, state: QueueState::new_submission_state(size), base }) } /// Attempt to move the Tail entry pointer forward to the given index. - pub fn notify_tail(&mut self, idx: u16) -> Result<(), &'static str> { + pub fn notify_tail(&self, idx: u16) -> Result<(), QueueUpdateError> { self.state.push_tail_to(idx) } /// Returns the next entry off of the Queue or [`None`] if it is empty. - pub fn pop(&mut self, ctx: &DispCtx) -> Option { + pub fn pop( + self: &Arc, + ctx: &DispCtx, + ) -> Option<(bits::RawSubmission, CompQueueEntryPermit)> { + // Attempt to reserve an entry on the Completion Queue + let cqe_permit = self.cq.reserve_entry(self.clone())?; if let Some(idx) = self.state.pop_head() { let mem = ctx.mctx.memctx(); let ent: Option = mem.read(self.entry_addr(idx)); // XXX: handle a guest addr that becomes unmapped later - ent + ent.map(|ent| (ent, cqe_permit)) } else { + // No Submission Queue entry, so return the CQE permit + cqe_permit.remit(); None } } /// Returns the current Head entry pointer. - pub fn head(&self) -> u16 { - self.state.head + fn head(&self) -> u16 { + let state = self.state.inner.lock().unwrap(); + state.head } /// Returns the ID of this Submission Queue. - pub fn id(&self) -> QueueId { + fn id(&self) -> QueueId { self.id } - /// Returns the ID of the corresponding Completion Queue - /// to this Submission Queue. - pub fn cqid(&self) -> QueueId { - self.cqid - } - /// Returns the corresponding [`GuestAddr`] for a given entry in /// the Submission Queue. fn entry_addr(&self, idx: u16) -> GuestAddr { @@ -321,23 +407,18 @@ impl SubQueue { } /// Type for manipulating Completion Queues. +#[derive(Debug)] pub struct CompQueue { /// The Interrupt Vector used to signal to the host (VM) upon pushing /// entries onto the Completion Queue. iv: u16, /// Queue state such as the size and current head/tail entry pointers. - state: QueueState, + state: QueueState, /// The [`GuestAddr`] at which the Queue is mapped. base: GuestAddr, - /// The current Phase Tag to identify to the host (VM) that a Completion - /// entry is new. Flips every time the Tail entry pointer wraps around. - /// - /// See NVMe 1.0e Section 4.5 Completion Queue Entry - Phase Tag (P) - phase: bool, - /// MSI-X object associated with PCIe device to signal host (VM). hdl: pci::MsixHdl, } @@ -356,50 +437,83 @@ impl CompQueue { Self::validate(id, base, size, ctx)?; Ok(Self { iv, - state: QueueState::new(size, 0, 0), + state: QueueState::new_completion_state(size), base, - phase: true, hdl, }) } /// Attempt to move the Head entry pointer forward to the given index. - pub fn notify_head(&mut self, idx: u16) -> Result<(), &'static str> { + pub fn notify_head(&self, idx: u16) -> Result<(), QueueUpdateError> { self.state.pop_head_to(idx) } - /// Attempt to add a new entry to the Completion Queue. + /// Fires an interrupt to the guest with the associated interrupt vector + /// if the queue is not currently empty. + pub fn fire_interrupt(&self, ctx: &DispCtx) { + let state = self.state.inner.lock().unwrap(); + if !self.state.is_empty(state.head, state.tail) { + self.hdl.fire(self.iv, ctx); + } + } + + /// Returns whether the SQ's should be kicked due to no permits being available previously. /// - /// TODO: handle the case where the queue may be currently full. - pub fn push(&mut self, entry: RawCompletion, ctx: &DispCtx) { - if let Some((idx, wrapped)) = self.state.push_tail() { - let mem = ctx.mctx.memctx(); - let addr = self.entry_addr(idx); - mem.write(addr, &entry); - if wrapped { - self.phase = !self.phase; - } - // XXX: handle a guest addr that becomes unmapped later - // XXX: figure out interrupts + /// If the value was true, it will also get reset to false. + pub fn kick(&self) -> bool { + let mut state = self.state.inner.lock().unwrap(); + let old_kick = state.kick; + state.kick = false; + old_kick + } + + /// Attempt to reserve an entry in the Completion Queue. + /// + /// An entry permit allows the user to push onto the Completion Queue. + fn reserve_entry( + self: &Arc, + sq: Arc, + ) -> Option { + let mut state = self.state.inner.lock().unwrap(); + // No more spots available + if state.avail == 0 { + // Make sure we kick the SQ's when we have space available again + state.kick = true; + + None + } else { + // Otherwise claim a spot + state.avail -= 1; + + Some(CompQueueEntryPermit { cq: self.clone(), sq }) } } + /// Add a new entry to the Completion Queue while consuming a `CompQueueEntryPermit`. + fn push( + &self, + _permit: CompQueueEntryPermit, + entry: RawCompletion, + ctx: &DispCtx, + ) { + // Since we have a permit, there should always be at least + // one space in the queue and this unwrap shouldn't fail. + let idx = self.state.push_tail().unwrap(); + let mem = ctx.mctx.memctx(); + let addr = self.entry_addr(idx); + mem.write(addr, &entry); + // XXX: handle a guest addr that becomes unmapped later + } + /// Returns the current Phase Tag bit. /// /// The current Phase Tag to identify to the host (VM) that a Completion /// entry is new. Flips every time the Tail entry pointer wraps around. /// /// See NVMe 1.0e Section 4.5 Completion Queue Entry - Phase Tag (P) - pub fn phase(&self) -> u16 { - self.phase as u16 - } - - /// Fires an interrupt to the guest with the associated interrupt vector - /// if the queue is not currently empty. - pub fn fire_interrupt(&self, ctx: &DispCtx) { - if !self.state.is_empty() { - self.hdl.fire(self.iv, ctx); - } + fn phase(&self) -> u16 { + let state = self.state.inner.lock().unwrap(); + state.phase as u16 } /// Returns the corresponding [`GuestAddr`] for a given entry in @@ -437,3 +551,500 @@ impl CompQueue { region.map(|_| ()).ok_or(QueueCreateErr::InvalidBaseAddr) } } + +/// A type which allows pushing a Completion Entry onto the Completion Queue. +#[derive(Debug)] +pub struct CompQueueEntryPermit { + /// The corresponding Completion Queue for which we have a permit. + cq: Arc, + + /// The Submission Queue for which this entry is reserved. + sq: Arc, +} + +impl CompQueueEntryPermit { + /// Consume the permit by placing an entry into the Completion Queue. + pub fn push_completion(self, cid: u16, comp: Completion, ctx: &DispCtx) { + let completion = bits::RawCompletion { + dw0: comp.dw0, + rsvd: 0, + sqhd: self.sq.head(), + sqid: self.sq.id(), + cid, + status_phase: comp.status | self.cq.phase(), + }; + + let cq = self.cq.clone(); + + cq.push(self, completion, ctx); + + // TODO: should this be done here? + cq.fire_interrupt(ctx); + } + + /// Consume the permit by placing an entry into the Completion Queue. + /// + /// This is a simpler version of `CompQueueEntryPermit::push_completion` + /// just for testing purposes that doesn't require passing in the actual + /// completion data. Meant just for excercising the Submission & Completion + /// Queues in unit tests. + #[cfg(test)] + fn push_completion_test(self, ctx: &DispCtx) { + let cq = self.cq.clone(); + cq.push(self, bits::RawCompletion::default(), ctx); + } + + /// Return the permit without having actually used it. + /// + /// Frees up the space for someone else to grab it via `CompQueue::reserve_entry`. + fn remit(self) { + let mut state = self.cq.state.inner.lock().unwrap(); + state.avail += 1; + } +} + +#[cfg(test)] +mod tests { + use rand::Rng; + + use super::*; + + use crate::{common::GuestAddr, instance::Instance}; + use std::assert_matches::assert_matches; + use std::io::Error; + use std::thread::{sleep, spawn}; + use std::time::Duration; + + #[test] + fn create_cqs() -> Result<(), Error> { + let instance = Instance::new_test(None)?; + let hdl = pci::MsixHdl::new_test(); + let read_base = GuestAddr(0); + let write_base = GuestAddr(1024 * 1024); + + instance.disp.with_ctx(|ctx| { + // Admin queues must be less than 4K + let cq = CompQueue::new( + ADMIN_QUEUE_ID, + 0, + 1024, + write_base, + ctx, + hdl.clone(), + ); + assert_matches!(cq, Ok(_)); + let cq = CompQueue::new( + ADMIN_QUEUE_ID, + 0, + 5 * 1024, + write_base, + ctx, + hdl.clone(), + ); + assert_matches!(cq, Err(QueueCreateErr::InvalidSize)); + + // I/O queues must be less than 64K + let cq = CompQueue::new(1, 0, 1024, write_base, ctx, hdl.clone()); + assert_matches!(cq, Ok(_)); + let cq = + CompQueue::new(1, 0, 65 * 1024, write_base, ctx, hdl.clone()); + assert_matches!(cq, Err(QueueCreateErr::InvalidSize)); + + // Neither must be less than 2 + let cq = CompQueue::new( + ADMIN_QUEUE_ID, + 0, + 1, + write_base, + ctx, + hdl.clone(), + ); + assert_matches!(cq, Err(QueueCreateErr::InvalidSize)); + let cq = CompQueue::new(1, 0, 1, write_base, ctx, hdl.clone()); + assert_matches!(cq, Err(QueueCreateErr::InvalidSize)); + + // Completion Queue's must be mapped to writable memory + let cq = CompQueue::new( + ADMIN_QUEUE_ID, + 0, + 2, + read_base, + ctx, + hdl.clone(), + ); + assert_matches!(cq, Err(QueueCreateErr::InvalidBaseAddr)); + let cq = CompQueue::new(1, 0, 2, read_base, ctx, hdl.clone()); + assert_matches!(cq, Err(QueueCreateErr::InvalidBaseAddr)); + }); + + Ok(()) + } + + #[test] + fn create_sqs() -> Result<(), Error> { + let instance = Instance::new_test(None)?; + let hdl = pci::MsixHdl::new_test(); + let read_base = GuestAddr(0); + let write_base = GuestAddr(1024 * 1024); + + instance.disp.with_ctx(|ctx| { + // Create corresponding CQs + let admin_cq = Arc::new( + CompQueue::new( + ADMIN_QUEUE_ID, + 0, + 1024, + write_base, + ctx, + hdl.clone(), + ) + .unwrap(), + ); + let io_cq = Arc::new( + CompQueue::new(1, 0, 1024, write_base, ctx, hdl.clone()) + .unwrap(), + ); + + // Admin queues must be less than 4K + let sq = SubQueue::new( + ADMIN_QUEUE_ID, + admin_cq.clone(), + 1024, + read_base, + ctx, + ); + assert_matches!(sq, Ok(_)); + let sq = SubQueue::new( + ADMIN_QUEUE_ID, + admin_cq.clone(), + 5 * 1024, + read_base, + ctx, + ); + assert_matches!(sq, Err(QueueCreateErr::InvalidSize)); + + // I/O queues must be less than 64K + let sq = SubQueue::new(1, io_cq.clone(), 1024, read_base, ctx); + assert_matches!(sq, Ok(_)); + let sq = SubQueue::new(1, io_cq.clone(), 65 * 1024, read_base, ctx); + assert_matches!(sq, Err(QueueCreateErr::InvalidSize)); + + // Neither must be less than 2 + let sq = SubQueue::new( + ADMIN_QUEUE_ID, + admin_cq.clone(), + 1, + read_base, + ctx, + ); + assert_matches!(sq, Err(QueueCreateErr::InvalidSize)); + let sq = SubQueue::new(1, admin_cq.clone(), 1, read_base, ctx); + assert_matches!(sq, Err(QueueCreateErr::InvalidSize)); + + // Completion Queue's must be mapped to readable memory + let sq = SubQueue::new( + ADMIN_QUEUE_ID, + admin_cq.clone(), + 2, + write_base, + ctx, + ); + assert_matches!(sq, Err(QueueCreateErr::InvalidBaseAddr)); + let sq = SubQueue::new(1, admin_cq.clone(), 2, write_base, ctx); + assert_matches!(sq, Err(QueueCreateErr::InvalidBaseAddr)); + }); + + Ok(()) + } + + #[test] + fn push_failures() -> Result<(), Error> { + let instance = Instance::new_test(None)?; + let hdl = pci::MsixHdl::new_test(); + let read_base = GuestAddr(0); + let write_base = GuestAddr(1024 * 1024); + + instance.disp.with_ctx(|ctx| { + // Create our queues + let cq = Arc::new( + CompQueue::new(1, 0, 4, write_base, ctx, hdl.clone()).unwrap(), + ); + let sq = Arc::new( + SubQueue::new(1, cq.clone(), 4, read_base, ctx).unwrap(), + ); + + // Replicate guest VM notifying us things were pushed to the SQ + let mut sq_tail = 0; + for _ in 0..sq.state.size - 1 { + sq_tail = sq.state.wrap_add(sq_tail, 1); + // These should all succeed + assert_matches!(sq.notify_tail(sq_tail), Ok(_)); + } + + // But anything more should fail + sq_tail = sq.state.wrap_add(sq_tail, 1); + assert_matches!( + sq.notify_tail(sq_tail), + Err(QueueUpdateError::TooManyEntries) + ); + + // Also anything that falls outside the boundaries (i.e. we didn't wrap properly) + assert_matches!( + sq.notify_tail(sq.state.size as u16), + Err(QueueUpdateError::InvalidEntry) + ); + + // Now pop those SQ items and complete them in the CQ + while let Some((_, permit)) = sq.pop(ctx) { + permit.push_completion_test(ctx); + } + + // Replicate guest VM notifying us things were consumed off the CQ + let mut cq_head = 0; + for _ in 0..sq.state.size - 1 { + cq_head = cq.state.wrap_add(cq_head, 1); + // These should all succeed + assert_matches!(cq.notify_head(cq_head), Ok(_)); + } + + // There's nothing else to pop so this should fail + cq_head = cq.state.wrap_add(cq_head, 1); + assert_matches!( + cq.notify_head(cq_head), + Err(QueueUpdateError::TooManyEntries) + ); + + // Also anything that falls outside the boundaries (i.e. we didn't wrap properly) + assert_matches!( + cq.notify_head(cq.state.size as u16), + Err(QueueUpdateError::InvalidEntry) + ); + }); + + Ok(()) + } + + #[test] + fn cq_kicks() -> Result<(), Error> { + let instance = Instance::new_test(None)?; + let hdl = pci::MsixHdl::new_test(); + let read_base = GuestAddr(0); + let write_base = GuestAddr(1024 * 1024); + + instance.disp.with_ctx(|ctx| { + // Create our queues + // Purposely make the CQ smaller to test kicks + let cq = Arc::new( + CompQueue::new(1, 0, 2, write_base, ctx, hdl.clone()).unwrap(), + ); + let sq = Arc::new( + SubQueue::new(1, cq.clone(), 4, read_base, ctx).unwrap(), + ); + + // Replicate guest VM notifying us things were pushed to the SQ + let mut sq_tail = 0; + for _ in 0..sq.state.size - 1 { + sq_tail = sq.state.wrap_add(sq_tail, 1); + assert_matches!(sq.notify_tail(sq_tail), Ok(_)); + } + + // We should be able to pop based on how much space is in the CQ + for _ in 0..cq.state.size - 1 { + let pop = sq.pop(ctx); + assert_matches!(pop, Some(_)); + + // Complete these in the CQ (but note guest won't have acknowledged them yet) + pop.unwrap().1.push_completion_test(ctx); + } + + // But we can't pop anymore due to no more CQ space to reserve + assert_matches!(sq.pop(ctx), None); + + // The guest consuming things off the CQ should let free us + assert_matches!(cq.notify_head(1), Ok(_)); + + // Kick should've been set in the failed pop + assert!(cq.kick()); + + // We should have one more space now and should be able to pop 1 more + assert_matches!(sq.pop(ctx), Some(_)); + }); + + Ok(()) + } + + #[test] + fn push_pop() -> Result<(), Error> { + let instance = Instance::new_test(None)?; + let hdl = pci::MsixHdl::new_test(); + let read_base = GuestAddr(0); + let write_base = GuestAddr(1024 * 1024); + + instance.disp.with_ctx(|ctx| { + // Create a pair of Completion and Submission Queues + // with a random size. We purposefully give the CQ a smaller + // size to exercise the "kick" conditions where we have some + // request available in the SQ but can't pop it until there's + // space available in the CQ. + let mut rng = rand::thread_rng(); + let sq_size = rng.gen_range(512..2048); + let cq = Arc::new( + CompQueue::new(1, 0, 4, write_base, ctx, hdl.clone()).unwrap(), + ); + let sq = Arc::new( + SubQueue::new(1, cq.clone(), sq_size, read_base, ctx).unwrap(), + ); + + // We'll be generating a random number of submissions + let submissions_rand = rng.gen_range(2..sq.state.size - 1); + + let (doorbell_tx, doorbell_rx) = + crossbeam_channel::unbounded::(); + let (workers_tx, workers_rx) = crossbeam_channel::unbounded(); + let (comp_tx, comp_rx) = crossbeam_channel::unbounded(); + + // Create a thread to mimic the main device thread that + // will handle "doorbell" read/write ops. + enum Doorbell { + Cq(u16), + Sq(u16), + } + let (doorbell_cq, doorbell_sq) = (cq.clone(), sq.clone()); + let doorbell_handler = spawn(move || { + // Keep track of the "host" side CQ head and SQ tail as + // we receive "doorbell" hits. + let mut cq_head = 0; + let mut sq_tail = 0; + loop { + match doorbell_rx.recv() { + Ok(Doorbell::Cq(n)) => { + cq_head = doorbell_cq.state.wrap_add(cq_head, n); + assert_matches!( + doorbell_cq.notify_head(cq_head), + Ok(_) + ); + if doorbell_cq.kick() { + assert!(workers_tx.send(()).is_ok()); + } + } + Ok(Doorbell::Sq(n)) => { + sq_tail = doorbell_sq.state.wrap_add(sq_tail, n); + // The "doorbell" was rung and so let's have the SQ + // update its internal state before poking the workers + assert_matches!( + doorbell_sq.notify_tail(sq_tail), + Ok(_) + ); + assert!(workers_tx.send(()).is_ok()); + } + Err(_) => break, + } + } + }); + + // Create a number of worker threads to simulate the block + // dev backend workers that will be notified every time the + // SQ "doorbell" is hit and will attempt to pull a new IO + // request off the SQ. At the end, each will return a count + // of how many requests they received and then completed. + let io_workers = (0..4) + .map(|_| { + let worker_instance = instance.clone(); + let worker_rx = workers_rx.clone(); + let worker_sq = sq.clone(); + let worker_comp_tx = comp_tx.clone(); + spawn(move || { + let mut submissions = 0; + worker_instance.disp.with_ctx(|ctx| { + let mut rng = rand::thread_rng(); + loop { + match worker_rx.recv() { + Ok(()) => { + while let Some((_, cqe_permit)) = + worker_sq.pop(ctx) + { + submissions += 1; + + // Sleep for a bit to mimic actually doing some + // work before we complete the IO + sleep(Duration::from_micros( + rng.gen_range(0..500), + )); + + cqe_permit + .push_completion_test(ctx); + + // Signal the "guest" side of the completion handler + assert!(worker_comp_tx + .send(()) + .is_ok()); + } + } + Err(_) => break, + } + } + }); + submissions + }) + }) + .collect::>(); + + // Create a thread to "consume" things off the Completion Queue. + // This simulates the host reacting to our CQ pushes and "ringing" the + // CQ doorbell. At the end, it returns how many completion were handled. + // Regardless, it'll stop after the number of completions is at least + // as many as the number of submissions we decided to generate. + let comp_doorbell_tx = doorbell_tx.clone(); + let comp_handler = spawn(move || { + let exit_after = submissions_rand; + let mut completions = 0; + loop { + match comp_rx.recv() { + Ok(()) => { + // "Ring" the CQ doorbell + // TODO: test completing more than 1 at a time + assert!(comp_doorbell_tx + .send(Doorbell::Cq(1)) + .is_ok()); + completions += 1; + } + Err(_) => break completions, + } + if completions >= exit_after { + break completions; + } + } + }); + + // Now, start generating a random number of submissions + for _ in 0..submissions_rand { + // "Ring" the SQ doorbell + // TODO: test submitting more than 1 at a time + //let doorbell_tx = doorbell_tx.clone(); + assert!(doorbell_tx.send(Doorbell::Sq(1)).is_ok()); + + // Sleep up to 100us in between + sleep(Duration::from_micros(rng.gen_range(0..100))); + } + drop(doorbell_tx); + + // Wait for the completion handler and its count + let completions: u32 = comp_handler.join().unwrap(); + + // Wait for doorbell handler + doorbell_handler.join().unwrap(); + + // Wait for the IO workers to complete and sum the total + // number of submissions they recevied + let submissions: u32 = + io_workers.into_iter().map(|j| j.join().unwrap()).sum(); + + // Make sure the number of submission we recevied matched the + // number we generated and completed + assert_eq!(submissions, submissions_rand); + assert_eq!(submissions, completions); + }); + + Ok(()) + } +} diff --git a/propolis/src/hw/nvme/requests.rs b/propolis/src/hw/nvme/requests.rs new file mode 100644 index 000000000..547c3d6c6 --- /dev/null +++ b/propolis/src/hw/nvme/requests.rs @@ -0,0 +1,160 @@ +use crate::{ + block::{self, Operation, Request}, + dispatch::DispCtx, + hw::nvme::{bits, cmds::Completion}, +}; + +use super::{ + cmds::{self, NvmCmd}, + queue::CompQueueEntryPermit, + NvmeCtrl, PciNvme, +}; + +impl block::Device for PciNvme { + fn next(&self, ctx: &DispCtx) -> Option { + self.notifier.next_arming(|| self.next_req(ctx)) + } + + fn set_notifier(&self, f: Option>) { + self.notifier.set(f) + } +} + +impl PciNvme { + /// Pop an available I/O request off of a Submission Queue to begin + /// processing by the underlying Block Device. + fn next_req(&self, ctx: &DispCtx) -> Option { + let state = self.state.lock().unwrap(); + + // Go through all the queues (skip admin as we just want I/O queues) + // looking for a request to service + for sq in state.sqs.iter().skip(1).flatten() { + while let Some((sub, cqe_permit)) = sq.pop(ctx) { + let cmd = NvmCmd::parse(sub); + match cmd { + Ok(NvmCmd::Write(_)) if !state.binfo.writable => { + let comp = Completion::specific_err( + bits::StatusCodeType::CmdSpecific, + bits::STS_WRITE_READ_ONLY_RANGE, + ); + cqe_permit.push_completion(sub.cid(), comp, ctx); + } + Ok(NvmCmd::Write(cmd)) => { + return Some(write_op( + &state, + sub.cid(), + cmd, + cqe_permit, + ctx, + )); + } + Ok(NvmCmd::Read(cmd)) => { + return Some(read_op( + &state, + sub.cid(), + cmd, + cqe_permit, + ctx, + )); + } + Ok(NvmCmd::Flush) => { + return Some(flush_op(sub.cid(), cqe_permit)); + } + Ok(NvmCmd::Unknown(_)) | Err(_) => { + // For any other unrecognized or malformed command, + // just immediately complete it with an error + let comp = + Completion::generic_err(bits::STS_INTERNAL_ERR); + cqe_permit.push_completion(sub.cid(), comp, ctx); + } + } + } + } + + None + } +} + +fn read_op( + state: &NvmeCtrl, + cid: u16, + cmd: cmds::ReadCmd, + cqe_permit: CompQueueEntryPermit, + ctx: &DispCtx, +) -> Request { + probe_nvme_read_enqueue!(|| (cid, cmd.slba, cmd.nlb)); + let off = state.nlb_to_size(cmd.slba as usize); + let size = state.nlb_to_size(cmd.nlb as usize); + let bufs = cmd.data(size as u64, ctx.mctx.memctx()).collect(); + Request::new_read( + off, + bufs, + Box::new(move |op, res, ctx| { + complete_block_req(cid, op, res, cqe_permit, ctx) + }), + ) +} + +fn write_op( + state: &NvmeCtrl, + cid: u16, + cmd: cmds::WriteCmd, + cqe_permit: CompQueueEntryPermit, + ctx: &DispCtx, +) -> Request { + probe_nvme_write_enqueue!(|| (cid, cmd.slba, cmd.nlb)); + let off = state.nlb_to_size(cmd.slba as usize); + let size = state.nlb_to_size(cmd.nlb as usize); + let bufs = cmd.data(size as u64, ctx.mctx.memctx()).collect(); + Request::new_write( + off, + bufs, + Box::new(move |op, res, ctx| { + complete_block_req(cid, op, res, cqe_permit, ctx) + }), + ) +} + +fn flush_op(cid: u16, cqe_permit: CompQueueEntryPermit) -> Request { + Request::new_flush( + 0, + 0, // TODO: is 0 enough or do we pass total size? + Box::new(move |op, res, ctx| { + complete_block_req(cid, op, res, cqe_permit, ctx) + }), + ) +} + +/// Callback invoked by the underlying Block Device once it has completed an I/O op. +/// +/// Place the operation result (success or failure) onto the corresponding Completion Queue. +fn complete_block_req( + cid: u16, + op: Operation, + res: block::Result, + cqe_permit: CompQueueEntryPermit, + ctx: &DispCtx, +) { + let comp = match res { + block::Result::Success => Completion::success(), + block::Result::Failure => { + Completion::generic_err(bits::STS_DATA_XFER_ERR) + } + block::Result::Unsupported => Completion::specific_err( + bits::StatusCodeType::CmdSpecific, + bits::STS_READ_CONFLICTING_ATTRS, + ), + }; + + match op { + Operation::Read(..) => { + probe_nvme_read_complete!(|| (cid)); + } + Operation::Write(..) => { + probe_nvme_write_complete!(|| (cid)); + } + _ => {} + } + + cqe_permit.push_completion(cid, comp, ctx); +} diff --git a/propolis/src/hw/pci/device.rs b/propolis/src/hw/pci/device.rs index 17a174265..4607d2c6f 100644 --- a/propolis/src/hw/pci/device.rs +++ b/propolis/src/hw/pci/device.rs @@ -917,6 +917,7 @@ pub trait Device: Send + Sync + 'static + Entity { // fn cap_write(&self); } +#[derive(Debug)] enum MsixBarReg { Addr(u16), Data(u16), @@ -946,7 +947,7 @@ const MSIX_VEC_MASK: u32 = 1 << 0; const MSIX_MSGCTRL_ENABLE: u16 = 1 << 15; const MSIX_MSGCTRL_FMASK: u16 = 1 << 14; -#[derive(Default)] +#[derive(Debug, Default)] struct MsixEntry { addr: u64, data: u32, @@ -982,6 +983,7 @@ impl MsixEntry { } } +#[derive(Debug)] struct MsixCfg { count: u16, bar: BarN, @@ -990,7 +992,7 @@ struct MsixCfg { entries: Vec>, state: Mutex, } -#[derive(Default)] +#[derive(Debug, Default)] struct MsixCfgState { enabled: bool, func_mask: bool, @@ -1256,6 +1258,7 @@ pub struct MsiEnt { pub pending: bool, } +#[derive(Debug)] pub struct MsixHdl { cfg: Arc, } @@ -1263,6 +1266,10 @@ impl MsixHdl { fn new(cfg: &Arc) -> Self { Self { cfg: Arc::clone(cfg) } } + #[cfg(test)] + pub(crate) fn new_test() -> Self { + Self { cfg: MsixCfg::new(2048, BarN::BAR0).0 } + } pub fn fire(&self, idx: u16, ctx: &DispCtx) { self.cfg.fire(idx, ctx); } diff --git a/propolis/src/hw/virtio/block.rs b/propolis/src/hw/virtio/block.rs index bfaf92678..650e17686 100644 --- a/propolis/src/hw/virtio/block.rs +++ b/propolis/src/hw/virtio/block.rs @@ -91,7 +91,7 @@ impl VirtioBlock { Ok(block::Request::new_read( breq.sector as usize * SECTOR_SZ, regions, - Box::new(move |res, ctx| { + Box::new(move |_op, res, ctx| { complete_blockreq(res, chain, mvq, ctx); }), )) @@ -108,7 +108,7 @@ impl VirtioBlock { Ok(block::Request::new_write( breq.sector as usize * SECTOR_SZ, regions, - Box::new(move |res, ctx| { + Box::new(move |_op, res, ctx| { complete_blockreq(res, chain, mvq, ctx); }), )) diff --git a/propolis/src/lib.rs b/propolis/src/lib.rs index e1683f3fb..d165e9aaa 100644 --- a/propolis/src/lib.rs +++ b/propolis/src/lib.rs @@ -1,6 +1,8 @@ +#![allow(clippy::style)] // Pull in asm!() support for USDT #![feature(asm)] -#![allow(clippy::style)] +// Pull in `assert_matches` for tests +#![cfg_attr(test, feature(assert_matches))] use usdt::dtrace_provider; diff --git a/propolis/src/util/regmap.rs b/propolis/src/util/regmap.rs index f78ca177a..78ac05e56 100644 --- a/propolis/src/util/regmap.rs +++ b/propolis/src/util/regmap.rs @@ -4,12 +4,14 @@ use std::ops::Bound::Included; use super::aspace::ASpace; use crate::common::*; +#[derive(Debug)] struct RegDef { id: ID, flags: Flags, } /// Represents a mapping of registers within an address space. +#[derive(Debug)] pub struct RegMap { len: usize, space: ASpace>, diff --git a/propolis/src/vmm/hdl.rs b/propolis/src/vmm/hdl.rs index 5e5717900..61c448b10 100644 --- a/propolis/src/vmm/hdl.rs +++ b/propolis/src/vmm/hdl.rs @@ -144,7 +144,7 @@ impl VmmFile { /// A handle to an existing virtual machine monitor. pub struct VmmHdl { - inner: VmmFile, + pub(super) inner: VmmFile, destroyed: AtomicBool, name: String, } @@ -380,8 +380,10 @@ impl VmmHdl { /// Build a VmmHdl instance suitable for unit tests, but nothing else, since /// it will not be backed by any real vmm reousrces. pub(crate) fn new_test() -> Result { - // TODO: use something else - let fp = File::open("/dev/null")?; + use tempfile::tempfile; + // Create a 2M temp file to use as our VM "memory" + let fp = tempfile()?; + fp.set_len(2 * 1024 * 1024).unwrap(); Ok(Self { inner: VmmFile(fp), destroyed: AtomicBool::new(false), diff --git a/propolis/src/vmm/machine.rs b/propolis/src/vmm/machine.rs index e4ac785bb..6f4d0b867 100644 --- a/propolis/src/vmm/machine.rs +++ b/propolis/src/vmm/machine.rs @@ -125,7 +125,43 @@ impl Machine { // TODO: meaningfully populate these let guard_space = GuardSpace::new(crate::common::PAGE_SIZE)?; - let map = ASpace::new(0, MAX_PHYSMEM); + let mut map = ASpace::new(0, MAX_PHYSMEM); + map.register( + 0, + 1024 * 1024, + MapEnt { + kind: MapKind::SysMem(0, Prot::READ), + name: "test-readable".to_string(), + guest_map: Some(Mapping::new( + 1024 * 1024, + Prot::READ, + &hdl.inner, + 0, + )?), + dev_map: None, + }, + ) + .map_err(|e| { + std::io::Error::new(std::io::ErrorKind::Other, format!("{:?}", e)) + })?; + map.register( + 1024 * 1024, + 1024 * 1024, + MapEnt { + kind: MapKind::SysMem(0, Prot::WRITE), + name: "test-writable".to_string(), + guest_map: Some(Mapping::new( + 1024 * 1024, + Prot::WRITE, + &hdl.inner, + 1024 * 1024, + )?), + dev_map: None, + }, + ) + .map_err(|e| { + std::io::Error::new(std::io::ErrorKind::Other, format!("{:?}", e)) + })?; Ok(Machine { hdl: Arc::new(hdl), diff --git a/propolis/src/vmm/mapping.rs b/propolis/src/vmm/mapping.rs index a63aafa6b..eca4fdf2a 100644 --- a/propolis/src/vmm/mapping.rs +++ b/propolis/src/vmm/mapping.rs @@ -619,7 +619,7 @@ pub mod tests { let input: u64 = 0xDEADBEEF; mapping.as_ref().write(&input).unwrap(); - let output = mapping.as_ref().read().unwrap(); + let output: u64 = mapping.as_ref().read().unwrap(); assert_eq!(input, output); } diff --git a/standalone/src/main.rs b/standalone/src/main.rs index 74ce95036..c1506ce7f 100644 --- a/standalone/src/main.rs +++ b/standalone/src/main.rs @@ -184,8 +184,6 @@ fn main() { inv.register(&debug_device, "debug".to_string(), None) .map_err(|e| -> std::io::Error { e.into() })?; - // let mut devices = HashMap::new(); - for (name, dev) in config.devs() { let driver = &dev.driver as &str; let bdf = if driver.starts_with("pci-") { @@ -230,43 +228,24 @@ fn main() { .map_err(|e| -> std::io::Error { e.into() })?; chipset.pci_attach(bdf.unwrap(), viona); } - // "pci-nvme" => { - // let nvme = hw::nvme::PciNvme::create(0x1de, 0x1000); - // devices.insert(&**name, nvme.clone()); - // chipset.pci_attach(bdf.unwrap(), nvme); - // } - // "nvme-ns" => { - // let nvme_ctrl = dev - // .options - // .get("controller") - // .unwrap() - // .as_str() - // .unwrap(); - - // let nvme = devices.get(nvme_ctrl).unwrap_or_else(|| { - // panic!("no such nvme controller: {}", nvme_ctrl) - // }); - - // let block_dev = - // dev.options.get("block_dev").unwrap().as_str().unwrap(); - - // let block_dev = - // config.block_dev::(block_dev); - - // let ns = hw::nvme::NvmeNs::create(block_dev.clone()); - - // if let Err(e) = - // nvme.with_inner(|nvme: Arc| { - // nvme.add_ns(ns) - // }) - // { - // eprintln!("failed to attach nvme-ns: {}", e); - // std::process::exit(libc::EXIT_FAILURE); - // } - - // block_dev - // .start_dispatch(format!("bdev-{} thread", name), disp); - // } + "pci-nvme" => { + let block_dev = + dev.options.get("block_dev").unwrap().as_str().unwrap(); + + let (backend, creg) = config.block_dev(block_dev); + + let info = backend.info(); + let nvme = hw::nvme::PciNvme::create(0x1de, 0x1000, info); + + let id = + inv.register(&nvme, format!("nvme-{}", name), None)?; + let _be_id = inv.register_child(creg, id)?; + + let blk = nvme.inner_dev::(); + backend.attach(blk, disp); + + chipset.pci_attach(bdf.unwrap(), nvme); + } _ => { eprintln!("unrecognized driver: {}", name); std::process::exit(libc::EXIT_FAILURE);