diff --git a/bin/propolis-cli/src/main.rs b/bin/propolis-cli/src/main.rs index 4e04bb366..d27f9a0cb 100644 --- a/bin/propolis-cli/src/main.rs +++ b/bin/propolis-cli/src/main.rs @@ -155,8 +155,8 @@ enum Command { /// Call the VolumeConstructionRequest replace endpoint Vcr { /// Uuid for the disk - #[clap(short = 'u', action)] - uuid: Uuid, + #[clap(short = 'd', action)] + disk_id: String, /// File with a JSON InstanceVcrReplace struct #[clap(long, action)] @@ -510,7 +510,7 @@ async fn new_instance( async fn replace_vcr( client: &Client, - id: Uuid, + id: String, vcr_replace: InstanceVcrReplace, ) -> anyhow::Result<()> { // Try to call the endpoint @@ -941,9 +941,9 @@ async fn main() -> anyhow::Result<()> { } Command::Monitor => monitor(addr).await?, Command::InjectNmi => inject_nmi(&client).await?, - Command::Vcr { uuid, vcr_replace } => { + Command::Vcr { disk_id, vcr_replace } => { let replace: InstanceVcrReplace = parse_json_file(&vcr_replace)?; - replace_vcr(&client, uuid, replace).await? + replace_vcr(&client, disk_id, replace).await? } } diff --git a/bin/propolis-server/src/lib/vm/objects.rs b/bin/propolis-server/src/lib/vm/objects.rs index f328f6ad3..0992a9f4a 100644 --- a/bin/propolis-server/src/lib/vm/objects.rs +++ b/bin/propolis-server/src/lib/vm/objects.rs @@ -23,9 +23,7 @@ use tokio::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard}; use crate::{serial::Serial, spec::Spec, vcpu_tasks::VcpuTaskController}; -use super::{ - state_driver::VmStartReason, BlockBackendMap, CrucibleBackendMap, DeviceMap, -}; +use super::{BlockBackendMap, CrucibleBackendMap, DeviceMap}; /// A collection of components that make up a Propolis VM instance. pub(crate) struct VmObjects { @@ -189,6 +187,14 @@ impl VmObjectsLocked { &self.ps2ctrl } + pub(crate) fn device_map(&self) -> &DeviceMap { + &self.devices + } + + pub(crate) fn block_backend_map(&self) -> &BlockBackendMap { + &self.block_backends + } + /// Iterates over all of the lifecycle trait objects in this VM and calls /// `func` on each one. pub(crate) fn for_each_device( @@ -244,33 +250,6 @@ impl VmObjectsLocked { self.machine.reinitialize().unwrap(); } - /// Starts a VM's devices and allows all of its vCPU tasks to run. - /// - /// This function may be called either after initializing a new VM from - /// scratch or after an inbound live migration. In the latter case, this - /// routine assumes that the caller initialized and activated the VM's vCPUs - /// prior to importing state from the migration source. - pub(super) async fn start( - &mut self, - reason: VmStartReason, - ) -> anyhow::Result<()> { - match reason { - VmStartReason::ExplicitRequest => { - self.reset_vcpus(); - } - VmStartReason::MigratedIn => { - self.resume_kernel_vm(); - } - } - - let result = self.start_devices().await; - if result.is_ok() { - self.vcpu_tasks.resume_all(); - } - - result - } - /// Pauses this VM's devices and its kernel VMM. pub(crate) async fn pause(&mut self) { // Order matters here: the Propolis lifecycle trait's pause function @@ -288,6 +267,16 @@ impl VmObjectsLocked { // that all devices resume before any vCPUs do. self.resume_kernel_vm(); self.resume_devices(); + self.resume_vcpus(); + } + + /// Resumes this VM's vCPU tasks. + /// + /// This is intended for use in VM startup sequences where the state driver + /// needs fine-grained control over the order in which devices and vCPUs + /// start. When pausing and resuming a VM that's already been started, use + /// [`Self::pause`] and [`Self::resume`] instead. + pub(crate) fn resume_vcpus(&mut self) { self.vcpu_tasks.resume_all(); } @@ -321,31 +310,7 @@ impl VmObjectsLocked { // Resume devices so they're ready to do more work, then resume // vCPUs. self.resume_devices(); - self.vcpu_tasks.resume_all(); - } - - /// Starts all of a VM's devices and allows its block backends to process - /// requests from their devices. - async fn start_devices(&self) -> anyhow::Result<()> { - self.for_each_device_fallible(|name, dev| { - info!(self.log, "sending startup complete to {}", name); - let res = dev.start(); - if let Err(e) = &res { - error!(self.log, "startup failed for {}: {:?}", name, e); - } - res - })?; - - for (name, backend) in self.block_backends.iter() { - info!(self.log, "starting block backend {}", name); - let res = backend.start().await; - if let Err(e) = &res { - error!(self.log, "startup failed for {}: {:?}", name, e); - return res; - } - } - - Ok(()) + self.resume_vcpus(); } /// Pauses all of a VM's devices. diff --git a/bin/propolis-server/src/lib/vm/request_queue.rs b/bin/propolis-server/src/lib/vm/request_queue.rs index 9a2b58c9c..57ea9ed84 100644 --- a/bin/propolis-server/src/lib/vm/request_queue.rs +++ b/bin/propolis-server/src/lib/vm/request_queue.rs @@ -88,10 +88,6 @@ impl std::fmt::Debug for StateChangeRequest { pub enum ComponentChangeRequest { /// Attempts to update the volume construction request for the supplied /// Crucible volume. - /// - /// TODO: Due to https://github.com/oxidecomputer/crucible/issues/871, this - /// is only allowed once the VM is started and the volume has activated, but - /// it should be allowed even before the VM has started. ReconfigureCrucibleVolume { /// The ID of the Crucible backend in the VM's Crucible backend map. backend_id: SpecKey, @@ -356,7 +352,6 @@ impl ExternalRequestQueue { "request" => ?request, "reason" => %reason ); - return Err(reason); } } @@ -795,7 +790,6 @@ mod test { fn mutation_disallowed_after_stopped() { let mut queue = ExternalRequestQueue::new(test_logger(), InstanceAutoStart::Yes); - queue.notify_request_completed(CompletedRequest::Start { succeeded: true, }); diff --git a/bin/propolis-server/src/lib/vm/state_driver.rs b/bin/propolis-server/src/lib/vm/state_driver.rs index a37967668..22354578f 100644 --- a/bin/propolis-server/src/lib/vm/state_driver.rs +++ b/bin/propolis-server/src/lib/vm/state_driver.rs @@ -109,7 +109,7 @@ use crate::{ destination::DestinationProtocol, source::SourceProtocol, MigrateRole, }, spec::StorageBackend, - vm::state_publisher::ExternalStateUpdate, + vm::{state_publisher::ExternalStateUpdate, BlockBackendMap}, }; use super::{ @@ -141,6 +141,28 @@ pub(super) enum VmStartReason { ExplicitRequest, } +/// The outcome of a request to start a VM. +enum VmStartOutcome { + Succeeded, + Failed, + Aborted, +} + +impl VmStartOutcome { + /// If this start outcome implies that the state driver should return + /// immediately and allow the VM to be torn down, this routine returns + /// `Some(state)` where `state` is the final VM state to return from the + /// driver. If the driver should continue running the VM, this routine + /// returns `None`. + fn final_vm_state(&self) -> Option { + match self { + Self::Succeeded => None, + Self::Failed => Some(InstanceState::Failed), + Self::Aborted => Some(InstanceState::Destroyed), + } + } +} + /// A kind of event the state driver can handle. #[derive(Debug)] enum InputQueueEvent { @@ -195,12 +217,17 @@ impl InputQueue { } } - /// Waits for an event to arrive on the input queue and returns it for - /// processing. + /// Waits for a new event to arrive on one of the queue's sub-queues and + /// dispatches it for processing. + /// + /// The sub-queues, listed here in priority order, are: /// - /// External requests and guest events are stored in separate queues. If - /// both queues have events when this routine is called, the guest event - /// queue takes precedence. + /// - Guest events: These are signals raised from the VM's vCPUs and + /// devices (e.g. a request to reboot or halt the VM arising from a vCPU + /// asserting a virtual chipset signal). + /// - External requests: These are state change requests received via the + /// server API. See [`super::request_queue`] for more details about how + /// these requests are queued. /// /// # Synchronization /// @@ -478,10 +505,16 @@ impl StateDriver { info!(self.log, "state driver launched"); let final_state = if migrated_in { - if self.start_vm(VmStartReason::MigratedIn).await.is_ok() { - self.event_loop().await - } else { - InstanceState::Failed + // If the final state is known merely from the attempt to start the + // VM, return it immediately; otherwise, run the event loop and wait + // for it to return the final state. + match self + .start_vm(VmStartReason::MigratedIn) + .await + .final_vm_state() + { + None => self.event_loop().await, + Some(s) => s, } } else { self.event_loop().await @@ -524,32 +557,218 @@ impl StateDriver { async fn start_vm( &mut self, start_reason: VmStartReason, - ) -> anyhow::Result<()> { + ) -> VmStartOutcome { info!(self.log, "starting instance"; "reason" => ?start_reason); - let start_result = - self.objects.lock_exclusive().await.start(start_reason).await; + // Tell listeners that the VM's components are now starting up and not + // merely being created (but keep the VM in the Migrating state if it's + // being started pursuant to a migration in). + if let VmStartReason::ExplicitRequest = start_reason { + self.external_state + .update(ExternalStateUpdate::Instance(InstanceState::Starting)); + } - self.input_queue.notify_request_completed(CompletedRequest::Start { - succeeded: start_result.is_ok(), - }); + // The start sequence is arranged so that calls to block backends can be + // interleaved with processing of requests from the external request + // queue. This allows Nexus to reconfigure Crucible backends while they + // are being activated, which can be necessary if the VM's original + // specification specifies a Crucible downstairs server that is offline + // or unavailable. (Downstairs instances can disappear at any time, + // e.g. due to sled failure, so these configurations aren't necessarily + // client errors.) + // + // Before getting into any of that, handle the synchronous portions of + // VM startup. First, ensure that the kernel VM and all its associated + // devices are in the correct initial states. + let objects = self.objects.lock_shared().await; + match start_reason { + // If this VM is a migration target, migration will have properly + // initialized the vCPUs, but will have left the kernel VM paused. + // Resume it here before asking any in-kernel components to start. + VmStartReason::MigratedIn => objects.resume_kernel_vm(), + + // If this VM is starting from scratch, its kernel VM is active, but + // its vCPUs have not been initialized yet. + VmStartReason::ExplicitRequest => objects.reset_vcpus(), + } - match &start_result { - Ok(()) => { - self.external_state.update(ExternalStateUpdate::Instance( - InstanceState::Running, - )); + // Send synchronous start commands to all devices. + for (name, dev) in objects.device_map() { + info!(self.log, "sending start request to {}", name); + let res = dev.start(); + if let Err(e) = res { + error!( + self.log, "device start() returned an error"; + "device" => %name, + "error" => %e + ); + + return VmStartOutcome::Failed; } - Err(e) => { - error!(&self.log, "failed to start devices"; - "error" => ?e); - self.external_state.update(ExternalStateUpdate::Instance( - InstanceState::Failed, - )); + } + + // Next, prepare to start block backends. This is done by capturing the + // current block backend set and creating a future that issues all the + // start requests. + // + // For this to work, the set of block backends to be started must not + // change while the VM is starting. This is guaranteed because all such + // requests to hotplug a block backend will be dispatched to the VM's + // request queue; if any such requests are seen below, they can simply + // be buffered and handled after the rest of the VM has started. + async fn start_block_backends( + log: slog::Logger, + backends: BlockBackendMap, + ) -> anyhow::Result<()> { + for (name, backend) in backends { + info!(log, "starting block backend {}", name); + let res = backend.start().await; + if let Err(e) = &res { + error!( + log, + "block backend start() returned an error"; + "backend" => %name, + "error" => %e + ); + + return res; + } } + + Ok(()) + } + + let block_backends = objects.block_backend_map().clone(); + let block_backend_fut = + start_block_backends(self.log.clone(), block_backends); + tokio::pin!(block_backend_fut); + + // Drop the VM object lock before proceeding to allow other API calls + // that simply read the VM to make progress. Again, note that the set of + // objects being started still can't change, not because the lock is + // held, but because the only entity that can change them is the current + // task, which can decide whether and how to buffer incoming requests. + drop(objects); + + // Keep track of whether the external queue produced a request to stop + // the VM while it was being started. If such a request is seen, send a + // self-request to stop just before returning so that the VM will stop + // immediately. + enum Selection { + BackendFuture(anyhow::Result<()>), + Event(InputQueueEvent), } + loop { + let selection = tokio::select! { + // If the VM successfully starts, return immediately and let + // the caller process any events that may happen to be on the + // queue. + biased; + + res = &mut block_backend_fut => { + Selection::BackendFuture(res) + } - start_result + event = self.input_queue.wait_for_next_event() => { + Selection::Event(event) + } + }; + + let req: ExternalRequest = match selection { + Selection::BackendFuture(Ok(())) => { + let objects = &self.objects; + objects.lock_exclusive().await.resume_vcpus(); + self.external_state.update(ExternalStateUpdate::Instance( + InstanceState::Running, + )); + + self.input_queue.notify_request_completed( + CompletedRequest::Start { succeeded: true }, + ); + + info!(&self.log, "VM successfully started"); + return VmStartOutcome::Succeeded; + } + + Selection::BackendFuture(Err(e)) => { + info!(&self.log, "VM startup failed: {e}"); + self.input_queue.notify_request_completed( + CompletedRequest::Start { succeeded: false }, + ); + + return VmStartOutcome::Failed; + } + + // The VM's vCPUs only start when the block backend startup + // future resolves and is selected above. If control reached + // that point, that branch wasn't selected, so the vCPUs should + // still be paused, which means the dequeued event should not be + // a guest event. + Selection::Event(InputQueueEvent::GuestEvent(_)) => { + unreachable!("can't get guest events before vCPUs start") + } + + Selection::Event(InputQueueEvent::ExternalRequest(req)) => req, + }; + + match req { + ExternalRequest::State(StateChangeRequest::Stop) => { + info!( + &self.log, + "got request to stop while still starting" + ); + + // Don't send any pause/halt notifications here, since + // (depending on what async work was in flight when this + // notification was received) there may be a + // partially-started component that is not prepared to be + // paused and halted. Instead, simply move the VM to + // Stopped, return an "aborted" status, and let the caller + // arrange to drop all the VM's components. (Note that no + // vCPUs have started yet, so no guest work is in flight at + // this point.) + self.external_state.update(ExternalStateUpdate::Instance( + InstanceState::Stopped, + )); + + self.input_queue.notify_stopped(); + return VmStartOutcome::Aborted; + } + ExternalRequest::Component( + ComponentChangeRequest::ReconfigureCrucibleVolume { + backend_id, + new_vcr_json, + result_tx, + }, + ) => { + // The API caller who requested this operation can hang up + // and drop the receiver. This isn't fatal; just keep + // starting the VM if it happens. + let _ = result_tx.send( + self.reconfigure_crucible_volume( + &backend_id, + new_vcr_json, + ) + .await, + ); + } + // The request queue is expected to reject (or at least silently + // ignore) requests to migrate or reboot an instance that hasn't + // reported that it's fully started. Similarly, requests to + // start a VM that's already starting are expected to be ignored + // for idempotency. + r @ ExternalRequest::State(StateChangeRequest::Start) + | r @ ExternalRequest::State( + StateChangeRequest::MigrateAsSource { .. }, + ) + | r @ ExternalRequest::State(StateChangeRequest::Reboot) => { + unreachable!( + "external request {r:?} shouldn't be queued while \ + starting" + ); + } + } + } } async fn handle_guest_event( @@ -608,11 +827,17 @@ impl StateDriver { ) -> HandleEventOutcome { match request { ExternalRequest::State(StateChangeRequest::Start) => { - match self.start_vm(VmStartReason::ExplicitRequest).await { - Ok(_) => HandleEventOutcome::Continue, - Err(_) => HandleEventOutcome::Exit { - final_state: InstanceState::Failed, - }, + // If this start attempt produces a terminal VM state, return it + // to the driver and indicate that the driver should exit. + match self + .start_vm(VmStartReason::ExplicitRequest) + .await + .final_vm_state() + { + None => HandleEventOutcome::Continue, + Some(final_state) => { + HandleEventOutcome::Exit { final_state } + } } } ExternalRequest::State(StateChangeRequest::MigrateAsSource { diff --git a/lib/propolis/src/block/mod.rs b/lib/propolis/src/block/mod.rs index cd210ed25..45a590ec1 100644 --- a/lib/propolis/src/block/mod.rs +++ b/lib/propolis/src/block/mod.rs @@ -251,6 +251,16 @@ pub trait Backend: Send + Sync + 'static { /// /// Spawning of any tasks required to do such request processing can be done /// as part of this start-up. + /// + /// This operation will be invoked only once per backend (when its VM + /// starts). Block backends are not explicitly resumed during VM lifecycle + /// events; instead, their corresponding devices will stop issuing new + /// requests while paused and resume issuing them when they are resumed. + /// + /// WARNING: The caller may abort VM startup and cancel the future created + /// by this routine. In this case the caller may not call [`stop`] prior to + /// dropping the backend. This routine is, however, guaranteed to be called + /// before the VM's vCPUs are started. async fn start(&self) -> anyhow::Result<()>; /// Stop attempting to process new [Request]s from [Device] (if attached) @@ -260,6 +270,12 @@ pub trait Backend: Send + Sync + 'static { /// /// If any tasks were spawned as part of [Backend::start()], they should be /// brought to rest as part of this call. + /// + /// This operation will be invoked only once per backend (when its VM + /// stops). Block backends are not explicitly paused during VM lifecycle + /// events; instead, their corresponding devices will stop issuing new + /// requests when they are told to pause (and will only report they are + /// fully paused when all their in-flight requests have completed). async fn stop(&self) -> (); /// Attempt to detach from associated [Device] diff --git a/phd-tests/framework/src/disk/crucible.rs b/phd-tests/framework/src/disk/crucible.rs index 5ac2f2747..a4553990a 100644 --- a/phd-tests/framework/src/disk/crucible.rs +++ b/phd-tests/framework/src/disk/crucible.rs @@ -5,7 +5,7 @@ //! Abstractions for Crucible-backed disks. use std::{ - net::{Ipv4Addr, SocketAddr, SocketAddrV4}, + net::{Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV4, SocketAddrV6}, path::{Path, PathBuf}, process::Stdio, sync::Mutex, @@ -47,10 +47,24 @@ impl Drop for DataDirectory { #[derive(Debug)] struct Downstairs { process_handle: std::process::Child, + + /// The address on which this downstairs is serving its API. address: SocketAddr, + + /// The address to insert as a connection target when constructing a VCR + /// that refers to this downstairs. If `None`, the downstairs's API address + /// is used instead. + vcr_address_override: Option, + data_dir: DataDirectory, } +impl Downstairs { + fn vcr_address(&self) -> SocketAddr { + self.vcr_address_override.unwrap_or(self.address) + } +} + impl Drop for Downstairs { fn drop(&mut self) { info!(?self, "Stopping Crucible downstairs process"); @@ -107,6 +121,39 @@ impl CrucibleDisk { pub fn set_generation(&self, generation: u64) { self.inner.lock().unwrap().generation = generation; } + + /// Changes this disk's downstairs configuration so that the returned IP + /// address of the first downstairs is an IPv6 black hole instead of its + /// actual address. This will prevent VMs from activating this disk until + /// the VCR is replaced with one bearing the correct IP address. + pub fn enable_vcr_black_hole(&self) { + info!(disk = self.device_name.as_str(), "enabling vcr black hole"); + + // 100::/64 is the IPv6 discard prefix (per RFC 6666). + let address = SocketAddr::V6(SocketAddrV6::new( + Ipv6Addr::new(0x100, 0, 0, 0, 0, 0, 0, 0), + 9000, + 0, + 0, + )); + + let mut inner = self.inner.lock().unwrap(); + + // Crucible rejects VCR replacement requests that change more than one + // downstairs address, so just invalidate the first downstairs address. + // This ensures that if the black hole is disabled, subsequent VCRs are + // valid replacements for the ones produced while the black hole was + // enabled. + inner.downstairs_instances[0].vcr_address_override = Some(address); + } + + /// Ensures that this disk's downstairs configuration will return the + /// correct addresses for all its downstairs instances. + pub fn disable_vcr_black_hole(&self) { + info!(disk = self.device_name.as_str(), "disabling vcr black hole"); + let mut inner = self.inner.lock().unwrap(); + inner.downstairs_instances[0].vcr_address_override = None; + } } impl super::DiskConfig for CrucibleDisk { @@ -302,6 +349,7 @@ impl Inner { stderr, )?, address: SocketAddr::V4(addr), + vcr_address_override: None, data_dir: dir, }; @@ -338,8 +386,11 @@ impl Inner { } fn vcr(&self, disk_id: Uuid) -> VolumeConstructionRequest { - let downstairs_addrs = - self.downstairs_instances.iter().map(|ds| ds.address).collect(); + let downstairs_addrs = self + .downstairs_instances + .iter() + .map(Downstairs::vcr_address) + .collect(); VolumeConstructionRequest::Volume { id: disk_id, diff --git a/phd-tests/framework/src/lib.rs b/phd-tests/framework/src/lib.rs index 327b1804b..35ead0df1 100644 --- a/phd-tests/framework/src/lib.rs +++ b/phd-tests/framework/src/lib.rs @@ -290,15 +290,8 @@ impl Framework { vm: &TestVm, environment: Option<&EnvironmentSpec>, ) -> anyhow::Result { - let mut vm_spec = - VmSpec { vm_name: vm_name.to_owned(), ..vm.vm_spec() }; - - // Reconcile any differences between the generation numbers in the VM - // objects' instance spec and the associated Crucible disk handles. - // This may be needed because a test can call `set_generation` on a disk - // handle to change its active generation number mid-test, and this - // won't automatically be reflected in the VM's instance spec. - vm_spec.refresh_crucible_backends(); + let mut vm_spec = vm.vm_spec().clone(); + vm_spec.set_vm_name(vm_name.to_owned()); // Create new metadata for an instance based on this predecessor. It // should have the same project and silo IDs, but the sled identifiers diff --git a/phd-tests/framework/src/test_vm/config.rs b/phd-tests/framework/src/test_vm/config.rs index 06a0c7c2d..2bf54d43c 100644 --- a/phd-tests/framework/src/test_vm/config.rs +++ b/phd-tests/framework/src/test_vm/config.rs @@ -387,14 +387,14 @@ impl<'dr> VmConfig<'dr> { sled_serial: sled_id.to_string(), }; - Ok(VmSpec { - vm_name: vm_name.clone(), - instance_spec: spec, + Ok(VmSpec::new( + vm_name.clone(), + spec, disk_handles, guest_os_kind, bootrom_path, metadata, - }) + )) } } diff --git a/phd-tests/framework/src/test_vm/mod.rs b/phd-tests/framework/src/test_vm/mod.rs index 88fcfb2d9..ea480a54a 100644 --- a/phd-tests/framework/src/test_vm/mod.rs +++ b/phd-tests/framework/src/test_vm/mod.rs @@ -292,7 +292,7 @@ impl TestVm { let init = match migrate { None => InstanceInitializationMethod::Spec { - spec: self.spec.instance_spec.clone(), + spec: self.spec.instance_spec(), }, Some(info) => InstanceInitializationMethod::MigrationTarget { migration_id: info.migration_id, @@ -489,7 +489,7 @@ impl TestVm { let timeout_duration = match Into::::into(timeout) { MigrationTimeout::Explicit(val) => val, MigrationTimeout::InferFromMemorySize => { - let mem_mib = self.spec.instance_spec.board.memory_mb; + let mem_mib = self.spec.instance_spec().board.memory_mb; std::time::Duration::from_secs( (MIGRATION_SECS_PER_GUEST_GIB * mem_mib) / 1024, ) @@ -573,7 +573,7 @@ impl TestVm { fn generate_replacement_components(&self) -> ReplacementComponents { let mut map = ReplacementComponents::new(); - for (id, comp) in &self.spec.instance_spec.components { + for (id, comp) in &self.spec.instance_spec().components { match comp { ComponentV0::MigrationFailureInjector(inj) => { map.insert( diff --git a/phd-tests/framework/src/test_vm/spec.rs b/phd-tests/framework/src/test_vm/spec.rs index 970e4424d..d0ef3f8c5 100644 --- a/phd-tests/framework/src/test_vm/spec.rs +++ b/phd-tests/framework/src/test_vm/spec.rs @@ -18,7 +18,7 @@ pub struct VmSpec { pub vm_name: String, /// The instance spec to pass to the VM when starting the guest. - pub instance_spec: InstanceSpecV0, + base_instance_spec: InstanceSpecV0, /// A set of handles to disk files that the VM's disk backends refer to. pub disk_handles: Vec>, @@ -43,9 +43,37 @@ impl VmSpec { .find(|disk| disk.device_name().as_str() == name) } + pub(crate) fn new( + vm_name: String, + instance_spec: InstanceSpecV0, + disk_handles: Vec>, + guest_os_kind: GuestOsKind, + bootrom_path: Utf8PathBuf, + metadata: InstanceMetadata, + ) -> Self { + Self { + vm_name, + base_instance_spec: instance_spec, + disk_handles, + guest_os_kind, + bootrom_path, + metadata, + } + } + + pub(crate) fn set_vm_name(&mut self, name: String) { + self.vm_name = name + } + + pub(crate) fn instance_spec(&self) -> InstanceSpecV0 { + let mut spec = self.base_instance_spec.clone(); + self.set_crucible_backends(&mut spec); + spec + } + /// Update the Crucible backend specs in the instance spec to match the /// current backend specs given by this specification's disk handles. - pub(crate) fn refresh_crucible_backends(&mut self) { + fn set_crucible_backends(&self, spec: &mut InstanceSpecV0) { for disk in &self.disk_handles { let disk = if let Some(disk) = disk.as_crucible() { disk @@ -57,11 +85,9 @@ impl VmSpec { let backend_name = disk.device_name().clone().into_backend_name().into_string(); if let Some(ComponentV0::CrucibleStorageBackend(_)) = - self.instance_spec.components.get(&backend_name) + spec.components.get(&backend_name) { - self.instance_spec - .components - .insert(backend_name, backend_spec); + spec.components.insert(backend_name, backend_spec); } } } diff --git a/phd-tests/tests/src/crucible/smoke.rs b/phd-tests/tests/src/crucible/smoke.rs index fd34e34b8..a69ac2f5b 100644 --- a/phd-tests/tests/src/crucible/smoke.rs +++ b/phd-tests/tests/src/crucible/smoke.rs @@ -88,8 +88,13 @@ async fn shutdown_persistence_test(ctx: &Framework) { } #[phd_testcase] -async fn vcr_replace_test(ctx: &Framework) { - let mut config = ctx.vm_config_builder("crucible_vcr_replace_test"); +async fn vcr_replace_during_start_test(ctx: &Framework) { + if !ctx.crucible_enabled() { + phd_skip!("Crucible backends not enabled (no downstairs path)"); + } + + let mut config = + ctx.vm_config_builder("crucible_vcr_replace_during_start_test"); // Create a blank data disk on which to perform VCR replacement. This is // necessary because Crucible doesn't permit VCR replacements for volumes @@ -107,15 +112,38 @@ async fn vcr_replace_test(ctx: &Framework) { 5, ); + // Configure the disk so that when the VM starts, it will have an invalid + // downstairs address. let spec = config.vm_spec(ctx).await?; let disk_hdl = spec.get_disk_by_device_name(DATA_DISK_NAME).cloned().unwrap(); let disk = disk_hdl.as_crucible().unwrap(); + disk.enable_vcr_black_hole(); + // Try to start the VM, but don't wait for it to boot; it should get stuck + // while activating using an invalid downstairs address. let mut vm = ctx.spawn_vm_with_spec(spec, None).await?; vm.launch().await?; - vm.wait_to_boot().await?; + // The VM is expected not to reach the Running state. Unfortunately, there's + // no great way to test that this is never going to happen; as a best-effort + // alternative, wait for a short while and assert that the VM doesn't reach + // Running in the timeout interval. + vm.wait_for_state(InstanceState::Running, Duration::from_secs(5)) + .await + .unwrap_err(); + + // Fix the disk's downstairs address and send a replacement request. This + // should be processed and should allow the VM to boot. + disk.disable_vcr_black_hole(); disk.set_generation(2); vm.replace_crucible_vcr(disk).await?; + vm.wait_to_boot().await?; + + assert_eq!(vm.get().await?.instance.state, InstanceState::Running); + + // VCR replacements should continue to be accepted now that the instance is + // running. + disk.set_generation(3); + vm.replace_crucible_vcr(disk).await?; } diff --git a/phd-tests/tests/src/server_state_machine.rs b/phd-tests/tests/src/server_state_machine.rs index dba7d3cd6..c15a59641 100644 --- a/phd-tests/tests/src/server_state_machine.rs +++ b/phd-tests/tests/src/server_state_machine.rs @@ -6,6 +6,10 @@ use std::time::Duration; +use phd_framework::{ + disk::{BlockSize, DiskSource}, + test_vm::{DiskBackend, DiskInterface}, +}; use phd_testcase::*; use propolis_client::types::InstanceState; @@ -89,3 +93,49 @@ async fn instance_reset_requires_running_test(ctx: &Framework) { vm.launch().await?; vm.wait_for_state(InstanceState::Running, Duration::from_secs(60)).await?; } + +#[phd_testcase] +async fn stop_while_blocked_on_start_test(ctx: &Framework) { + // This test uses a Crucible disk backend to cause VM startup to block. + if !ctx.crucible_enabled() { + phd_skip!("test requires Crucible support"); + } + + let mut config = ctx.vm_config_builder("stop_while_blocked_on_start_test"); + + // Create a VM that blocks while starting by attaching a Crucible data disk + // to it and enabling the black hole address in its volume construction + // request. The invalid address will keep Crucible from activating and so + // will block the VM from fully starting. + const DATA_DISK_NAME: &str = "vcr-replacement-target"; + config.data_disk( + DATA_DISK_NAME, + DiskSource::Blank(1024 * 1024 * 1024), + DiskInterface::Nvme, + DiskBackend::Crucible { + min_disk_size_gib: 1, + block_size: BlockSize::Bytes512, + }, + 5, + ); + + let spec = config.vm_spec(ctx).await?; + let disk_hdl = + spec.get_disk_by_device_name(DATA_DISK_NAME).cloned().unwrap(); + let disk = disk_hdl.as_crucible().unwrap(); + disk.enable_vcr_black_hole(); + + // Launch the VM and wait for it to advertise that its components are + // starting. + let mut vm = ctx.spawn_vm_with_spec(spec, None).await?; + vm.launch().await?; + vm.wait_for_state(InstanceState::Starting, Duration::from_secs(15)) + .await + .unwrap(); + + // Send a stop request. This should enqueue successfully, and the VM should + // shut down even though activation is blocked. + vm.stop().await?; + vm.wait_for_state(InstanceState::Destroyed, Duration::from_secs(60)) + .await?; +}