From a95c1c30d7759d6c6bf833a898ae0ff67eb0031b Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 16 Sep 2025 14:00:04 -0700 Subject: [PATCH 1/6] Add omdb command to display db_metadata_nexus_state --- dev-tools/omdb/src/bin/omdb/db.rs | 144 ++++++++++++++++++ dev-tools/omdb/tests/usage_errors.out | 2 + nexus/db-model/src/db_metadata.rs | 11 ++ .../src/db/datastore/db_metadata.rs | 17 +++ nexus/types/src/deployment.rs | 19 +++ 5 files changed, 193 insertions(+) diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 70d158e0441..24a66207bce 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -61,6 +61,7 @@ use nexus_db_errors::OptionalError; use nexus_db_lookup::DataStoreConnection; use nexus_db_lookup::LookupPath; use nexus_db_model::CrucibleDataset; +use nexus_db_model::DbMetadataNexusState; use nexus_db_model::Disk; use nexus_db_model::DnsGroup; use nexus_db_model::DnsName; @@ -144,11 +145,13 @@ use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Generation; use omicron_common::api::external::InstanceState; use omicron_common::api::external::MacAddr; +use omicron_uuid_kinds::BlueprintUuid; use omicron_uuid_kinds::CollectionUuid; use omicron_uuid_kinds::DatasetUuid; use omicron_uuid_kinds::DownstairsRegionUuid; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::InstanceUuid; +use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::ParseError; use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::PropolisUuid; @@ -338,6 +341,8 @@ pub struct DbFetchOptions { /// Subcommands that query or update the database #[derive(Debug, Subcommand, Clone)] enum DbCommands { + /// Commands for database metadata + DbMetadata(DbMetadataArgs), /// Commands relevant to Crucible datasets CrucibleDataset(CrucibleDatasetArgs), /// Print any Crucible resources that are located on expunged physical disks @@ -396,6 +401,18 @@ enum DbCommands { 
UserDataExport(user_data_export::UserDataExportArgs), } +#[derive(Debug, Args, Clone)] +struct DbMetadataArgs { + #[command(subcommand)] + command: DbMetadataCommands, +} + +#[derive(Debug, Subcommand, Clone)] +enum DbMetadataCommands { + #[clap(alias = "ls-nexus")] + ListNexus, +} + #[derive(Debug, Args, Clone)] struct CrucibleDatasetArgs { #[command(subcommand)] @@ -1128,6 +1145,11 @@ impl DbArgs { self.db_url_opts.with_datastore(omdb, log, |opctx, datastore| { async move { match &self.command { + DbCommands::DbMetadata(DbMetadataArgs { + command: DbMetadataCommands::ListNexus, + }) => { + cmd_db_metadata_list_nexus(&opctx, &datastore).await + } DbCommands::CrucibleDataset(CrucibleDatasetArgs { command: CrucibleDatasetCommands::List, }) => { @@ -1635,6 +1657,128 @@ async fn lookup_project( .with_context(|| format!("loading project {project_id}")) } +// DB Metadata + +#[derive(Tabled)] +#[tabled(rename_all = "SCREAMING_SNAKE_CASE")] +struct DbMetadataNexusRow { + id: OmicronZoneUuid, + #[tabled(display_with = "option_impl_display")] + last_drained_blueprint: Option<BlueprintUuid>, + + // Identifies the state we observe in the database + state: String, + + // Identifies the state this Nexus is trying to achieve, based on the target + // blueprint, if it's different from the current state + #[tabled(display_with = "display_option_blank")] + transitioning_to: Option<String>, +} + +fn get_intended_nexus_state( + bp_nexus_generation: Generation, + bp_nexus_generation_by_zone: &BTreeMap<OmicronZoneUuid, Generation>, + id: OmicronZoneUuid, +) -> Option<DbMetadataNexusState> { + let Some(gen) = bp_nexus_generation_by_zone.get(&id) else { + return None; + }; + + Some(if *gen < bp_nexus_generation { + // This Nexus is either quiescing, or has already quiesced + DbMetadataNexusState::Quiesced + } else if *gen == bp_nexus_generation { + // This Nexus is either active, or will become active once + // the prior generation has quiesced + DbMetadataNexusState::Active + } else { + // This Nexus is not ready to be run yet + DbMetadataNexusState::NotYet
}) +} + +fn get_nexus_state_transition( + observed: DbMetadataNexusState, + intended: Option<DbMetadataNexusState>, +) -> Option<String> { + match (observed, intended) { + (observed, Some(intended)) if observed == intended => None, + (_, Some(intended)) => Some(intended.to_string()), + (_, None) => Some("Unknown".to_string()), + } +} + +async fn get_db_metadata_nexus_rows( + opctx: &OpContext, + datastore: &DataStore, + blueprint: &Blueprint, +) -> Result<Vec<DbMetadataNexusRow>, anyhow::Error> { + let states = [ + DbMetadataNexusState::Active, + DbMetadataNexusState::NotYet, + DbMetadataNexusState::Quiesced, + ]; + + let nexus_generation_by_zone = blueprint + .all_nexus_zones(BlueprintZoneDisposition::is_in_service) + .map(|(_, zone, nexus_zone)| (zone.id, nexus_zone.nexus_generation)) + .collect::<BTreeMap<_, _>>(); + + Ok(datastore + .get_db_metadata_nexus_in_state(opctx, &states) + .await? + .into_iter() + .map(|db_metadata_nexus| { + let id = db_metadata_nexus.nexus_id(); + let last_drained_blueprint = + db_metadata_nexus.last_drained_blueprint_id(); + let state = db_metadata_nexus.state().to_string(); + let intended_state = get_intended_nexus_state( + blueprint.nexus_generation, + &nexus_generation_by_zone, + id, + ); + + let transitioning_to = get_nexus_state_transition( + db_metadata_nexus.state(), + intended_state, + ); + + DbMetadataNexusRow { + id, + last_drained_blueprint, + state, + transitioning_to, + } + }) + .collect()) +} + +async fn cmd_db_metadata_list_nexus( + opctx: &OpContext, + datastore: &DataStore, +) -> Result<(), anyhow::Error> { + let (_, current_target_blueprint) = datastore + .blueprint_target_get_current_full(opctx) + .await + .context("loading current target blueprint")?; + println!( + "Target Blueprint {} @ nexus_generation: {}", + current_target_blueprint.id, current_target_blueprint.nexus_generation + ); + + let rows: Vec<_> = + get_db_metadata_nexus_rows(opctx, datastore, &current_target_blueprint) + .await?; + let table = tabled::Table::new(rows) + .with(tabled::settings::Style::psql())
.with(tabled::settings::Padding::new(0, 1, 0, 0)) + .to_string(); + println!("{}", table); + + Ok(()) +} + // Crucible datasets #[derive(Tabled)] diff --git a/dev-tools/omdb/tests/usage_errors.out b/dev-tools/omdb/tests/usage_errors.out index 1ebff1e231f..c9cbdbadbcb 100644 --- a/dev-tools/omdb/tests/usage_errors.out +++ b/dev-tools/omdb/tests/usage_errors.out @@ -115,6 +115,7 @@ Query the control plane database (CockroachDB) Usage: omdb db [OPTIONS] Commands: + db-metadata Commands for database metadata crucible-dataset Commands relevant to Crucible datasets replacements-to-do Print any Crucible resources that are located on expunged physical disks @@ -174,6 +175,7 @@ Query the control plane database (CockroachDB) Usage: omdb db [OPTIONS] Commands: + db-metadata Commands for database metadata crucible-dataset Commands relevant to Crucible datasets replacements-to-do Print any Crucible resources that are located on expunged physical disks diff --git a/nexus/db-model/src/db_metadata.rs b/nexus/db-model/src/db_metadata.rs index 080da4d423c..91e7fa92a94 100644 --- a/nexus/db-model/src/db_metadata.rs +++ b/nexus/db-model/src/db_metadata.rs @@ -12,6 +12,7 @@ use omicron_uuid_kinds::{ BlueprintKind, BlueprintUuid, OmicronZoneKind, OmicronZoneUuid, }; use serde::{Deserialize, Serialize}; +use std::fmt; /// Internal database metadata #[derive( @@ -52,6 +53,16 @@ impl_enum_type!( Quiesced => b"quiesced" ); +impl fmt::Display for DbMetadataNexusState { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(match self { + DbMetadataNexusState::Active => "active", + DbMetadataNexusState::NotYet => "not yet", + DbMetadataNexusState::Quiesced => "quiesced", + }) + } +} + #[derive( Queryable, Insertable, Debug, Clone, Selectable, Serialize, Deserialize, )] diff --git a/nexus/db-queries/src/db/datastore/db_metadata.rs b/nexus/db-queries/src/db/datastore/db_metadata.rs index 6dd529cd40a..92d4f678547 100644 --- a/nexus/db-queries/src/db/datastore/db_metadata.rs 
+++ b/nexus/db-queries/src/db/datastore/db_metadata.rs @@ -300,6 +300,23 @@ impl DatastoreSetupAction { } impl DataStore { + /// Returns [`DbMetadataNexus`] records in any of the supplied states. + pub async fn get_db_metadata_nexus_in_state( + &self, + opctx: &OpContext, + states: &[DbMetadataNexusState], + ) -> Result<Vec<DbMetadataNexus>, Error> { + use nexus_db_schema::schema::db_metadata_nexus::dsl; + + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + + dsl::db_metadata_nexus + .filter(dsl::state.eq_any(states.to_vec())) + .load_async(&*self.pool_connection_authorized(&opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + // Checks if the specified Nexus has access to the database. async fn check_nexus_access( &self, diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index 3b103a7b6c4..fb602899709 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -310,6 +310,25 @@ impl Blueprint { ) } + /// Iterate over all Nexus zones that match the provided filter. + pub fn all_nexus_zones<F>( + &self, + filter: F, + ) -> impl Iterator< + Item = (SledUuid, &BlueprintZoneConfig, &blueprint_zone_type::Nexus), + > + where + F: FnMut(BlueprintZoneDisposition) -> bool, + { + self.all_omicron_zones(filter).filter_map(|(sled_id, zone)| { + if let BlueprintZoneType::Nexus(nexus_config) = &zone.zone_type { + Some((sled_id, zone, nexus_config)) + } else { + None + } + }) + } + /// Iterate over the [`BlueprintZoneConfig`] instances that match the /// provided filter, along with the associated sled id.
// From 5d8edc37ec135ede401a8b9821f747dcf1a009a4 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 16 Sep 2025 15:44:39 -0700 Subject: [PATCH 2/6] Add success case test --- dev-tools/omdb/tests/successes.out | 13 +++++++++++++ dev-tools/omdb/tests/test_all_output.rs | 1 + 2 files changed, 14 insertions(+) diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 095e88b2fd6..4014b0c4943 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -1,3 +1,16 @@ +EXECUTING COMMAND: omdb ["db", "db-metadata", "ls-nexus"] +termination: Exited(0) +--------------------------------------------- +stdout: +Target Blueprint ............. @ nexus_generation: 1 +ID |LAST_DRAINED_BLUEPRINT |STATE |TRANSITIONING_TO +-------------------------------------+-----------------------+-------+----------------- +..................... |n/a |active | +--------------------------------------------- +stderr: +note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable +note: database schema version matches expected () +============================================= EXECUTING COMMAND: omdb ["db", "disks", "list"] termination: Exited(0) --------------------------------------------- diff --git a/dev-tools/omdb/tests/test_all_output.rs b/dev-tools/omdb/tests/test_all_output.rs index e7041d16419..99d46b935c9 100644 --- a/dev-tools/omdb/tests/test_all_output.rs +++ b/dev-tools/omdb/tests/test_all_output.rs @@ -177,6 +177,7 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { let mut output = String::new(); let invocations: &[&[&str]] = &[ + &["db", "db-metadata", "ls-nexus"], &["db", "disks", "list"], &["db", "dns", "show"], &["db", "dns", "diff", "external", "2"], From 91b032da85a330b0cd946922bbc48afe12d38226 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 16 Sep 2025 16:18:47 -0700 Subject: [PATCH 3/6] Add omdb command to forcefully quiesce db_metadata_nexus records --- 
dev-tools/omdb/src/bin/omdb/db.rs | 74 +++++++++++++++++++ dev-tools/omdb/tests/successes.out | 20 +++++ dev-tools/omdb/tests/test_all_output.rs | 28 +++++++ .../src/db/datastore/db_metadata.rs | 20 +++++ 4 files changed, 142 insertions(+) diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 24a66207bce..e279740ff18 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -409,8 +409,37 @@ struct DbMetadataArgs { #[derive(Debug, Subcommand, Clone)] enum DbMetadataCommands { + /// Lists the `db_metadata_nexus` records for all Nexuses. #[clap(alias = "ls-nexus")] ListNexus, + + /// !!! DANGEROUS !!! Updates a `db_metadata_nexus` record to 'Quiesced' + /// + /// THIS OPERATION IS DANGEROUS. It is the responsibility of the caller + /// to ensure that the specified Nexus zone is not running. + /// + /// If the Nexus being updated is actually running, this operation + /// may cause arbitrary data corruption, as it can allow multiple Nexuses + /// at distinct database versions to inadvertently be running concurrently. + /// + /// This operation is intended to assist in the explicit case where a Nexus + /// is unable to finish marking itself quiesced during the handoff process, + /// and cannot be expunged. + ForceNexusQuiesce(ForceNexusQuiesceArgs), +} + +#[derive(Debug, Args, Clone)] +struct ForceNexusQuiesceArgs { + /// The UUID of the Nexus zone to be marked quiesced + id: OmicronZoneUuid, + + /// If "true": don't bother parsing the target blueprint to identify the + /// validity of the [`id`] argument. + /// + /// Forcing Nexus to quiesce is already an unsafe operation; this makes + /// it even less safe. Use with caution.
+ #[arg(long, action=ArgAction::SetTrue)] + ignore_target_blueprint: bool, } #[derive(Debug, Args, Clone)] @@ -1150,6 +1179,12 @@ impl DbArgs { }) => { cmd_db_metadata_list_nexus(&opctx, &datastore).await } + DbCommands::DbMetadata(DbMetadataArgs { + command: DbMetadataCommands::ForceNexusQuiesce(args), + }) => { + let token = omdb.check_allow_destructive()?; + cmd_db_metadata_force_nexus_quiesce(&opctx, &datastore, args, token).await + } DbCommands::CrucibleDataset(CrucibleDatasetArgs { command: CrucibleDatasetCommands::List, }) => { @@ -1779,6 +1814,45 @@ async fn cmd_db_metadata_list_nexus( Ok(()) } +async fn cmd_db_metadata_force_nexus_quiesce( + opctx: &OpContext, + datastore: &DataStore, + args: &ForceNexusQuiesceArgs, + _destruction_token: DestructiveOperationToken, +) -> Result<(), anyhow::Error> { + if !args.ignore_target_blueprint { + let (_, current_target_blueprint) = datastore + .blueprint_target_get_current_full(opctx) + .await + .context("loading current target blueprint")?; + let nexus_generation = current_target_blueprint + .all_nexus_zones(BlueprintZoneDisposition::is_in_service) + .find_map(|(_, zone, nexus_zone)| { + if zone.id == args.id { + Some(nexus_zone.nexus_generation) + } else { + None + } + }); + + let Some(gen) = nexus_generation else { + bail!("Nexus {} not found in blueprint", args.id); + }; + let bp_gen = current_target_blueprint.nexus_generation; + if bp_gen <= gen { + bail!( + "Nexus {} not ready to quiesce (nexus generation {gen} >= blueprint gen {bp_gen})", + args.id + ); + } + } + + datastore.database_nexus_access_quiesce(opctx, args.id).await?; + println!("Quiesced {}", args.id); + + Ok(()) +} + // Crucible datasets #[derive(Tabled)] diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 4014b0c4943..3aa2845a06d 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -11,6 +11,16 @@ stderr: note: using database URL 
postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable note: database schema version matches expected () ============================================= +EXECUTING COMMAND: omdb ["--destructive", "db", "db-metadata", "force-nexus-quiesce", "....................."] +termination: Exited(1) +--------------------------------------------- +stdout: +--------------------------------------------- +stderr: +note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable +note: database schema version matches expected () +Error: Nexus ..................... not ready to quiesce (nexus generation 1 >= blueprint gen 1) +============================================= EXECUTING COMMAND: omdb ["db", "disks", "list"] termination: Exited(0) --------------------------------------------- @@ -1756,3 +1766,13 @@ note: database schema version matches expected () assembling reconfigurator state ... done wrote ============================================= +EXECUTING COMMAND: omdb ["--destructive", "db", "db-metadata", "force-nexus-quiesce", "--ignore-target-blueprint", "....................."] +termination: Exited(0) +--------------------------------------------- +stdout: +Quiesced ..................... 
+--------------------------------------------- +stderr: +note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable +note: database schema version matches expected () +============================================= diff --git a/dev-tools/omdb/tests/test_all_output.rs b/dev-tools/omdb/tests/test_all_output.rs index 99d46b935c9..97f59af527e 100644 --- a/dev-tools/omdb/tests/test_all_output.rs +++ b/dev-tools/omdb/tests/test_all_output.rs @@ -178,6 +178,20 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { let invocations: &[&[&str]] = &[ &["db", "db-metadata", "ls-nexus"], + + // We expect this operation to fail (the nexus generation is the same + // as the one in the target blueprint - it shouldn't be trying to + // quiesce yet). + // + // We test a version of this command which sets this record to + // quiesced anyway as the final invocation. + &[ + "--destructive", + "db", + "db-metadata", + "force-nexus-quiesce", + &cptestctx.server.server_context().nexus.id().to_string(), + ], &["db", "disks", "list"], &["db", "dns", "show"], &["db", "dns", "diff", "external", "2"], @@ -274,6 +288,20 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { // We can't easily test the sled agent output because that's only // provided by a real sled agent, which is not available in the // ControlPlaneTestContext. + + // This operation will set the "db_metadata_nexus" state to quiesced. + // + // This would normally only be set by a Nexus as it shuts itself down; + // save it for last to avoid causing a weird state while testing other + // commands.
+ &[ + "--destructive", + "db", + "db-metadata", + "force-nexus-quiesce", + "--ignore-target-blueprint", + &cptestctx.server.server_context().nexus.id().to_string(), + ], ]; let mut redactor = Redactor::default(); diff --git a/nexus/db-queries/src/db/datastore/db_metadata.rs b/nexus/db-queries/src/db/datastore/db_metadata.rs index 92d4f678547..35c7b02ecac 100644 --- a/nexus/db-queries/src/db/datastore/db_metadata.rs +++ b/nexus/db-queries/src/db/datastore/db_metadata.rs @@ -842,6 +842,26 @@ impl DataStore { Ok(()) } + /// Updates a nexus access record to "Quiesced" + pub async fn database_nexus_access_quiesce( + &self, + opctx: &OpContext, + nexus_id: OmicronZoneUuid, + ) -> Result<(), Error> { + use nexus_db_schema::schema::db_metadata_nexus::dsl; + + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + + diesel::update(dsl::db_metadata_nexus) + .filter(dsl::nexus_id.eq(nexus_id.into_untyped_uuid())) + .set(dsl::state.eq(DbMetadataNexusState::Quiesced)) + .execute_async(&*self.pool_connection_unauthorized().await?) 
+ .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + Ok(()) + } + /// Initializes Nexus database access records from a blueprint using an /// existing connection /// From 7d7b54adb5cbd53ebc9ecc2a119b29af20da1d86 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 16 Sep 2025 16:59:57 -0700 Subject: [PATCH 4/6] fmt --- dev-tools/omdb/tests/test_all_output.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/dev-tools/omdb/tests/test_all_output.rs b/dev-tools/omdb/tests/test_all_output.rs index 97f59af527e..123704da25f 100644 --- a/dev-tools/omdb/tests/test_all_output.rs +++ b/dev-tools/omdb/tests/test_all_output.rs @@ -178,7 +178,6 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { let invocations: &[&[&str]] = &[ &["db", "db-metadata", "ls-nexus"], - // We expect this operation to fail (the nexus generation is the same // as the one in the target blueprint - it shouldn't be trying to // quiesce yet). From 5ad9ad5e84d3e3f1773bdb311910afe37b169f92 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 29 Sep 2025 16:18:20 -0700 Subject: [PATCH 5/6] Review feedback --- dev-tools/omdb/src/bin/omdb/db.rs | 6 +-- dev-tools/omdb/src/bin/omdb/db/db_metadata.rs | 37 +++++++++++++------ dev-tools/omdb/tests/successes.out | 16 +++++--- dev-tools/omdb/tests/test_all_output.rs | 6 ++- 4 files changed, 43 insertions(+), 22 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 8967da910e1..d4833c6d187 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -45,7 +45,7 @@ use clap::builder::PossibleValuesParser; use clap::builder::TypedValueParser; use db_metadata::DbMetadataArgs; use db_metadata::DbMetadataCommands; -use db_metadata::cmd_db_metadata_force_nexus_quiesce; +use db_metadata::cmd_db_metadata_force_mark_nexus_quiesced; use db_metadata::cmd_db_metadata_list_nexus; use diesel::BoolExpressionMethods; use diesel::ExpressionMethods; @@ -1150,10 
+1150,10 @@ impl DbArgs { cmd_db_metadata_list_nexus(&opctx, &datastore).await } DbCommands::DbMetadata(DbMetadataArgs { - command: DbMetadataCommands::ForceNexusQuiesce(args), + command: DbMetadataCommands::ForceMarkNexusQuiesced(args), }) => { let token = omdb.check_allow_destructive()?; - cmd_db_metadata_force_nexus_quiesce(&opctx, &datastore, args, token).await + cmd_db_metadata_force_mark_nexus_quiesced(&opctx, &datastore, args, token).await } DbCommands::CrucibleDataset(CrucibleDatasetArgs { command: CrucibleDatasetCommands::List, diff --git a/dev-tools/omdb/src/bin/omdb/db/db_metadata.rs b/dev-tools/omdb/src/bin/omdb/db/db_metadata.rs index ed3af5e3db3..bf2fb50dbe9 100644 --- a/dev-tools/omdb/src/bin/omdb/db/db_metadata.rs +++ b/dev-tools/omdb/src/bin/omdb/db/db_metadata.rs @@ -7,6 +7,7 @@ use super::display_option_blank; use crate::check_allow_destructive::DestructiveOperationToken; +use crate::helpers::ConfirmationPrompt; use anyhow::Context; use anyhow::bail; use clap::ArgAction; @@ -47,21 +48,26 @@ pub enum DbMetadataCommands { /// This operation is intended to assist in the explicit case where a Nexus /// is unable to finish marking itself quiesced during the handoff process, /// and cannot be expunged. - ForceNexusQuiesce(ForceNexusQuiesceArgs), + ForceMarkNexusQuiesced(ForceMarkNexusQuiescedArgs), } #[derive(Debug, Args, Clone)] -pub struct ForceNexusQuiesceArgs { +pub struct ForceMarkNexusQuiescedArgs { /// The UUID of the Nexus zone to be marked quiesced id: OmicronZoneUuid, - /// If "true": don't bother parsing the target blueprint to identify the - /// validity of the [`id`] argument. + /// Skip checking the target blueprint to determine whether Nexus zone `id` + /// is from the generation of Nexus zones that could be active or handing + /// off. /// - /// Forcing Nexus to quiesce is already an unsafe operation; this makes - /// it even less safe. Use with caution. 
+ /// Manually marking Nexus quiesced is already an unsafe operation; this + /// makes it even less safe. Use with caution. #[arg(long, action=ArgAction::SetTrue)] - ignore_target_blueprint: bool, + skip_blueprint_validation: bool, + + /// Skip confirmation prompt to verify that this operation is intended. + #[arg(long, action=ArgAction::SetTrue)] + skip_confirmation: bool, } // DB Metadata @@ -186,13 +192,22 @@ pub async fn cmd_db_metadata_list_nexus( Ok(()) } -pub async fn cmd_db_metadata_force_nexus_quiesce( +pub async fn cmd_db_metadata_force_mark_nexus_quiesced( opctx: &OpContext, datastore: &DataStore, - args: &ForceNexusQuiesceArgs, + args: &ForceMarkNexusQuiescedArgs, _destruction_token: DestructiveOperationToken, ) -> Result<(), anyhow::Error> { - if !args.ignore_target_blueprint { + if !args.skip_confirmation { + println!( + "\nDo you want to mark Nexus {} as quiesced in the database?", + args.id + ); + let mut prompt = ConfirmationPrompt::new(); + prompt.read_and_validate("y/N", "y")?; + } + + if !args.skip_blueprint_validation { let (_, current_target_blueprint) = datastore .blueprint_target_get_current_full(opctx) .await @@ -220,7 +235,7 @@ pub async fn cmd_db_metadata_force_nexus_quiesce( } datastore.database_nexus_access_update_quiesced(args.id).await?; - println!("Quiesced {}", args.id); + println!("Marked {} quiesced", args.id); Ok(()) } diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index ebbf542c4ae..faed110eadf 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -11,7 +11,7 @@ stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable note: database schema version matches expected () ============================================= -EXECUTING COMMAND: omdb ["--destructive", "db", "db-metadata", "force-nexus-quiesce", "....................."] +EXECUTING COMMAND: omdb ["--destructive", "db", "db-metadata", "force-mark-nexus-quiesced", 
"--skip-confirmation", "....................."] termination: Exited(1) --------------------------------------------- stdout: @@ -1766,13 +1766,17 @@ note: database schema version matches expected () assembling reconfigurator state ... done wrote ============================================= -EXECUTING COMMAND: omdb ["--destructive", "db", "db-metadata", "force-nexus-quiesce", "--ignore-target-blueprint", "....................."] -termination: Exited(0) +EXECUTING COMMAND: omdb ["--destructive", "db", "db-metadata", "force-mark-nexus-quiesced", "--skip-confirmation", "--ignore-target-blueprint", "....................."] +termination: Exited(2) --------------------------------------------- stdout: -Quiesced ..................... --------------------------------------------- stderr: -note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable -note: database schema version matches expected () +error: unexpected argument '--ignore-target-blueprint' found + + tip: to pass '--ignore-target-blueprint' as a value, use '-- --ignore-target-blueprint' + +Usage: omdb db db-metadata force-mark-nexus-quiesced --skip-confirmation + +For more information, try '--help'. 
============================================= diff --git a/dev-tools/omdb/tests/test_all_output.rs b/dev-tools/omdb/tests/test_all_output.rs index 9cd1586756b..663125004ff 100644 --- a/dev-tools/omdb/tests/test_all_output.rs +++ b/dev-tools/omdb/tests/test_all_output.rs @@ -188,7 +188,8 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { "--destructive", "db", "db-metadata", - "force-nexus-quiesce", + "force-mark-nexus-quiesced", + "--skip-confirmation", &cptestctx.server.server_context().nexus.id().to_string(), ], &["db", "disks", "list"], @@ -297,7 +298,8 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { "--destructive", "db", "db-metadata", - "force-nexus-quiesce", + "force-mark-nexus-quiesced", + "--skip-confirmation", "--ignore-target-blueprint", &cptestctx.server.server_context().nexus.id().to_string(), ], From ebe02fc0d3b09ac52875ea390a1f16b703c60aa6 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 29 Sep 2025 16:43:56 -0700 Subject: [PATCH 6/6] actually fix test --- dev-tools/omdb/tests/successes.out | 14 +++++--------- dev-tools/omdb/tests/test_all_output.rs | 2 +- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index faed110eadf..3d0550ab324 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -1766,17 +1766,13 @@ note: database schema version matches expected () assembling reconfigurator state ... 
done wrote ============================================= -EXECUTING COMMAND: omdb ["--destructive", "db", "db-metadata", "force-mark-nexus-quiesced", "--skip-confirmation", "--ignore-target-blueprint", "....................."] -termination: Exited(2) +EXECUTING COMMAND: omdb ["--destructive", "db", "db-metadata", "force-mark-nexus-quiesced", "--skip-confirmation", "--skip-blueprint-validation", "....................."] +termination: Exited(0) --------------------------------------------- stdout: +Marked ..................... quiesced --------------------------------------------- stderr: -error: unexpected argument '--ignore-target-blueprint' found - - tip: to pass '--ignore-target-blueprint' as a value, use '-- --ignore-target-blueprint' - -Usage: omdb db db-metadata force-mark-nexus-quiesced --skip-confirmation - -For more information, try '--help'. +note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable +note: database schema version matches expected () ============================================= diff --git a/dev-tools/omdb/tests/test_all_output.rs b/dev-tools/omdb/tests/test_all_output.rs index 663125004ff..e8807753032 100644 --- a/dev-tools/omdb/tests/test_all_output.rs +++ b/dev-tools/omdb/tests/test_all_output.rs @@ -300,7 +300,7 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { "db-metadata", "force-mark-nexus-quiesced", "--skip-confirmation", - "--ignore-target-blueprint", + "--skip-blueprint-validation", &cptestctx.server.server_context().nexus.id().to_string(), ], ];