Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions dev-tools/omdb/src/bin/omdb/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ use clap::builder::PossibleValuesParser;
use clap::builder::TypedValueParser;
use db_metadata::DbMetadataArgs;
use db_metadata::DbMetadataCommands;
use db_metadata::cmd_db_metadata_force_mark_nexus_quiesced;
use db_metadata::cmd_db_metadata_list_nexus;
use diesel::BoolExpressionMethods;
use diesel::ExpressionMethods;
Expand Down Expand Up @@ -1148,6 +1149,12 @@ impl DbArgs {
}) => {
cmd_db_metadata_list_nexus(&opctx, &datastore).await
}
DbCommands::DbMetadata(DbMetadataArgs {
command: DbMetadataCommands::ForceMarkNexusQuiesced(args),
}) => {
let token = omdb.check_allow_destructive()?;
cmd_db_metadata_force_mark_nexus_quiesced(&opctx, &datastore, args, token).await
}
DbCommands::CrucibleDataset(CrucibleDatasetArgs {
command: CrucibleDatasetCommands::List,
}) => {
Expand Down
87 changes: 87 additions & 0 deletions dev-tools/omdb/src/bin/omdb/db/db_metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,12 @@
//! `omdb db db_metadata` subcommands

use super::display_option_blank;

use crate::check_allow_destructive::DestructiveOperationToken;
use crate::helpers::ConfirmationPrompt;
use anyhow::Context;
use anyhow::bail;
use clap::ArgAction;
use clap::Args;
use clap::Subcommand;
use nexus_db_model::DbMetadataNexusState;
Expand All @@ -27,8 +32,42 @@ pub struct DbMetadataArgs {

// NOTE: the `///` comments on the variants below are picked up by clap's
// derive macro and rendered as the subcommands' `--help` text, so wording
// changes here are user-visible.
#[derive(Debug, Subcommand, Clone)]
pub enum DbMetadataCommands {
    /// Lists the `db_metadata_nexus` records for all Nexuses.
    #[clap(alias = "ls-nexus")]
    ListNexus,

    /// !!! DANGEROUS !!! Updates a `db_metadata_nexus` record to 'Quiesced'
    ///
    /// THIS OPERATION IS DANGEROUS. It is the responsibility of the caller
    /// to ensure that the specified Nexus zone is not running.
    ///
    /// If the Nexus being updated is actually running, this operation
    /// may cause arbitrary data corruption, as it can allow multiple Nexuses
    /// at distinct database versions to inadvertently be running concurrently.
    ///
    /// This operation is intended to assist in the explicit case where a Nexus
    /// is unable to finish marking itself quiesced during the handoff process,
    /// and cannot be expunged.
    ForceMarkNexusQuiesced(ForceMarkNexusQuiescedArgs),
}

// Command-line arguments for `omdb db db-metadata force-mark-nexus-quiesced`.
//
// NOTE: the `///` comments on the fields below are picked up by clap's derive
// macro and shown as `--help` text, so edits to them are user-visible.
#[derive(Debug, Args, Clone)]
pub struct ForceMarkNexusQuiescedArgs {
// Positional argument: it carries no `long`/`short` attribute.
/// The UUID of the Nexus zone to be marked quiesced
id: OmicronZoneUuid,

// `--skip-blueprint-validation`: when set, the command does not look the zone
// up in the current target blueprint or compare Nexus generations before
// updating the record.
/// Skip checking the target blueprint to determine whether Nexus zone `id`
/// is from the generation of Nexus zones that could be active or handing
/// off.
///
/// Manually marking Nexus quiesced is already an unsafe operation; this
/// makes it even less safe. Use with caution.
#[arg(long, action=ArgAction::SetTrue)]
skip_blueprint_validation: bool,

// `--skip-confirmation`: when set, the interactive "y/N" prompt is not shown.
/// Skip confirmation prompt to verify that this operation is intended.
#[arg(long, action=ArgAction::SetTrue)]
skip_confirmation: bool,
}

// DB Metadata
Expand Down Expand Up @@ -152,3 +191,51 @@ pub async fn cmd_db_metadata_list_nexus(

Ok(())
}

pub async fn cmd_db_metadata_force_mark_nexus_quiesced(
opctx: &OpContext,
datastore: &DataStore,
args: &ForceMarkNexusQuiescedArgs,
_destruction_token: DestructiveOperationToken,
) -> Result<(), anyhow::Error> {
if !args.skip_confirmation {
println!(
"\nDo you want to mark Nexus {} as quiesced in the database?",
args.id
);
let mut prompt = ConfirmationPrompt::new();
prompt.read_and_validate("y/N", "y")?;
}

if !args.skip_blueprint_validation {
let (_, current_target_blueprint) = datastore
.blueprint_target_get_current_full(opctx)
.await
.context("loading current target blueprint")?;
let nexus_generation = current_target_blueprint
.all_nexus_zones(BlueprintZoneDisposition::is_in_service)
.find_map(|(_, zone, nexus_zone)| {
if zone.id == args.id {
Some(nexus_zone.nexus_generation)
} else {
None
}
});

let Some(gen) = nexus_generation else {
bail!("Nexus {} not found in blueprint", args.id);
};
let bp_gen = current_target_blueprint.nexus_generation;
if bp_gen <= gen {
bail!(
"Nexus {} not ready to quiesce (nexus generation {gen} >= blueprint gen {bp_gen})",
args.id
);
}
}

datastore.database_nexus_access_update_quiesced(args.id).await?;
println!("Marked {} quiesced", args.id);

Ok(())
}
20 changes: 20 additions & 0 deletions dev-tools/omdb/tests/successes.out
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,16 @@ stderr:
note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable
note: database schema version matches expected (<redacted database version>)
=============================================
EXECUTING COMMAND: omdb ["--destructive", "db", "db-metadata", "force-mark-nexus-quiesced", "--skip-confirmation", "..........<REDACTED_UUID>..........."]
termination: Exited(1)
---------------------------------------------
stdout:
---------------------------------------------
stderr:
note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable
note: database schema version matches expected (<redacted database version>)
Error: Nexus ..........<REDACTED_UUID>........... not ready to quiesce (nexus generation 1 >= blueprint gen 1)
=============================================
EXECUTING COMMAND: omdb ["db", "disks", "list"]
termination: Exited(0)
---------------------------------------------
Expand Down Expand Up @@ -1756,3 +1766,13 @@ note: database schema version matches expected (<redacted database version>)
assembling reconfigurator state ... done
wrote <TMP_PATH_REDACTED>
=============================================
EXECUTING COMMAND: omdb ["--destructive", "db", "db-metadata", "force-mark-nexus-quiesced", "--skip-confirmation", "--skip-blueprint-validation", "..........<REDACTED_UUID>..........."]
termination: Exited(0)
---------------------------------------------
stdout:
Marked ..........<REDACTED_UUID>........... quiesced
---------------------------------------------
stderr:
note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable
note: database schema version matches expected (<redacted database version>)
=============================================
29 changes: 29 additions & 0 deletions dev-tools/omdb/tests/test_all_output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,20 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) {

let invocations: &[&[&str]] = &[
&["db", "db-metadata", "ls-nexus"],
// We expect this operation to fail (the nexus generation is the same
// as the one in the target blueprint - it shouldn't be trying to
// quiesce yet).
//
// We test a version of this command which sets this record to quiesced
// anyway as the final invocation.
&[
"--destructive",
"db",
"db-metadata",
"force-mark-nexus-quiesced",
"--skip-confirmation",
&cptestctx.server.server_context().nexus.id().to_string(),
],
&["db", "disks", "list"],
&["db", "dns", "show"],
&["db", "dns", "diff", "external", "2"],
Expand Down Expand Up @@ -274,6 +288,21 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) {
// We can't easily test the sled agent output because that's only
// provided by a real sled agent, which is not available in the
// ControlPlaneTestContext.

// This operation will set the "db_metadata_nexus" state to quiesced.
//
// This would normally only be set by a Nexus as it shuts itself down;
// save it for last to avoid causing a weird state while testing other
// commands.
&[
"--destructive",
"db",
"db-metadata",
"force-mark-nexus-quiesced",
"--skip-confirmation",
"--skip-blueprint-validation",
&cptestctx.server.server_context().nexus.id().to_string(),
],
];

let mut redactor = Redactor::default();
Expand Down
Loading