From 319da587e3c599cb5475b57e2a6239fcb43961b5 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Fri, 12 Jun 2026 10:32:12 -0700 Subject: [PATCH 01/29] okay i think this might actually work --- nexus/db-model/src/ereporter_restart.rs | 22 +++ nexus/db-model/src/lib.rs | 2 + nexus/db-queries/src/db/datastore/ereport.rs | 164 ++++++++++++++++++ .../tests/output/ereports_insert_host.sql | 44 +++++ .../tests/output/ereports_insert_sp.sql | 44 +++++ nexus/db-schema/src/schema.rs | 10 ++ schema/crdb/dbinit.sql | 23 +++ 7 files changed, 309 insertions(+) create mode 100644 nexus/db-model/src/ereporter_restart.rs create mode 100644 nexus/db-queries/tests/output/ereports_insert_host.sql create mode 100644 nexus/db-queries/tests/output/ereports_insert_sp.sql diff --git a/nexus/db-model/src/ereporter_restart.rs b/nexus/db-model/src/ereporter_restart.rs new file mode 100644 index 00000000000..57210cdc0ee --- /dev/null +++ b/nexus/db-model/src/ereporter_restart.rs @@ -0,0 +1,22 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::DbTypedUuid; +use crate::EreporterType; +use crate::SpMgsSlot; +use crate::SpType; +use chrono::DateTime; +use chrono::Utc; +use nexus_db_schema::schema::ereporter_restart; +use omicron_uuid_kinds::EreporterRestartKind; + +#[derive(Clone, Debug, Insertable, Queryable, Selectable)] +#[diesel(table_name = ereporter_restart)] +pub struct EreporterRestart { + pub id: DbTypedUuid, + pub time_first_seen: DateTime, + pub reporter: EreporterType, + pub slot_type: SpType, + pub slot: Option, +} diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index ecd3402e45b..3705e2839aa 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -148,6 +148,7 @@ mod disk_type_local_storage; mod dns; mod downstairs; pub mod ereport; +mod ereporter_restart; mod ereporter_type; mod external_ip; mod external_subnet; @@ -301,6 +302,7 @@ pub use disk_type_local_storage::*; pub use dns::*; pub use downstairs::*; pub use ereport::Ereport; +pub use ereporter_restart::*; pub use ereporter_type::*; pub use external_ip::*; pub use external_subnet::*; diff --git a/nexus/db-queries/src/db/datastore/ereport.rs b/nexus/db-queries/src/db/datastore/ereport.rs index 166eaf47467..1c71169116b 100644 --- a/nexus/db-queries/src/db/datastore/ereport.rs +++ b/nexus/db-queries/src/db/datastore/ereport.rs @@ -8,7 +8,9 @@ use super::DataStore; use crate::authz; use crate::context::OpContext; use crate::db::datastore::RunnableQuery; +use crate::db::model::DbTypedUuid; use crate::db::model::Ereport; +use crate::db::model::EreporterType; use crate::db::model::SpMgsSlot; use crate::db::model::SpType; use crate::db::model::SqlU16; @@ -23,12 +25,17 @@ use chrono::DateTime; use chrono::Utc; use diesel::AggregateExpressionMethods; use diesel::dsl::{count, min}; +use diesel::helper_types::*; +use diesel::pg::Pg; use diesel::prelude::*; +use diesel::query_builder::*; +use diesel::query_dsl::methods as query_methods; use diesel::sql_types; use nexus_db_errors::ErrorHandler; use nexus_db_errors::public_error_from_diesel; use nexus_db_lookup::DbConnection; use nexus_db_schema::schema::ereport::dsl; +use nexus_db_schema::schema::ereporter_restart::dsl as restart_dsl; use nexus_types::fm::ereport as fm; use nexus_types::fm::ereport::EreportFilters; use nexus_types::fm::ereport::EreportId; @@ -37,6 +44,7 @@ use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; +use omicron_uuid_kinds::EreporterRestartKind; use omicron_uuid_kinds::EreporterRestartUuid; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::SitrepUuid; @@ -319,6 +327,46 @@ impl DataStore { Ok((created, latest)) } + fn ereports_insert_query( + restart_id: EreporterRestartUuid, + time_collected: chrono::DateTime, + reporter: fm::Reporter, + ereports: impl IntoIterator, + ) -> impl RunnableQuery { + let ereports = ereports + .into_iter() + .map(|data| Ereport::new(data, reporter)) + .collect::>(); + let (reporter, slot_type, slot) = match reporter { + fm::Reporter::HostOs { slot, .. } => { + (EreporterType::Host, SpType::Sled, slot.map(SpMgsSlot::from)) + } + fm::Reporter::Sp { sp_type, slot } => { + (EreporterType::Sp, sp_type.into(), Some(SpMgsSlot::from(slot))) + } + }; + let insert_ereports = diesel::insert_into(dsl::ereport) + .values(ereports) + .on_conflict((dsl::restart_id, dsl::ena)) + .do_nothing(); + let insert_reporter = + diesel::insert_into(restart_dsl::ereporter_restart) + .values(crate::db::model::EreporterRestart { + id: restart_id.into(), + time_first_seen: time_collected, + reporter, + slot_type, + slot, + }) + .on_conflict(restart_dsl::id) + .do_update() + // TODO(eliza): figure out how to make the `ON CONFLICT DO + // UPDATE` not set the slot to NULL when it was previously + // non-null + .set(restart_dsl::slot.eq(slot)); + EreportInsertQuery { insert_ereports, insert_reporter } + } + /// Lists ereports which have not been marked as **definitely seen** /// (included in a committed sitrep) in the database, restricted to /// ereports whose `class` is one of `classes`, paginated by the reporter @@ -485,6 +533,49 @@ impl DataStore { } } +struct EreportInsertQuery { + insert_ereports: IE, + insert_reporter: IR, + // latest: L, + // restart_id: EreporterRestartUuid, + // ereports: Vec, + // reporter: fm::Reporter, + // time_collected: chrono::DateTime, +} + +impl QueryId for EreportInsertQuery { + type QueryId = (); + const HAS_STATIC_QUERY_ID: bool = false; +} + +impl Query for EreportInsertQuery { + type SqlType = sql_types::BigInt; +} + +impl diesel::RunQueryDsl + for EreportInsertQuery +{ +} + +impl QueryFragment for EreportInsertQuery +where + IE: QueryFragment, + IR: QueryFragment, + // L: QueryFragment, +{ + fn walk_ast<'b>(&'b self, mut out: AstPass<'_, 'b, Pg>) -> QueryResult<()> { + out.push_sql("WITH inserted_ereports AS ( "); + self.insert_ereports.walk_ast(out.reborrow())?; + out.push_sql("), inserted_reporter AS ("); + self.insert_reporter.walk_ast(out.reborrow())?; + // out.push_sql("), latest AS ("); + // self.latest.walk_ast(out.reborrow())?; + + out.push_sql(") SELECT (inserted_ereports)"); + Ok(()) + } +} + #[cfg(test)] mod tests { use super::*; @@ -493,6 +584,7 @@ mod tests { use crate::db::pub_test_utils::explain::assert_uses_partial_index_only; use crate::db::raw_query_builder::expectorate_query_contents; use dropshot::PaginationOrder; + use ereport_types::Ena; use omicron_test_utils::dev; use omicron_uuid_kinds::OmicronZoneUuid; use std::num::NonZeroU32; @@ -835,6 +927,78 @@ mod tests { logctx.cleanup_successful(); } + #[tokio::test] + async fn expectorate_ereports_insert_sp() { + let restart_id = EreporterRestartUuid::nil(); + let collector_id = OmicronZoneUuid::nil(); + let query = DataStore::ereports_insert_query( + restart_id, + DateTime::::MIN_UTC, + fm::Reporter::Sp { sp_type: SpType::Sled.into(), slot: 16 }, + vec![ + fm::EreportData { + id: fm::EreportId { restart_id, ena: Ena(2) }, + time_collected: DateTime::::MIN_UTC, + collector_id, + part_number: Some("my cool CPN".to_string()), + serial_number: Some("my cool serial".to_string()), + class: Some("my cool ereport".to_string()), + report: serde_json::json!({}), + }, + fm::EreportData { + id: fm::EreportId { restart_id, ena: Ena(3) }, + time_collected: DateTime::::MIN_UTC, + collector_id, + part_number: Some("my cool CPN".to_string()), + serial_number: Some("my cool serial".to_string()), + class: Some("my other ereport".to_string()), + report: serde_json::json!({}), + }, + ], + ); + expectorate_query_contents( + &query, + "tests/output/ereports_insert_sp.sql", + ) + .await; + } + + #[tokio::test] + async fn expectorate_ereports_insert_host() { + let restart_id = EreporterRestartUuid::nil(); + let collector_id = OmicronZoneUuid::nil(); + let query = DataStore::ereports_insert_query( + restart_id, + DateTime::::MIN_UTC, + fm::Reporter::HostOs { slot: Some(16), sled: SledUuid::nil() }, + vec![ + fm::EreportData { + id: fm::EreportId { restart_id, ena: Ena(2) }, + time_collected: DateTime::::MIN_UTC, + collector_id, + part_number: Some("my cool CPN".to_string()), + serial_number: Some("my cool serial".to_string()), + class: Some("my cool ereport".to_string()), + report: serde_json::json!({}), + }, + fm::EreportData { + id: fm::EreportId { restart_id, ena: Ena(3) }, + time_collected: DateTime::::MIN_UTC, + collector_id, + part_number: Some("my cool CPN".to_string()), + serial_number: Some("my cool serial".to_string()), + class: Some("my other ereport".to_string()), + report: serde_json::json!({}), + }, + ], + ); + expectorate_query_contents( + &query, + "tests/output/ereports_insert_host.sql", + ) + .await; + } + // This test tests that the `ereport_fetch_matching` queries succeed with // filters that only select ereports over a time range, and the default (no // filters). diff --git a/nexus/db-queries/tests/output/ereports_insert_host.sql b/nexus/db-queries/tests/output/ereports_insert_host.sql new file mode 100644 index 00000000000..ca6536d8f5a --- /dev/null +++ b/nexus/db-queries/tests/output/ereports_insert_host.sql @@ -0,0 +1,44 @@ +WITH + inserted_ereports + AS ( + INSERT + INTO + ereport + ( + restart_id, + ena, + time_deleted, + time_collected, + collector_id, + part_number, + serial_number, + class, + report, + reporter, + sled_id, + slot_type, + slot, + marked_seen_in + ) + VALUES + ($1, $2, DEFAULT, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, DEFAULT), + ($13, $14, DEFAULT, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, DEFAULT) + ON CONFLICT + (restart_id, ena) + DO + NOTHING + ), + inserted_reporter + AS ( + INSERT + INTO + ereporter_restart (id, time_first_seen, reporter, slot_type, slot) + VALUES + ($25, $26, $27, $28, $29) + ON CONFLICT + (id) + DO + UPDATE SET slot = $30 + ) +SELECT + inserted_ereports diff --git a/nexus/db-queries/tests/output/ereports_insert_sp.sql b/nexus/db-queries/tests/output/ereports_insert_sp.sql new file mode 100644 index 00000000000..a62c39802d0 --- /dev/null +++ b/nexus/db-queries/tests/output/ereports_insert_sp.sql @@ -0,0 +1,44 @@ +WITH + inserted_ereports + AS ( + INSERT + INTO + ereport + ( + restart_id, + ena, + time_deleted, + time_collected, + collector_id, + part_number, + serial_number, + class, + report, + reporter, + sled_id, + slot_type, + slot, + marked_seen_in + ) + VALUES + ($1, $2, DEFAULT, $3, $4, $5, $6, $7, $8, $9, DEFAULT, $10, $11, DEFAULT), + ($12, $13, DEFAULT, $14, $15, $16, $17, $18, $19, $20, DEFAULT, $21, $22, DEFAULT) + ON CONFLICT + (restart_id, ena) + DO + NOTHING + ), + inserted_reporter + AS ( + INSERT + INTO + ereporter_restart (id, time_first_seen, reporter, slot_type, slot) + VALUES + ($23, $24, $25, $26, $27) + ON CONFLICT + (id) + DO + UPDATE SET slot = $28 + ) +SELECT + inserted_ereports diff --git a/nexus/db-schema/src/schema.rs b/nexus/db-schema/src/schema.rs index 0e4a3e8ab09..adaaf1f4487 100644 --- a/nexus/db-schema/src/schema.rs +++ b/nexus/db-schema/src/schema.rs @@ -2972,6 +2972,16 @@ table! { } } +table! { + ereporter_restart (id) { + id -> Uuid, + time_first_seen -> Timestamptz, + reporter -> crate::enums::EreporterTypeEnum, + slot_type -> crate::enums::SpTypeEnum, + slot -> Nullable, + } +} + table! { user_data_export (id) { id -> Uuid, diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 1ce1140402c..2b150f49117 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -7626,6 +7626,29 @@ WHERE marked_seen_in IS NULL AND time_deleted IS NULL; +-- Table tracking the timestamp of the first ereport received from each restart +-- ID. +CREATE TABLE IF NOT EXISTS omicron.public.ereport_restart ( + -- The reporter restart ID. + -- + -- This corresponds to the `restart_id` column in `omicron.public.ereport`. + id UUID PRIMARY KEY, + -- The timestamp at which the first ereport received from this restart ID + -- was received. + time_first_seen TIMESTAMPTZ NOT NULL, + -- Whether this ereport was generated by SP firmware or the host OS. + reporter omicron.public.ereporter_type NOT NULL, + -- The type of the physical slot occupied by the reporter. + slot_type omicron.public.sp_type NOT NULL, + -- The number of the physical slot occupied by the reporter. + -- + -- For sled host OS reporters, this may be NULL if the sled's location is + -- not known to the system when the ereport was received. If the physical + -- location of the sled is determined later, subsequent attempts to insert + -- ereports will update this field. + slot_number INT NOT NULL +); + /* * Fault management situation reports (and accessories) * From b2fff05e5000a26854856ca2abe0ff904415f9fe Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Fri, 12 Jun 2026 11:03:51 -0700 Subject: [PATCH 02/29] diesel hell --- nexus/db-queries/src/db/datastore/ereport.rs | 127 +++++++++++++------ 1 file changed, 90 insertions(+), 37 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/ereport.rs b/nexus/db-queries/src/db/datastore/ereport.rs index 1c71169116b..638f0489572 100644 --- a/nexus/db-queries/src/db/datastore/ereport.rs +++ b/nexus/db-queries/src/db/datastore/ereport.rs @@ -332,39 +332,64 @@ impl DataStore { time_collected: chrono::DateTime, reporter: fm::Reporter, ereports: impl IntoIterator, - ) -> impl RunnableQuery { + ) -> impl RunnableQuery<(i64, Option, Option)> { let ereports = ereports .into_iter() .map(|data| Ereport::new(data, reporter)) .collect::>(); - let (reporter, slot_type, slot) = match reporter { - fm::Reporter::HostOs { slot, .. } => { - (EreporterType::Host, SpType::Sled, slot.map(SpMgsSlot::from)) + let (reporter, slot_type, slot, latest_query) = match reporter { + fm::Reporter::HostOs { slot, sled, .. } => { + let latest = dsl::ereport + .filter(dsl::sled_id.eq(sled.into_untyped_uuid())) + .filter(dsl::time_deleted.is_null()) + .order_by((dsl::time_collected.desc(), dsl::ena.desc())) + .limit(1) + .select((dsl::restart_id, dsl::ena)); + ( + EreporterType::Host, + SpType::Sled, + slot.map(SqlU16::from), + LatestQuery::Host(latest), + ) } fm::Reporter::Sp { sp_type, slot } => { - (EreporterType::Sp, sp_type.into(), Some(SpMgsSlot::from(slot))) + let sp_type = sp_type.into(); + let slot = SqlU16::from(slot); + let latest = dsl::ereport + .filter(dsl::slot_type.eq(sp_type)) + .filter(dsl::slot.eq(Some(slot))) + .filter(dsl::time_deleted.is_null()) + .order_by((dsl::time_collected.desc(), dsl::ena.desc())) + .limit(1) + .select((dsl::restart_id, dsl::ena)); + ( + EreporterType::Sp, + sp_type, + Some(slot), + LatestQuery::Sp(latest), + ) } }; let insert_ereports = diesel::insert_into(dsl::ereport) .values(ereports) .on_conflict((dsl::restart_id, dsl::ena)) .do_nothing(); - let insert_reporter = - diesel::insert_into(restart_dsl::ereporter_restart) - .values(crate::db::model::EreporterRestart { - id: restart_id.into(), - time_first_seen: time_collected, - reporter, - slot_type, - slot, - }) - .on_conflict(restart_dsl::id) - .do_update() - // TODO(eliza): figure out how to make the `ON CONFLICT DO - // UPDATE` not set the slot to NULL when it was previously - // non-null - .set(restart_dsl::slot.eq(slot)); - EreportInsertQuery { insert_ereports, insert_reporter } + let insert_reporter = diesel::insert_into( + restart_dsl::ereporter_restart, + ) + .values(crate::db::model::EreporterRestart { + id: restart_id.into(), + time_first_seen: time_collected, + reporter, + slot_type, + slot: slot.map(SpMgsSlot::from), + }); + EreportInsertQuery { + insert_ereports, + insert_reporter, + slot, + latest_query, + } } /// Lists ereports which have not been marked as **definitely seen** @@ -533,45 +558,73 @@ impl DataStore { } } -struct EreportInsertQuery { +struct EreportInsertQuery { insert_ereports: IE, insert_reporter: IR, - // latest: L, - // restart_id: EreporterRestartUuid, - // ereports: Vec, - // reporter: fm::Reporter, - // time_collected: chrono::DateTime, + slot: Option, + latest_query: LatestQuery, +} + +enum LatestQuery { + Sp(LS), + Host(LH), } -impl QueryId for EreportInsertQuery { +type SelectableSqlType = + <>::SelectExpression as Expression>::SqlType; + +impl QueryId for EreportInsertQuery { type QueryId = (); const HAS_STATIC_QUERY_ID: bool = false; } -impl Query for EreportInsertQuery { - type SqlType = sql_types::BigInt; +impl Query for EreportInsertQuery { + type SqlType = ( + sql_types::BigInt, + Nullable>, + Nullable>, + ); } -impl diesel::RunQueryDsl - for EreportInsertQuery +impl diesel::RunQueryDsl + for EreportInsertQuery { } -impl QueryFragment for EreportInsertQuery +impl QueryFragment for EreportInsertQuery where IE: QueryFragment, IR: QueryFragment, - // L: QueryFragment, + LS: QueryFragment, + LH: QueryFragment, { fn walk_ast<'b>(&'b self, mut out: AstPass<'_, 'b, Pg>) -> QueryResult<()> { out.push_sql("WITH inserted_ereports AS ( "); self.insert_ereports.walk_ast(out.reborrow())?; + out.push_sql("), inserted_reporter AS ("); self.insert_reporter.walk_ast(out.reborrow())?; - // out.push_sql("), latest AS ("); - // self.latest.walk_ast(out.reborrow())?; + out.push_sql(" ON CONFLICT (id) DO "); + // If we have a slot number, update it so that a previously-null slot + // number is filled in; if we do not, do nothing on conflict so a + // previously non-NULL slot is not clobbered. + if let Some(ref slot) = self.slot { + out.push_sql("UPDATE SET slot = "); + out.push_bind_param::(slot)?; + } else { + out.push_sql("NOTHING"); + } - out.push_sql(") SELECT (inserted_ereports)"); + out.push_sql("), latest AS ("); + match self.latest_query { + LatestQuery::Sp(ref query) => { + query.walk_ast(out.reborrow())?; + } + LatestQuery::Host(ref query) => { + query.walk_ast(out.reborrow())?; + } + } + out.push_sql(") SELECT (inserted_ereports, latest.*)"); Ok(()) } } From 5602f0336cda937af269c982f223bdd79b76bf67 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Fri, 12 Jun 2026 11:10:19 -0700 Subject: [PATCH 03/29] okay get the latest query in there too --- nexus/db-queries/src/db/datastore/ereport.rs | 7 ++----- .../tests/output/ereports_insert_host.sql | 15 ++++++++++++++- .../tests/output/ereports_insert_sp.sql | 15 ++++++++++++++- 3 files changed, 30 insertions(+), 7 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/ereport.rs b/nexus/db-queries/src/db/datastore/ereport.rs index 638f0489572..02d8784335c 100644 --- a/nexus/db-queries/src/db/datastore/ereport.rs +++ b/nexus/db-queries/src/db/datastore/ereport.rs @@ -570,9 +570,6 @@ enum LatestQuery { Host(LH), } -type SelectableSqlType = - <>::SelectExpression as Expression>::SqlType; - impl QueryId for EreportInsertQuery { type QueryId = (); const HAS_STATIC_QUERY_ID: bool = false; @@ -581,8 +578,8 @@ impl QueryId for EreportInsertQuery { impl Query for EreportInsertQuery { type SqlType = ( sql_types::BigInt, - Nullable>, - Nullable>, + sql_types::Nullable, + sql_types::Nullable, ); } diff --git a/nexus/db-queries/tests/output/ereports_insert_host.sql b/nexus/db-queries/tests/output/ereports_insert_host.sql index ca6536d8f5a..a73ba7487d1 100644 --- a/nexus/db-queries/tests/output/ereports_insert_host.sql +++ b/nexus/db-queries/tests/output/ereports_insert_host.sql @@ -39,6 +39,19 @@ WITH (id) DO UPDATE SET slot = $30 + ), + latest + AS ( + SELECT + ereport.restart_id, ereport.ena + FROM + ereport + WHERE + ereport.sled_id = $31 AND (ereport.time_deleted IS NULL) + ORDER BY + ereport.time_collected DESC, ereport.ena DESC + LIMIT + $32 ) SELECT - inserted_ereports + (inserted_ereports, latest.*) diff --git a/nexus/db-queries/tests/output/ereports_insert_sp.sql b/nexus/db-queries/tests/output/ereports_insert_sp.sql index a62c39802d0..eba90ecf704 100644 --- a/nexus/db-queries/tests/output/ereports_insert_sp.sql +++ b/nexus/db-queries/tests/output/ereports_insert_sp.sql @@ -39,6 +39,19 @@ WITH (id) DO UPDATE SET slot = $28 + ), + latest + AS ( + SELECT + ereport.restart_id, ereport.ena + FROM + ereport + WHERE + (ereport.slot_type = $29 AND ereport.slot = $30) AND (ereport.time_deleted IS NULL) + ORDER BY + ereport.time_collected DESC, ereport.ena DESC + LIMIT + $31 ) SELECT - inserted_ereports + (inserted_ereports, latest.*) From 972cfc2b4495ea293d7b48bdeea119d59013db00 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Fri, 12 Jun 2026 11:11:59 -0700 Subject: [PATCH 04/29] move stuff into other stuff --- nexus/db-queries/src/db/datastore/ereport.rs | 139 ++++++++++--------- 1 file changed, 71 insertions(+), 68 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/ereport.rs b/nexus/db-queries/src/db/datastore/ereport.rs index 02d8784335c..dbc56190ee6 100644 --- a/nexus/db-queries/src/db/datastore/ereport.rs +++ b/nexus/db-queries/src/db/datastore/ereport.rs @@ -333,6 +333,77 @@ impl DataStore { reporter: fm::Reporter, ereports: impl IntoIterator, ) -> impl RunnableQuery<(i64, Option, Option)> { + struct EreportInsertQuery { + insert_ereports: IE, + insert_reporter: IR, + slot: Option, + latest_query: LatestQuery, + } + + enum LatestQuery { + Sp(LS), + Host(LH), + } + + impl QueryId for EreportInsertQuery { + type QueryId = (); + const HAS_STATIC_QUERY_ID: bool = false; + } + + impl Query for EreportInsertQuery { + type SqlType = ( + sql_types::BigInt, + sql_types::Nullable, + sql_types::Nullable, + ); + } + + impl diesel::RunQueryDsl + for EreportInsertQuery + { + } + + impl QueryFragment for EreportInsertQuery + where + IE: QueryFragment, + IR: QueryFragment, + LS: QueryFragment, + LH: QueryFragment, + { + fn walk_ast<'b>( + &'b self, + mut out: AstPass<'_, 'b, Pg>, + ) -> QueryResult<()> { + out.push_sql("WITH inserted_ereports AS ( "); + self.insert_ereports.walk_ast(out.reborrow())?; + + out.push_sql("), inserted_reporter AS ("); + self.insert_reporter.walk_ast(out.reborrow())?; + out.push_sql(" ON CONFLICT (id) DO "); + // If we have a slot number, update it so that a previously-null slot + // number is filled in; if we do not, do nothing on conflict so a + // previously non-NULL slot is not clobbered. + if let Some(ref slot) = self.slot { + out.push_sql("UPDATE SET slot = "); + out.push_bind_param::(slot)?; + } else { + out.push_sql("NOTHING"); + } + + out.push_sql("), latest AS ("); + match self.latest_query { + LatestQuery::Sp(ref query) => { + query.walk_ast(out.reborrow())?; + } + LatestQuery::Host(ref query) => { + query.walk_ast(out.reborrow())?; + } + } + out.push_sql(") SELECT (inserted_ereports, latest.*)"); + Ok(()) + } + } + let ereports = ereports .into_iter() .map(|data| Ereport::new(data, reporter)) @@ -558,74 +629,6 @@ impl DataStore { } } -struct EreportInsertQuery { - insert_ereports: IE, - insert_reporter: IR, - slot: Option, - latest_query: LatestQuery, -} - -enum LatestQuery { - Sp(LS), - Host(LH), -} - -impl QueryId for EreportInsertQuery { - type QueryId = (); - const HAS_STATIC_QUERY_ID: bool = false; -} - -impl Query for EreportInsertQuery { - type SqlType = ( - sql_types::BigInt, - sql_types::Nullable, - sql_types::Nullable, - ); -} - -impl diesel::RunQueryDsl - for EreportInsertQuery -{ -} - -impl QueryFragment for EreportInsertQuery -where - IE: QueryFragment, - IR: QueryFragment, - LS: QueryFragment, - LH: QueryFragment, -{ - fn walk_ast<'b>(&'b self, mut out: AstPass<'_, 'b, Pg>) -> QueryResult<()> { - out.push_sql("WITH inserted_ereports AS ( "); - self.insert_ereports.walk_ast(out.reborrow())?; - - out.push_sql("), inserted_reporter AS ("); - self.insert_reporter.walk_ast(out.reborrow())?; - out.push_sql(" ON CONFLICT (id) DO "); - // If we have a slot number, update it so that a previously-null slot - // number is filled in; if we do not, do nothing on conflict so a - // previously non-NULL slot is not clobbered. - if let Some(ref slot) = self.slot { - out.push_sql("UPDATE SET slot = "); - out.push_bind_param::(slot)?; - } else { - out.push_sql("NOTHING"); - } - - out.push_sql("), latest AS ("); - match self.latest_query { - LatestQuery::Sp(ref query) => { - query.walk_ast(out.reborrow())?; - } - LatestQuery::Host(ref query) => { - query.walk_ast(out.reborrow())?; - } - } - out.push_sql(") SELECT (inserted_ereports, latest.*)"); - Ok(()) - } -} - #[cfg(test)] mod tests { use super::*; From 675e2edecc41594491b0bd85fcb5b030e571bcee Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Fri, 12 Jun 2026 11:31:27 -0700 Subject: [PATCH 05/29] okay i can't quite get the latest query inlined into the CTE, that's fine actually --- nexus/db-queries/src/db/datastore/ereport.rs | 135 ++++++++++--------- 1 file changed, 68 insertions(+), 67 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/ereport.rs b/nexus/db-queries/src/db/datastore/ereport.rs index dbc56190ee6..d139c245ae6 100644 --- a/nexus/db-queries/src/db/datastore/ereport.rs +++ b/nexus/db-queries/src/db/datastore/ereport.rs @@ -297,31 +297,33 @@ impl DataStore { pub async fn ereports_insert( &self, opctx: &OpContext, + restart_id: EreporterRestartUuid, + time_collected: DateTime, reporter: fm::Reporter, ereports: impl IntoIterator, ) -> CreateResult<(usize, Option)> { opctx.authorize(authz::Action::CreateChild, &authz::FLEET).await?; let conn = self.pool_connection_authorized(opctx).await?; + let ereports = ereports .into_iter() .map(|data| Ereport::new(data, reporter)) .collect::>(); - let created = diesel::insert_into(dsl::ereport) - .values(ereports) - .on_conflict((dsl::restart_id, dsl::ena)) - .do_nothing() - .execute_async(&*conn) - .await - .map_err(|e| { - public_error_from_diesel(e, ErrorHandler::Server) - .internal_context("failed to insert ereports") - })?; + let created = Self::ereports_insert_query( + restart_id, + time_collected, + reporter, + ereports, + ) + .execute_async(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; let latest = self .latest_ereport_id_on_conn(&conn, reporter) .await .map_err(|e| { e.internal_context(format!( - "failed to refresh latest ereport ID for {reporter}" + "failed to refresh latest ereport ID for {reporter}", )) })?; Ok((created, latest)) @@ -331,44 +333,45 @@ impl DataStore { restart_id: EreporterRestartUuid, time_collected: chrono::DateTime, reporter: fm::Reporter, - ereports: impl IntoIterator, - ) -> impl RunnableQuery<(i64, Option, Option)> { - struct EreportInsertQuery { + ereports: Vec, + ) -> impl RunnableQuery { + struct EreportInsertQuery { insert_ereports: IE, insert_reporter: IR, slot: Option, - latest_query: LatestQuery, + // latest_query: LatestQuery, } - enum LatestQuery { - Sp(LS), - Host(LH), - } + // enum LatestQuery { + // Sp(LS), + // Host(LH), + // } - impl QueryId for EreportInsertQuery { + impl QueryId for EreportInsertQuery { type QueryId = (); const HAS_STATIC_QUERY_ID: bool = false; } - impl Query for EreportInsertQuery { - type SqlType = ( - sql_types::BigInt, - sql_types::Nullable, - sql_types::Nullable, - ); + impl Query for EreportInsertQuery { + // type SqlType = ( + // sql_types::BigInt, + // sql_types::Nullable<(sql_types::Uuid, sql_types::BigInt)>, + // ); + type SqlType = sql_types::BigInt; } - impl diesel::RunQueryDsl - for EreportInsertQuery + impl diesel::RunQueryDsl + for EreportInsertQuery { } - impl QueryFragment for EreportInsertQuery + impl QueryFragment + for EreportInsertQuery where IE: QueryFragment, IR: QueryFragment, - LS: QueryFragment, - LH: QueryFragment, + // LS: QueryFragment, + // LH: QueryFragment, { fn walk_ast<'b>( &'b self, @@ -390,54 +393,51 @@ impl DataStore { out.push_sql("NOTHING"); } - out.push_sql("), latest AS ("); - match self.latest_query { - LatestQuery::Sp(ref query) => { - query.walk_ast(out.reborrow())?; - } - LatestQuery::Host(ref query) => { - query.walk_ast(out.reborrow())?; - } - } + // TODO(eliza): make this part work + // out.push_sql("), latest AS ("); + // match self.latest_query { + // LatestQuery::Sp(ref query) => { + // query.walk_ast(out.reborrow())?; + // } + // LatestQuery::Host(ref query) => { + // query.walk_ast(out.reborrow())?; + // } + // } out.push_sql(") SELECT (inserted_ereports, latest.*)"); Ok(()) } } - let ereports = ereports - .into_iter() - .map(|data| Ereport::new(data, reporter)) - .collect::>(); - let (reporter, slot_type, slot, latest_query) = match reporter { - fm::Reporter::HostOs { slot, sled, .. } => { - let latest = dsl::ereport - .filter(dsl::sled_id.eq(sled.into_untyped_uuid())) - .filter(dsl::time_deleted.is_null()) - .order_by((dsl::time_collected.desc(), dsl::ena.desc())) - .limit(1) - .select((dsl::restart_id, dsl::ena)); + let (reporter, slot_type, slot /*latest_query*/) = match reporter { + fm::Reporter::HostOs { slot /*sled,*/, .. } => { + // let latest = dsl::ereport + // .filter(dsl::sled_id.eq(sled.into_untyped_uuid())) + // .filter(dsl::time_deleted.is_null()) + // .order_by((dsl::time_collected.desc(), dsl::ena.desc())) + // .limit(1) + // .select((dsl::restart_id, dsl::ena)); ( EreporterType::Host, SpType::Sled, slot.map(SqlU16::from), - LatestQuery::Host(latest), + // LatestQuery::Host(latest), ) } fm::Reporter::Sp { sp_type, slot } => { let sp_type = sp_type.into(); let slot = SqlU16::from(slot); - let latest = dsl::ereport - .filter(dsl::slot_type.eq(sp_type)) - .filter(dsl::slot.eq(Some(slot))) - .filter(dsl::time_deleted.is_null()) - .order_by((dsl::time_collected.desc(), dsl::ena.desc())) - .limit(1) - .select((dsl::restart_id, dsl::ena)); + // let latest = dsl::ereport + // .filter(dsl::slot_type.eq(sp_type)) + // .filter(dsl::slot.eq(Some(slot))) + // .filter(dsl::time_deleted.is_null()) + // .order_by((dsl::time_collected.desc(), dsl::ena.desc())) + // .limit(1) + // .select((dsl::restart_id, dsl::ena)); ( EreporterType::Sp, sp_type, Some(slot), - LatestQuery::Sp(latest), + // LatestQuery::Sp(latest), ) } }; @@ -459,7 +459,7 @@ impl DataStore { insert_ereports, insert_reporter, slot, - latest_query, + // latest_query, } } @@ -1069,13 +1069,12 @@ mod tests { let db = TestDatabase::new_with_datastore(&logctx.log).await; let (opctx, datastore) = (db.opctx(), db.datastore()); - let id = fm::EreportId { - restart_id: EreporterRestartUuid::new_v4(), - ena: ereport_types::Ena(2), - }; + let restart_id = EreporterRestartUuid::new_v4(); + let id = fm::EreportId { restart_id, ena: ereport_types::Ena(2) }; + let time_collected = Utc::now(); let ereport = fm::EreportData { id, - time_collected: Utc::now(), + time_collected, collector_id: OmicronZoneUuid::new_v4(), part_number: Some("my cool CPN".to_string()), serial_number: Some("my cool serial".to_string()), @@ -1085,6 +1084,8 @@ mod tests { datastore .ereports_insert( &opctx, + restart_id, + time_collected, fm::Reporter::Sp { sp_type: nexus_types::inventory::SpType::Sled, slot: 19, From c98befb79d621c567ea9cca59713a2452e3bcda7 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Fri, 12 Jun 2026 12:35:58 -0700 Subject: [PATCH 06/29] whoa it actually works --- nexus/db-queries/src/db/datastore/ereport.rs | 64 +++++++++++++------ nexus/db-queries/src/db/datastore/fm.rs | 7 +- .../tests/output/ereports_insert_host.sql | 21 ++---- .../tests/output/ereports_insert_sp.sql | 21 ++---- .../app/background/tasks/ereport_ingester.rs | 8 ++- .../src/app/background/tasks/fm_rendezvous.rs | 21 +++--- .../tasks/support_bundle_collector.rs | 38 +++++++---- schema/crdb/dbinit.sql | 4 +- 8 files changed, 109 insertions(+), 75 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/ereport.rs b/nexus/db-queries/src/db/datastore/ereport.rs index d139c245ae6..7384e573153 100644 --- a/nexus/db-queries/src/db/datastore/ereport.rs +++ b/nexus/db-queries/src/db/datastore/ereport.rs @@ -8,7 +8,6 @@ use super::DataStore; use crate::authz; use crate::context::OpContext; use crate::db::datastore::RunnableQuery; -use crate::db::model::DbTypedUuid; use crate::db::model::Ereport; use crate::db::model::EreporterType; use crate::db::model::SpMgsSlot; @@ -25,11 +24,9 @@ use chrono::DateTime; use chrono::Utc; use diesel::AggregateExpressionMethods; use diesel::dsl::{count, min}; -use diesel::helper_types::*; use diesel::pg::Pg; use diesel::prelude::*; use diesel::query_builder::*; -use diesel::query_dsl::methods as query_methods; use diesel::sql_types; use nexus_db_errors::ErrorHandler; use nexus_db_errors::public_error_from_diesel; @@ -44,7 +41,6 @@ use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; -use omicron_uuid_kinds::EreporterRestartKind; use omicron_uuid_kinds::EreporterRestartUuid; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::SitrepUuid; @@ -387,11 +383,14 @@ impl DataStore { // number is filled in; if we do not, do nothing on conflict so a // previously non-NULL slot is not clobbered. if let Some(ref slot) = self.slot { - out.push_sql("UPDATE SET slot = "); + out.push_sql("UPDATE SET \"slot\" = "); out.push_bind_param::(slot)?; } else { out.push_sql("NOTHING"); } + // We don't actually need this, but `WITH` clauses have to + // return something, sooo.... + out.push_sql(" RETURNING id) "); // TODO(eliza): make this part work // out.push_sql("), latest AS ("); @@ -403,7 +402,7 @@ impl DataStore { // query.walk_ast(out.reborrow())?; // } // } - out.push_sql(") SELECT (inserted_ereports, latest.*)"); + out.push_sql("SELECT count(*) FROM inserted_ereports"); Ok(()) } } @@ -444,7 +443,8 @@ impl DataStore { let insert_ereports = diesel::insert_into(dsl::ereport) .values(ereports) .on_conflict((dsl::restart_id, dsl::ena)) - .do_nothing(); + .do_nothing() + .returning(dsl::ena); let insert_reporter = diesel::insert_into( restart_dsl::ereporter_restart, ) @@ -984,28 +984,38 @@ mod tests { async fn expectorate_ereports_insert_sp() { let restart_id = EreporterRestartUuid::nil(); let collector_id = OmicronZoneUuid::nil(); + let reporter = + fm::Reporter::Sp { sp_type: SpType::Sled.into(), slot: 16 }; let query = DataStore::ereports_insert_query( restart_id, DateTime::::MIN_UTC, - fm::Reporter::Sp { sp_type: SpType::Sled.into(), slot: 16 }, + reporter, vec![ - fm::EreportData { - id: fm::EreportId { restart_id, ena: Ena(2) }, + Ereport { + restart_id: restart_id.into(), + ena: Ena(2).into(), time_collected: DateTime::::MIN_UTC, - collector_id, + collector_id: collector_id.into(), part_number: Some("my cool CPN".to_string()), serial_number: Some("my cool serial".to_string()), class: Some("my cool ereport".to_string()), report: serde_json::json!({}), + marked_seen_in: None, + time_deleted: None, + reporter: reporter.into(), }, - fm::EreportData { - id: fm::EreportId { restart_id, ena: Ena(3) }, + Ereport { + restart_id: restart_id.into(), + ena: Ena(3).into(), time_collected: DateTime::::MIN_UTC, - collector_id, + collector_id: collector_id.into(), part_number: Some("my cool CPN".to_string()), serial_number: Some("my cool serial".to_string()), class: Some("my other ereport".to_string()), report: serde_json::json!({}), + marked_seen_in: None, + time_deleted: None, + reporter: reporter.into(), }, ], ); @@ -1020,28 +1030,38 @@ mod tests { async fn expectorate_ereports_insert_host() { let restart_id = EreporterRestartUuid::nil(); let collector_id = OmicronZoneUuid::nil(); + let reporter = + fm::Reporter::HostOs { slot: Some(16), sled: SledUuid::nil() }; let query = DataStore::ereports_insert_query( restart_id, DateTime::::MIN_UTC, - fm::Reporter::HostOs { slot: Some(16), sled: SledUuid::nil() }, + reporter, vec![ - fm::EreportData { - id: fm::EreportId { restart_id, ena: Ena(2) }, + Ereport { + restart_id: restart_id.into(), + ena: Ena(2).into(), time_collected: DateTime::::MIN_UTC, - collector_id, + collector_id: collector_id.into(), part_number: Some("my cool CPN".to_string()), serial_number: Some("my cool serial".to_string()), class: Some("my cool ereport".to_string()), report: serde_json::json!({}), + marked_seen_in: None, + time_deleted: None, + reporter: reporter.into(), }, - fm::EreportData { - id: fm::EreportId { restart_id, ena: Ena(3) }, + Ereport { + restart_id: restart_id.into(), + ena: Ena(3).into(), time_collected: DateTime::::MIN_UTC, - collector_id, + collector_id: collector_id.into(), part_number: Some("my cool CPN".to_string()), serial_number: Some("my cool serial".to_string()), class: Some("my other ereport".to_string()), report: serde_json::json!({}), + marked_seen_in: None, + time_deleted: None, + reporter: reporter.into(), }, ], ); @@ -1194,6 +1214,8 @@ mod tests { datastore .ereports_insert( &opctx, + restart_id, + Utc::now(), fm::Reporter::Sp { sp_type: nexus_types::inventory::SpType::Sled, slot: 0, diff --git a/nexus/db-queries/src/db/datastore/fm.rs b/nexus/db-queries/src/db/datastore/fm.rs index e2f314ee41a..83520d68d3d 100644 --- a/nexus/db-queries/src/db/datastore/fm.rs +++ b/nexus/db-queries/src/db/datastore/fm.rs @@ -2134,10 +2134,11 @@ mod tests { // we'll make those here first. let restart_id = omicron_uuid_kinds::EreporterRestartUuid::new_v4(); let collector_id = OmicronZoneUuid::new_v4(); + let time_collected = Utc::now(); let ereport1 = EreportData { id: fm::EreportId { restart_id, ena: ereport_types::Ena(2) }, - time_collected: Utc::now(), + time_collected, collector_id, part_number: Some("930-55555".to_string()), serial_number: Some("BRM6900420".to_string()), @@ -2147,7 +2148,7 @@ mod tests { let ereport2 = EreportData { id: fm::EreportId { restart_id, ena: ereport_types::Ena(3) }, - time_collected: Utc::now(), + time_collected, collector_id, part_number: Some("930-55555".to_string()), serial_number: Some("BRM6900420".to_string()), @@ -2164,6 +2165,8 @@ mod tests { datastore .ereports_insert( &opctx, + restart_id, + time_collected, reporter, vec![ereport1.clone(), ereport2.clone()], ) diff --git a/nexus/db-queries/tests/output/ereports_insert_host.sql b/nexus/db-queries/tests/output/ereports_insert_host.sql index a73ba7487d1..545818ab538 100644 --- a/nexus/db-queries/tests/output/ereports_insert_host.sql +++ b/nexus/db-queries/tests/output/ereports_insert_host.sql @@ -27,6 +27,8 @@ WITH (restart_id, ena) DO NOTHING + RETURNING + ereport.ena ), inserted_reporter AS ( @@ -39,19 +41,10 @@ WITH (id) DO UPDATE SET slot = $30 - ), - latest - AS ( - SELECT - ereport.restart_id, ereport.ena - FROM - ereport - WHERE - ereport.sled_id = $31 AND (ereport.time_deleted IS NULL) - ORDER BY - ereport.time_collected DESC, ereport.ena DESC - LIMIT - $32 + RETURNING + id ) SELECT - (inserted_ereports, latest.*) + count(*) +FROM + inserted_ereports diff --git a/nexus/db-queries/tests/output/ereports_insert_sp.sql b/nexus/db-queries/tests/output/ereports_insert_sp.sql index eba90ecf704..5c7e4a64d98 100644 --- a/nexus/db-queries/tests/output/ereports_insert_sp.sql +++ b/nexus/db-queries/tests/output/ereports_insert_sp.sql @@ -27,6 +27,8 @@ WITH (restart_id, ena) DO NOTHING + RETURNING + ereport.ena ), inserted_reporter AS ( @@ -39,19 +41,10 @@ WITH (id) DO UPDATE SET slot = $28 - ), - latest - AS ( - SELECT - ereport.restart_id, ereport.ena - FROM - ereport - WHERE - (ereport.slot_type = $29 AND ereport.slot = $30) AND (ereport.time_deleted IS NULL) - ORDER BY - ereport.time_collected DESC, ereport.ena DESC - LIMIT - $31 + RETURNING + id ) SELECT - (inserted_ereports, latest.*) + count(*) +FROM + inserted_ereports diff --git a/nexus/src/app/background/tasks/ereport_ingester.rs b/nexus/src/app/background/tasks/ereport_ingester.rs index a853d58d9b1..afd800ca9cf 100644 --- a/nexus/src/app/background/tasks/ereport_ingester.rs +++ b/nexus/src/app/background/tasks/ereport_ingester.rs @@ -291,7 +291,13 @@ impl Ingester { }); let created = match self .datastore - .ereports_insert(&opctx, reporter, db_ereports) + .ereports_insert( + &opctx, + restart_id, + time_collected, + reporter, + db_ereports, + ) .await { Ok((created, latest)) => { diff --git a/nexus/src/app/background/tasks/fm_rendezvous.rs b/nexus/src/app/background/tasks/fm_rendezvous.rs index b099b3534cc..61a4a34f72d 100644 --- a/nexus/src/app/background/tasks/fm_rendezvous.rs +++ b/nexus/src/app/background/tasks/fm_rendezvous.rs @@ -835,6 +835,7 @@ mod tests { let restart_id = EreporterRestartUuid::new_v4(); let collector_id = OmicronZoneUuid::new_v4(); + let time_collected = Utc::now(); let reporter = Reporter::Sp { sp_type: nexus_types::inventory::SpType::Sled, slot: 0, @@ -845,7 +846,7 @@ mod tests { restart_id, ena: ereport_types::Ena(2), }, - time_collected: Utc::now(), + time_collected, collector_id, part_number: Some("9130000019".to_string()), serial_number: Some("BRM420069".to_string()), @@ -857,7 +858,7 @@ mod tests { restart_id, ena: ereport_types::Ena(3), }, - time_collected: Utc::now(), + time_collected, collector_id, part_number: Some("9130000019".to_string()), serial_number: Some("BRM420069".to_string()), @@ -869,7 +870,7 @@ mod tests { restart_id, ena: ereport_types::Ena(4), }, - time_collected: Utc::now(), + time_collected, collector_id, part_number: Some("9130000019".to_string()), serial_number: Some("BRM420069".to_string()), @@ -878,7 +879,9 @@ mod tests { }; datastore .ereports_insert( - opctx, + &opctx, + restart_id, + time_collected, reporter, vec![ ereport1_data.clone(), @@ -1072,13 +1075,13 @@ mod tests { sp_type: nexus_types::inventory::SpType::Sled, slot: 1, }; - + let time_collected = Utc::now(); let ereport1_data = EreportData { id: ereport_types::EreportId { restart_id, ena: ereport_types::Ena(2), }, - time_collected: Utc::now(), + time_collected, collector_id, part_number: Some("9130000019".to_string()), serial_number: Some("BRM420069".to_string()), @@ -1090,7 +1093,7 @@ mod tests { restart_id, ena: ereport_types::Ena(3), }, - time_collected: Utc::now(), + time_collected, collector_id, part_number: Some("9130000019".to_string()), serial_number: Some("BRM420069".to_string()), @@ -1102,7 +1105,7 @@ mod tests { restart_id, ena: ereport_types::Ena(4), }, - time_collected: Utc::now(), + time_collected, collector_id, part_number: Some("9130000019".to_string()), serial_number: Some("BRM420069".to_string()), @@ -1112,6 +1115,8 @@ mod tests { datastore .ereports_insert( opctx, + restart_id, + time_collected, reporter, vec![ ereport1_data.clone(), diff --git a/nexus/src/app/background/tasks/support_bundle_collector.rs b/nexus/src/app/background/tasks/support_bundle_collector.rs index 5c0e29b3232..323fa666d79 100644 --- a/nexus/src/app/background/tasks/support_bundle_collector.rs +++ b/nexus/src/app/background/tasks/support_bundle_collector.rs @@ -856,10 +856,11 @@ mod test { const GIMLET_PN: &str = "9130000019"; // Make some SP ereports... let sp_restart_id = EreporterRestartUuid::new_v4(); - datastore.ereports_insert(&opctx, Reporter::Sp { sp_type: SpType::Sled, slot: SLED_SLOT}, vec![ + let time_collected = chrono::Utc::now(); + datastore.ereports_insert(&opctx, sp_restart_id, time_collected, Reporter::Sp { sp_type: SpType::Sled, slot: SLED_SLOT}, vec![ EreportData { id: EreportId { restart_id: sp_restart_id, ena: ereport_types::Ena(1) }, - time_collected: chrono::Utc::now(), + time_collected, collector_id: OmicronZoneUuid::new_v4(), part_number: Some(GIMLET_PN.to_string()), serial_number: Some(SP_SERIAL.to_string()), @@ -868,7 +869,7 @@ mod test { }, EreportData { id: EreportId { restart_id: sp_restart_id, ena: ereport_types::Ena(2) }, - time_collected: chrono::Utc::now(), + time_collected, collector_id: OmicronZoneUuid::new_v4(), part_number: Some(GIMLET_PN.to_string()), serial_number: Some(SP_SERIAL.to_string()), @@ -877,7 +878,7 @@ mod test { }, EreportData { id: EreportId { restart_id: EreporterRestartUuid::new_v4(), ena: ereport_types::Ena(1) }, - time_collected: chrono::Utc::now(), + time_collected, collector_id: OmicronZoneUuid::new_v4(), // Let's do a silly one! No VPD, to make sure that's also // handled correctly. @@ -890,16 +891,20 @@ mod test { // And one from a different serial. N.B. that I made sure the number of // host-OS and SP ereports are different for when we make assertions // about the bundle report. + let sp2_restart_id = EreporterRestartUuid::new_v4(); + let time_collected = chrono::Utc::now(); datastore .ereports_insert( &opctx, + sp2_restart_id, + time_collected, Reporter::Sp { sp_type: SpType::Switch, slot: 1 }, vec![EreportData { id: EreportId { - restart_id: EreporterRestartUuid::new_v4(), + restart_id: sp2_restart_id, ena: ereport_types::Ena(1), }, - time_collected: chrono::Utc::now(), + time_collected, collector_id: OmicronZoneUuid::new_v4(), part_number: Some("9130000006".to_string()), serial_number: Some("BRM41000555".to_string()), @@ -910,10 +915,13 @@ mod test { .await .expect("failed to insert another fake SP ereport"); // And some host OS ones... - let restart_id = EreporterRestartUuid::new_v4(); + let sled1_restart_id = EreporterRestartUuid::new_v4(); + let time_collected = chrono::Utc::now(); datastore .ereports_insert( &opctx, + sled1_restart_id, + time_collected, Reporter::HostOs { sled: SledUuid::new_v4(), slot: Some(SLED_SLOT), @@ -921,10 +929,10 @@ mod test { vec![ EreportData { id: EreportId { - restart_id, + restart_id: sled1_restart_id, ena: ereport_types::Ena(1), }, - time_collected: chrono::Utc::now(), + time_collected, collector_id: OmicronZoneUuid::new_v4(), serial_number: Some(HOST_SERIAL.to_string()), part_number: Some(GIMLET_PN.to_string()), @@ -933,10 +941,10 @@ mod test { }, EreportData { id: EreportId { - restart_id, + restart_id: sled1_restart_id, ena: ereport_types::Ena(2), }, - time_collected: chrono::Utc::now(), + time_collected, collector_id: OmicronZoneUuid::new_v4(), serial_number: Some(HOST_SERIAL.to_string()), part_number: Some(GIMLET_PN.to_string()), @@ -947,14 +955,18 @@ mod test { ) .await .expect("failed to insert fake host OS ereports"); + let sled2_restart_id = EreporterRestartUuid::new_v4(); + let time_collected = chrono::Utc::now(); datastore .ereports_insert( &opctx, + sled2_restart_id, + time_collected, Reporter::HostOs { sled: SledUuid::new_v4(), slot: Some(SLED_SLOT) }, vec![ EreportData { - id: EreportId { restart_id: EreporterRestartUuid::new_v4(), ena: ereport_types::Ena(1) }, - time_collected: chrono::Utc::now(), + id: EreportId { restart_id: sled2_restart_id, ena: ereport_types::Ena(1) }, + time_collected, collector_id: OmicronZoneUuid::new_v4(), serial_number: Some(HOST_SERIAL.to_string()), part_number: Some(GIMLET_PN.to_string()), diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 2b150f49117..fb01ecb0307 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -7628,7 +7628,7 @@ WHERE -- Table tracking the timestamp of the first ereport received from each restart -- ID. -CREATE TABLE IF NOT EXISTS omicron.public.ereport_restart ( +CREATE TABLE IF NOT EXISTS omicron.public.ereporter_restart ( -- The reporter restart ID. -- -- This corresponds to the `restart_id` column in `omicron.public.ereport`. @@ -7646,7 +7646,7 @@ CREATE TABLE IF NOT EXISTS omicron.public.ereport_restart ( -- not known to the system when the ereport was received. If the physical -- location of the sled is determined later, subsequent attempts to insert -- ereports will update this field. - slot_number INT NOT NULL + slot INT NOT NULL ); /* From 7aa683a8a21b6d2788da2cd5bd463cfce8318822 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Fri, 12 Jun 2026 13:21:08 -0700 Subject: [PATCH 07/29] have a restart list query --- nexus/db-queries/src/db/datastore/ereport.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/nexus/db-queries/src/db/datastore/ereport.rs b/nexus/db-queries/src/db/datastore/ereport.rs index 7384e573153..a7e03cbc9af 100644 --- a/nexus/db-queries/src/db/datastore/ereport.rs +++ b/nexus/db-queries/src/db/datastore/ereport.rs @@ -9,6 +9,7 @@ use crate::authz; use crate::context::OpContext; use crate::db::datastore::RunnableQuery; use crate::db::model::Ereport; +use crate::db::model::EreporterRestart; use crate::db::model::EreporterType; use crate::db::model::SpMgsSlot; use crate::db::model::SpType; @@ -162,6 +163,20 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } + /// Lists ereporter restarts, paginated by restart ID. + pub async fn ereporter_restart_list( + &self, + opctx: &OpContext, + pagparams: &DataPageParams<'_, Uuid>, + ) -> ListResultVec { + opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + paginated(restart_dsl::ereporter_restart, restart_dsl::id, pagparams) + .select(EreporterRestart::as_select()) + .load_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + /// List unique ereporter restarts for the given serial number. pub async fn ereporter_restart_list_by_serial( &self, From 7cd919c56748493a88d93596f96bf1ccfc5dafeb Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Sat, 13 Jun 2026 09:55:11 -0700 Subject: [PATCH 08/29] let's have some tests for that as well --- common/src/lib.rs | 16 +- nexus/db-model/src/ereporter_restart.rs | 6 + nexus/db-model/src/ereporter_type.rs | 1 + nexus/db-queries/src/db/datastore/ereport.rs | 263 +++++++++++++++++++ schema/crdb/dbinit.sql | 6 +- 5 files changed, 290 insertions(+), 2 deletions(-) diff --git a/common/src/lib.rs b/common/src/lib.rs index 162c5fd2c6d..2ce4140b728 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -60,8 +60,22 @@ impl slog::KV for FileKv { /// This exists because the database doesn't store nanosecond-precision, so if /// we store nanosecond-precision timestamps, then DateTime conversion is lossy /// when round-tripping through the database. That's rather inconvenient. +/// +/// To truncate an arbitrary timestamp to database precision, use +/// [`timestamp_db_precision`]. pub fn now_db_precision() -> chrono::DateTime { - let ts = chrono::Utc::now(); + timestamp_db_precision(chrono::Utc::now()) +} + +/// Truncates a [`chrono::DateTime`]`<`[`chrono::Utc`]`>` to the previous +/// microsecond. +/// +/// This exists because the database doesn't store nanosecond-precision, so if +/// we store nanosecond-precision timestamps, then `DateTime`conversion is lossy +/// when round-tripping through the database. That's rather inconvenient. +pub fn timestamp_db_precision( + ts: chrono::DateTime, +) -> chrono::DateTime { let nanosecs = ts.timestamp_subsec_nanos(); let micros = ts.timestamp_subsec_micros(); let only_nanos = nanosecs - micros * 1000; diff --git a/nexus/db-model/src/ereporter_restart.rs b/nexus/db-model/src/ereporter_restart.rs index 57210cdc0ee..8d8f854b402 100644 --- a/nexus/db-model/src/ereporter_restart.rs +++ b/nexus/db-model/src/ereporter_restart.rs @@ -20,3 +20,9 @@ pub struct EreporterRestart { pub slot_type: SpType, pub slot: Option, } + +impl EreporterRestart { + pub fn slot_number(&self) -> Option { + self.slot.map(|slot| (*slot).0) + } +} diff --git a/nexus/db-model/src/ereporter_type.rs b/nexus/db-model/src/ereporter_type.rs index 933675bc5fd..8bd142a95b4 100644 --- a/nexus/db-model/src/ereporter_type.rs +++ b/nexus/db-model/src/ereporter_type.rs @@ -14,6 +14,7 @@ impl_enum_type!( Clone, Debug, PartialEq, + Eq, Serialize, Deserialize, AsExpression, diff --git a/nexus/db-queries/src/db/datastore/ereport.rs b/nexus/db-queries/src/db/datastore/ereport.rs index a7e03cbc9af..90a0b7908cf 100644 --- a/nexus/db-queries/src/db/datastore/ereport.rs +++ b/nexus/db-queries/src/db/datastore/ereport.rs @@ -648,6 +648,7 @@ impl DataStore { mod tests { use super::*; use crate::db::explain::ExplainableAsync; + use crate::db::pagination::Paginator; use crate::db::pub_test_utils::TestDatabase; use crate::db::pub_test_utils::explain::assert_uses_partial_index_only; use crate::db::raw_query_builder::expectorate_query_contents; @@ -655,6 +656,7 @@ mod tests { use ereport_types::Ena; use omicron_test_utils::dev; use omicron_uuid_kinds::OmicronZoneUuid; + use std::collections::BTreeMap; use std::num::NonZeroU32; use std::time::Duration; @@ -1293,4 +1295,265 @@ mod tests { db.terminate().await; logctx.cleanup_successful(); } + + async fn fetch_restarts( + datastore: &DataStore, + opctx: &OpContext, + ) -> BTreeMap { + // use a very small batch size in tests so that we are also exercising + // pagination. + const BATCH_SIZE: NonZeroU32 = match NonZeroU32::new(2) { + Some(b) => b, + None => panic!("2 is nonzero"), + }; + + let mut restarts = BTreeMap::new(); + let mut paginator = + Paginator::new(BATCH_SIZE, dropshot::PaginationOrder::Ascending); + while let Some(p) = paginator.next() { + let batch = datastore + .ereporter_restart_list(opctx, &p.current_pagparams()) + .await + .expect("listing ereporter restarts should succeed"); + paginator = + p.found_batch(&batch[..], &|r| r.id.into_untyped_uuid()); + for r in batch { + let id = r.id.into(); + if let Some(_) = restarts.insert(id, r) { + unreachable!( + "duplicate ereporter restart ID {id}; this should be \ + impossible as it is the primary key" + ) + } + } + } + + restarts + } + + #[tokio::test] + async fn test_ereporter_restarts() { + fn ereport_data( + restart_id: EreporterRestartUuid, + ena: u64, + time_collected: DateTime, + ) -> fm::EreportData { + fm::EreportData { + id: fm::EreportId { restart_id, ena: Ena(ena) }, + time_collected, + collector_id: OmicronZoneUuid::new_v4(), + part_number: None, + serial_number: None, + class: None, + report: serde_json::json!({}), + } + } + + let logctx = dev::test_setup_log("test_ereporter_restarts"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + let t0 = Utc::now(); + + let timestamp = move |secs: usize| -> DateTime { + let t1 = omicron_common::timestamp_db_precision(t0); + let t2 = t1 + Duration::from_secs(secs as u64); + let t2 = omicron_common::timestamp_db_precision(t2); + assert_ne!( + t1, t2, + "two timestamps intended to be distinct would be equal after \ + truncating to database precision (advanced by {secs} seconds)" + ); + t2 + }; + // For host OS reporters, the sled's physical location may not be known + // immediately, so we may insert ereports with a NULL slot number. We + // wish to ensure that, if subsequent ereports are inserted for the same + // restart ID with a non-NULL slot number, the previous slot number is + // updated to the new value, but the first seen timestamp is not + // changed. + let host0_restart_id = EreporterRestartUuid::new_v4(); + let host0_first_seen = timestamp(100); + let host0_slot = 7; + let sled = SledUuid::new_v4(); + datastore + .ereports_insert( + opctx, + host0_restart_id, + host0_first_seen, + fm::Reporter::HostOs { sled, slot: None }, + vec![ereport_data(host0_restart_id, 1, host0_first_seen)], + ) + .await + .unwrap(); + { + let time_collected = timestamp(200); + datastore + .ereports_insert( + opctx, + host0_restart_id, + time_collected, + fm::Reporter::HostOs { sled, slot: Some(host0_slot) }, + vec![ + ereport_data(host0_restart_id, 2, time_collected), + ereport_data(host0_restart_id, 3, time_collected), + ], + ) + .await + .unwrap(); + } + + // Let's also test that if we insert ereports from a reporter with a + // non-NULL slot number, and then insert ereports from the same reporter + // with a NULL slot number, the original slot number is preserved. This + // shouldn't happen in practice, but we should make sure the behavior + // here is correct anyway. + let host1_restart_id = EreporterRestartUuid::new_v4(); + let host1_first_seen = timestamp(150); + let host1_slot = 3; + let sled = SledUuid::new_v4(); + datastore + .ereports_insert( + opctx, + host1_restart_id, + host1_first_seen, + fm::Reporter::HostOs { sled, slot: Some(host1_slot) }, + vec![ + ereport_data(host1_restart_id, 1, host1_first_seen), + ereport_data(host1_restart_id, 2, host1_first_seen), + ], + ) + .await + .unwrap(); + datastore + .ereports_insert( + opctx, + host1_restart_id, + timestamp(250), + fm::Reporter::HostOs { sled, slot: None }, + vec![ereport_data(host1_restart_id, 3, timestamp(250))], + ) + .await + .unwrap(); + + // Finally, let's do some SP ereports. These are mostly uncomplicated, + // since the slot number will always be present. + let sp0_restart_id0 = EreporterRestartUuid::new_v4(); + let sp0_first_seen = timestamp(300); + let sp0_slot = 1; + datastore + .ereports_insert( + opctx, + sp0_restart_id0, + sp0_first_seen, + fm::Reporter::Sp { + sp_type: SpType::Switch.into(), + slot: sp0_slot, + }, + vec![ + ereport_data(sp0_restart_id0, 1, sp0_first_seen), + ereport_data(sp0_restart_id0, 2, sp0_first_seen), + ], + ) + .await + .unwrap(); + { + let time_collected = timestamp(350); + datastore + .ereports_insert( + opctx, + sp0_restart_id0, + time_collected, + fm::Reporter::Sp { + sp_type: SpType::Switch.into(), + slot: sp0_slot, + }, + vec![ereport_data(sp0_restart_id0, 3, time_collected)], + ) + .await + .unwrap(); + } + // And a subsequent restart of the same SP slot. + let sp0_restart_id1 = EreporterRestartUuid::new_v4(); + let sp0_restart1_first_seen = timestamp(360); + datastore + .ereports_insert( + opctx, + sp0_restart_id1, + sp0_restart1_first_seen, + fm::Reporter::Sp { + sp_type: SpType::Switch.into(), + slot: sp0_slot, + }, + vec![ + ereport_data(sp0_restart_id1, 1, sp0_restart1_first_seen), + ereport_data(sp0_restart_id1, 2, sp0_restart1_first_seen), + ], + ) + .await + .unwrap(); + + let mut restarts = fetch_restarts(datastore, opctx).await; + + // host 0 restart 0 + let Some(host0) = restarts.remove(&host0_restart_id) else { + panic!( + "expected host 0 restart 0 ({host0_restart_id}) to be present" + ); + }; + assert_eq!(host0.reporter, EreporterType::Host); + assert_eq!(host0.slot_type, SpType::Sled); + assert_eq!( + host0.slot_number(), + Some(host0_slot), + "a NULL slot should be filled in once it is known", + ); + assert_eq!( + host0.time_first_seen, host0_first_seen, + "time_first_seen comes from the first insert and is not updated", + ); + + // host 1 restart 0 + let Some(host1) = restarts.remove(&host1_restart_id) else { + panic!( + "expected host 1 restart 0 ({host1_restart_id}) to be present" + ); + }; + assert_eq!(host1.reporter, EreporterType::Host); + assert_eq!(host1.slot_type, SpType::Sled); + assert_eq!( + host1.slot_number(), + Some(host1_slot), + "a NULL slot should be filled in once it is known", + ); + assert_eq!( + host1.time_first_seen, host1_first_seen, + "time_first_seen comes from the first insert and is not updated", + ); + + // SP 0 restart 0 + let Some(sp0_restart_0) = restarts.remove(&sp0_restart_id0) else { + panic!("expected sp 0 restart 0 ({sp0_restart_id0}) to be present"); + }; + assert_eq!(sp0_restart_0.reporter, EreporterType::Sp); + assert_eq!(sp0_restart_0.slot_type, SpType::Switch); + assert_eq!(sp0_restart_0.slot_number(), Some(sp0_slot)); + assert_eq!(sp0_restart_0.time_first_seen, sp0_first_seen); + + // SP 0 restart 1 + let Some(sp0_restart_1) = restarts.remove(&sp0_restart_id1) else { + panic!("expected sp 0 restart 1 ({sp0_restart_id1}) to be present"); + }; + assert_eq!(sp0_restart_1.reporter, EreporterType::Sp); + assert_eq!(sp0_restart_1.slot_type, SpType::Switch); + assert_eq!(sp0_restart_1.slot_number(), Some(1)); + assert_eq!(sp0_restart_1.time_first_seen, sp0_restart1_first_seen); + + assert!( + restarts.is_empty(), + "unexpected restart entries remaining: {restarts:?}" + ); + + db.terminate().await; + logctx.cleanup_successful(); + } } diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index fb01ecb0307..74045582099 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -7646,7 +7646,11 @@ CREATE TABLE IF NOT EXISTS omicron.public.ereporter_restart ( -- not known to the system when the ereport was received. If the physical -- location of the sled is determined later, subsequent attempts to insert -- ereports will update this field. - slot INT NOT NULL + -- + -- Note: this is `INT4`, not `INT`/`INT8`. In CockroachDB `INT` is a 64-bit + -- alias, but the `slot` column (and its `ereport` counterpart) is a 32-bit + -- slot number, matching `Nullable` in the Diesel schema. + slot INT4 ); /* From 3cbade2e45ce2c481295cde7ffb8dd1bee544912 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Sat, 13 Jun 2026 10:07:49 -0700 Subject: [PATCH 09/29] clean up `ereports_insert_query` stop trying to add the "latest ENA" fetch to the CTE since that was too annoying, comments, nicer errors --- nexus/db-queries/src/db/datastore/ereport.rs | 116 ++++++++----------- 1 file changed, 47 insertions(+), 69 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/ereport.rs b/nexus/db-queries/src/db/datastore/ereport.rs index 90a0b7908cf..d5230b77b77 100644 --- a/nexus/db-queries/src/db/datastore/ereport.rs +++ b/nexus/db-queries/src/db/datastore/ereport.rs @@ -320,6 +320,7 @@ impl DataStore { .into_iter() .map(|data| Ereport::new(data, reporter)) .collect::>(); + let n_ereports = ereports.len(); let created = Self::ereports_insert_query( restart_id, time_collected, @@ -328,7 +329,15 @@ impl DataStore { ) .execute_async(&*conn) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server).internal_context( + format!( + "failed to insert {n_ereports} ereports from {reporter} \ + (restart {restart_id})", + ), + ) + })?; + let latest = self .latest_ereport_id_on_conn(&conn, reporter) .await @@ -340,49 +349,44 @@ impl DataStore { Ok((created, latest)) } + /// Returns a query for inserting ereports into the database and updating + /// the restart history table if the restart ID of the provided ereports is + /// not already present. + /// + /// This is performed in a single atomic CTE, in order to ensure that the + /// earliest `time_collected` timestamp in the `ereports` table for a given + /// restart ID will always match the `time_first_seen` timestamp in the + /// restart history table, even if Nexus crashes midway through inserting + /// ereports. fn ereports_insert_query( restart_id: EreporterRestartUuid, time_collected: chrono::DateTime, reporter: fm::Reporter, ereports: Vec, ) -> impl RunnableQuery { - struct EreportInsertQuery { + /// This is basically just a big pile of ceremony for combining two + /// little Diesel queries into a CTE... + struct EreportInsertQuery { insert_ereports: IE, insert_reporter: IR, slot: Option, - // latest_query: LatestQuery, } - // enum LatestQuery { - // Sp(LS), - // Host(LH), - // } - - impl QueryId for EreportInsertQuery { + impl QueryId for EreportInsertQuery { type QueryId = (); const HAS_STATIC_QUERY_ID: bool = false; } - impl Query for EreportInsertQuery { - // type SqlType = ( - // sql_types::BigInt, - // sql_types::Nullable<(sql_types::Uuid, sql_types::BigInt)>, - // ); + impl Query for EreportInsertQuery { type SqlType = sql_types::BigInt; } - impl diesel::RunQueryDsl - for EreportInsertQuery - { - } + impl diesel::RunQueryDsl for EreportInsertQuery {} - impl QueryFragment - for EreportInsertQuery + impl QueryFragment for EreportInsertQuery where IE: QueryFragment, IR: QueryFragment, - // LS: QueryFragment, - // LH: QueryFragment, { fn walk_ast<'b>( &'b self, @@ -394,9 +398,9 @@ impl DataStore { out.push_sql("), inserted_reporter AS ("); self.insert_reporter.walk_ast(out.reborrow())?; out.push_sql(" ON CONFLICT (id) DO "); - // If we have a slot number, update it so that a previously-null slot - // number is filled in; if we do not, do nothing on conflict so a - // previously non-NULL slot is not clobbered. + // If we have a slot number, update it so that a previously-null + // slot number is filled in; if we do not, do nothing on + // conflict so a previously non-NULL slot is not clobbered. if let Some(ref slot) = self.slot { out.push_sql("UPDATE SET \"slot\" = "); out.push_bind_param::(slot)?; @@ -406,60 +410,39 @@ impl DataStore { // We don't actually need this, but `WITH` clauses have to // return something, sooo.... out.push_sql(" RETURNING id) "); - - // TODO(eliza): make this part work - // out.push_sql("), latest AS ("); - // match self.latest_query { - // LatestQuery::Sp(ref query) => { - // query.walk_ast(out.reborrow())?; - // } - // LatestQuery::Host(ref query) => { - // query.walk_ast(out.reborrow())?; - // } - // } out.push_sql("SELECT count(*) FROM inserted_ereports"); Ok(()) } } - let (reporter, slot_type, slot /*latest_query*/) = match reporter { - fm::Reporter::HostOs { slot /*sled,*/, .. } => { - // let latest = dsl::ereport - // .filter(dsl::sled_id.eq(sled.into_untyped_uuid())) - // .filter(dsl::time_deleted.is_null()) - // .order_by((dsl::time_collected.desc(), dsl::ena.desc())) - // .limit(1) - // .select((dsl::restart_id, dsl::ena)); - ( - EreporterType::Host, - SpType::Sled, - slot.map(SqlU16::from), - // LatestQuery::Host(latest), - ) + let (reporter, slot_type, slot) = match reporter { + fm::Reporter::HostOs { slot, .. } => { + (EreporterType::Host, SpType::Sled, slot.map(SqlU16::from)) } fm::Reporter::Sp { sp_type, slot } => { let sp_type = sp_type.into(); let slot = SqlU16::from(slot); - // let latest = dsl::ereport - // .filter(dsl::slot_type.eq(sp_type)) - // .filter(dsl::slot.eq(Some(slot))) - // .filter(dsl::time_deleted.is_null()) - // .order_by((dsl::time_collected.desc(), dsl::ena.desc())) - // .limit(1) - // .select((dsl::restart_id, dsl::ena)); - ( - EreporterType::Sp, - sp_type, - Some(slot), - // LatestQuery::Sp(latest), - ) + (EreporterType::Sp, sp_type, Some(slot)) } }; + + // The query fragment to insert ereports into the `ereport` table. let insert_ereports = diesel::insert_into(dsl::ereport) .values(ereports) + // Some or all of the ereports collected in this batch may already + // exist in the database because they were ingested by another + // Nexus. If the same ENAs exist for this restart ID, that's fine; + // don't overwrite them. .on_conflict((dsl::restart_id, dsl::ena)) .do_nothing() .returning(dsl::ena); + // Query fragment to insert the reporter restart entry into the + // ereporter restart table, or update the existing entry's slot column + // if one exists and the slot column is null. The null behavior will be + // added by the `walk_ast()` method on `EreporterInsertQuery`, because + // it depends on whether or not there is a slot number to insert, and I + // couldn't figure out how to get diesel to let me type erase an INSERT + // statement that may have one of multiple ON CONFLICT clauses... let insert_reporter = diesel::insert_into( restart_dsl::ereporter_restart, ) @@ -470,12 +453,7 @@ impl DataStore { slot_type, slot: slot.map(SpMgsSlot::from), }); - EreportInsertQuery { - insert_ereports, - insert_reporter, - slot, - // latest_query, - } + EreportInsertQuery { insert_ereports, insert_reporter, slot } } /// Lists ereports which have not been marked as **definitely seen** From 7e29d33353bbe173022ef833d9666d4b693b0204 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Mon, 15 Jun 2026 10:18:26 -0700 Subject: [PATCH 10/29] migration, index, constraints --- nexus/db-model/src/schema_versions.rs | 3 +- schema/crdb/dbinit.sql | 31 +++++++++++--- .../crdb/ereporter-restart-order-v2/up01.sql | 35 ++++++++++++++++ .../crdb/ereporter-restart-order-v2/up02.sql | 41 +++++++++++++++++++ .../crdb/ereporter-restart-order-v2/up03.sql | 8 ++++ .../up03.verify.sql | 2 + 6 files changed, 113 insertions(+), 7 deletions(-) create mode 100644 schema/crdb/ereporter-restart-order-v2/up01.sql create mode 100644 schema/crdb/ereporter-restart-order-v2/up02.sql create mode 100644 schema/crdb/ereporter-restart-order-v2/up03.sql create mode 100644 schema/crdb/ereporter-restart-order-v2/up03.verify.sql diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index bba198bcc23..c6697c1985d 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -16,7 +16,7 @@ use std::{collections::BTreeMap, sync::LazyLock}; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: Version = Version::new(267, 0, 0); +pub const SCHEMA_VERSION: Version = Version::new(268, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -28,6 +28,7 @@ pub static KNOWN_VERSIONS: LazyLock> = LazyLock::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(268, "ereporter-restart-order-v2"), KnownVersion::new(267, "add-disruption-policy"), KnownVersion::new(266, "alert-version"), KnownVersion::new(265, "sled-resource-vmm-state"), diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 74045582099..72cb016be35 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -7646,13 +7646,32 @@ CREATE TABLE IF NOT EXISTS omicron.public.ereporter_restart ( -- not known to the system when the ereport was received. If the physical -- location of the sled is determined later, subsequent attempts to insert -- ereports will update this field. - -- - -- Note: this is `INT4`, not `INT`/`INT8`. In CockroachDB `INT` is a 64-bit - -- alias, but the `slot` column (and its `ereport` counterpart) is a 32-bit - -- slot number, matching `Nullable` in the Diesel schema. - slot INT4 + slot INT4, + + CONSTRAINT reporter_validity CHECK ( + ( + -- ereports from SPs must always have a SP type and slot, so we need + -- not worry about temporarily accepting NULL slots so that they can + -- be back-populated later as we do for host OS reporters. + reporter = 'sp' AND slot IS NOT NULL + ) OR ( + -- ereports from the sled host OS must have the 'sled' slot type (as + -- switches and PSCs do not have a host OS) + reporter = 'host' AND slot_type = 'sled' + ) + ), + ); +CREATE INDEX IF NOT EXISTS lookup_ereporter_restart_by_slot +ON omicron.public.ereporter_restart ( + reporter, + slot_type, + slot +) +WHERE + slot IS NOT NULL; + /* * Fault management situation reports (and accessories) * @@ -8749,7 +8768,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '267.0.0', NULL) + (TRUE, NOW(), NOW(), '268.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/crdb/ereporter-restart-order-v2/up01.sql b/schema/crdb/ereporter-restart-order-v2/up01.sql new file mode 100644 index 00000000000..64e162b9ca8 --- /dev/null +++ b/schema/crdb/ereporter-restart-order-v2/up01.sql @@ -0,0 +1,35 @@ +-- Table tracking the timestamp of the first ereport received from each restart +-- ID. +CREATE TABLE IF NOT EXISTS omicron.public.ereporter_restart ( + -- The reporter restart ID. + -- + -- This corresponds to the `restart_id` column in `omicron.public.ereport`. + id UUID PRIMARY KEY, + -- The timestamp at which the first ereport received from this restart ID + -- was received. + time_first_seen TIMESTAMPTZ NOT NULL, + -- Whether this ereport was generated by SP firmware or the host OS. + reporter omicron.public.ereporter_type NOT NULL, + -- The type of the physical slot occupied by the reporter. + slot_type omicron.public.sp_type NOT NULL, + -- The number of the physical slot occupied by the reporter. + -- + -- For sled host OS reporters, this may be NULL if the sled's location is + -- not known to the system when the ereport was received. If the physical + -- location of the sled is determined later, subsequent attempts to insert + -- ereports will update this field. + slot INT4, + + CONSTRAINT reporter_validity CHECK ( + ( + -- ereports from SPs must always have a SP type and slot, so we need + -- not worry about temporarily accepting NULL slots so that they can + -- be back-populated later as we do for host OS reporters. + reporter = 'sp' AND slot IS NOT NULL + ) OR ( + -- ereports from the sled host OS must have the 'sled' slot type (as + -- switches and PSCs do not have a host OS) + reporter = 'host' AND slot_type = 'sled' + ) + ), +); diff --git a/schema/crdb/ereporter-restart-order-v2/up02.sql b/schema/crdb/ereporter-restart-order-v2/up02.sql new file mode 100644 index 00000000000..9055d0e1ee7 --- /dev/null +++ b/schema/crdb/ereporter-restart-order-v2/up02.sql @@ -0,0 +1,41 @@ +SET LOCAL disallow_full_table_scans = off; + +-- Backfill the ereporter_restart table from existing ereport data. +-- +-- Insert one row for each unique restart ID present in the ereport table: +-- +-- * `time_first_seen` is the earliest `time_collected` of any ereport with +-- that restart ID. +-- * `reporter` and `slot_type` are constant for a given restart ID (a restart +-- is a single reporter incarnation occupying a single physical slot), so we +-- read them directly from the group. +-- * The slot number may be NULL in some of a restart ID's ereports and +-- non-NULL in others: a sled host OS can emit ereports before its physical +-- location is known, and then emit more once it has been resolved. SQL's +-- `MAX` ignores NULLs, so `MAX(slot)` recovers the non-NULL slot if *any* +-- ereport for the restart ID had one. If every ereport's slot was NULL, the +-- restart's slot is left NULL, which the column permits. +-- +-- `ON CONFLICT (id) DO NOTHING` makes this insert idempotent, and also tolerates +-- the (schema-prohibited, domain-impossible) case of a single restart ID having +-- inconsistent reporter/slot_type values across its ereports. +INSERT INTO omicron.public.ereporter_restart ( + id, + time_first_seen, + reporter, + slot_type, + slot +) +SELECT + restart_id, + MIN(time_collected), + reporter, + slot_type, + MAX(slot) +FROM + omicron.public.ereport +GROUP BY + restart_id, + reporter, + slot_type +ON CONFLICT (id) DO NOTHING; diff --git a/schema/crdb/ereporter-restart-order-v2/up03.sql b/schema/crdb/ereporter-restart-order-v2/up03.sql new file mode 100644 index 00000000000..f861a26b976 --- /dev/null +++ b/schema/crdb/ereporter-restart-order-v2/up03.sql @@ -0,0 +1,8 @@ +CREATE INDEX IF NOT EXISTS lookup_ereporter_restart_by_slot +ON omicron.public.ereporter_restart ( + reporter, + slot_type, + slot +) +WHERE + slot IS NOT NULL; diff --git a/schema/crdb/ereporter-restart-order-v2/up03.verify.sql b/schema/crdb/ereporter-restart-order-v2/up03.verify.sql new file mode 100644 index 00000000000..9bd922fd091 --- /dev/null +++ b/schema/crdb/ereporter-restart-order-v2/up03.verify.sql @@ -0,0 +1,2 @@ +-- DO NOT EDIT. Generated by test_migration_verification_files. +SELECT CAST(IF((SELECT true WHERE EXISTS (SELECT index_name FROM omicron.crdb_internal.table_indexes WHERE descriptor_name = 'ereporter_restart' AND index_name = 'lookup_ereporter_restart_by_slot')),'true','Schema change verification failed: index lookup_ereporter_restart_by_slot on table ereporter_restart does not exist') AS BOOL); From 92ebeb5a37611f467e136fbcadf024c090901884 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Mon, 15 Jun 2026 11:26:33 -0700 Subject: [PATCH 11/29] actually nothing is using the slot index we can add it later if we do want it --- schema/crdb/dbinit.sql | 12 +----------- schema/crdb/ereporter-restart-order-v2/up01.sql | 2 +- schema/crdb/ereporter-restart-order-v2/up03.sql | 8 -------- .../crdb/ereporter-restart-order-v2/up03.verify.sql | 2 -- 4 files changed, 2 insertions(+), 22 deletions(-) delete mode 100644 schema/crdb/ereporter-restart-order-v2/up03.sql delete mode 100644 schema/crdb/ereporter-restart-order-v2/up03.verify.sql diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 72cb016be35..0ed5f959b06 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -7659,19 +7659,9 @@ CREATE TABLE IF NOT EXISTS omicron.public.ereporter_restart ( -- switches and PSCs do not have a host OS) reporter = 'host' AND slot_type = 'sled' ) - ), - + ) ); -CREATE INDEX IF NOT EXISTS lookup_ereporter_restart_by_slot -ON omicron.public.ereporter_restart ( - reporter, - slot_type, - slot -) -WHERE - slot IS NOT NULL; - /* * Fault management situation reports (and accessories) * diff --git a/schema/crdb/ereporter-restart-order-v2/up01.sql b/schema/crdb/ereporter-restart-order-v2/up01.sql index 64e162b9ca8..b2cc84125d0 100644 --- a/schema/crdb/ereporter-restart-order-v2/up01.sql +++ b/schema/crdb/ereporter-restart-order-v2/up01.sql @@ -31,5 +31,5 @@ CREATE TABLE IF NOT EXISTS omicron.public.ereporter_restart ( -- switches and PSCs do not have a host OS) reporter = 'host' AND slot_type = 'sled' ) - ), + ) ); diff --git a/schema/crdb/ereporter-restart-order-v2/up03.sql b/schema/crdb/ereporter-restart-order-v2/up03.sql deleted file mode 100644 index f861a26b976..00000000000 --- a/schema/crdb/ereporter-restart-order-v2/up03.sql +++ /dev/null @@ -1,8 +0,0 @@ -CREATE INDEX IF NOT EXISTS lookup_ereporter_restart_by_slot -ON omicron.public.ereporter_restart ( - reporter, - slot_type, - slot -) -WHERE - slot IS NOT NULL; diff --git a/schema/crdb/ereporter-restart-order-v2/up03.verify.sql b/schema/crdb/ereporter-restart-order-v2/up03.verify.sql deleted file mode 100644 index 9bd922fd091..00000000000 --- a/schema/crdb/ereporter-restart-order-v2/up03.verify.sql +++ /dev/null @@ -1,2 +0,0 @@ --- DO NOT EDIT. Generated by test_migration_verification_files. -SELECT CAST(IF((SELECT true WHERE EXISTS (SELECT index_name FROM omicron.crdb_internal.table_indexes WHERE descriptor_name = 'ereporter_restart' AND index_name = 'lookup_ereporter_restart_by_slot')),'true','Schema change verification failed: index lookup_ereporter_restart_by_slot on table ereporter_restart does not exist') AS BOOL); From 68a2f3952e67cb04ba978b5849f1ac5d835ac9d4 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Mon, 15 Jun 2026 14:20:12 -0700 Subject: [PATCH 12/29] wip add to loader --- Cargo.lock | 1 + nexus/db-model/src/ereporter_restart.rs | 15 +++++++++++++++ nexus/fm/Cargo.toml | 1 + nexus/fm/src/analysis_input.rs | 18 ++++++++++++++++++ nexus/src/app/background/tasks/fm_analysis.rs | 8 ++++++++ 5 files changed, 43 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index e74419761c7..6fd7b83960e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7205,6 +7205,7 @@ dependencies = [ "chrono", "ereport-types", "iddqd", + "nexus-db-model", "nexus-reconfigurator-planning", "nexus-types", "omicron-test-utils", diff --git a/nexus/db-model/src/ereporter_restart.rs b/nexus/db-model/src/ereporter_restart.rs index 8d8f854b402..60b8edd7107 100644 --- a/nexus/db-model/src/ereporter_restart.rs +++ b/nexus/db-model/src/ereporter_restart.rs @@ -10,6 +10,7 @@ use chrono::DateTime; use chrono::Utc; use nexus_db_schema::schema::ereporter_restart; use omicron_uuid_kinds::EreporterRestartKind; +use omicron_uuid_kinds::EreporterRestartUuid; #[derive(Clone, Debug, Insertable, Queryable, Selectable)] #[diesel(table_name = ereporter_restart)] @@ -25,4 +26,18 @@ impl EreporterRestart { pub fn slot_number(&self) -> Option { self.slot.map(|slot| (*slot).0) } + + pub fn id(&self) -> &EreporterRestartUuid { + &self.id.0 + } +} + +impl iddqd::IdOrdItem for EreporterRestart { + type Key<'a> = &'a EreporterRestartUuid; + + fn key(&self) -> Self::Key<'_> { + self.id() + } + + iddqd::id_upcast!(); } diff --git a/nexus/fm/Cargo.toml b/nexus/fm/Cargo.toml index 5e88e88b88a..3a1b393f07c 100644 --- a/nexus/fm/Cargo.toml +++ b/nexus/fm/Cargo.toml @@ -18,6 +18,7 @@ anyhow.workspace = true chrono.workspace = true iddqd.workspace = true nexus-types.workspace = true +nexus-db-model.workspace = true omicron-uuid-kinds.workspace = true rand.workspace = true serde.workspace = true diff --git a/nexus/fm/src/analysis_input.rs b/nexus/fm/src/analysis_input.rs index 50749cca262..05bda17d7ce 100644 --- a/nexus/fm/src/analysis_input.rs +++ b/nexus/fm/src/analysis_input.rs @@ -6,6 +6,7 @@ use chrono::{DateTime, Utc}; use iddqd::IdOrdMap; +use nexus_db_model::EreporterRestart; use nexus_types::fm::analysis_reports::ClosedCaseReport; use nexus_types::fm::{self, Sitrep, SitrepVersion}; use nexus_types::inventory; @@ -39,6 +40,7 @@ pub struct Input { new_ereports: IdOrdMap, open_cases: IdOrdMap, closed_cases_copied_forward: IdOrdMap, + ereporter_restarts: IdOrdMap, } impl Input { @@ -61,6 +63,10 @@ impl Input { &self.open_cases } + pub fn ereporter_restarts(&self) -> &IdOrdMap { + &self.ereporter_restarts + } + pub(crate) fn closed_cases_copied_forward(&self) -> &IdOrdMap { &self.closed_cases_copied_forward } @@ -95,6 +101,7 @@ impl Input { parent_sitrep, inv, new_ereports: IdOrdMap::default(), + ereporter_restarts: IdOrdMap::default(), unmarked_seen_ereports: BTreeSet::default(), }) } @@ -127,6 +134,8 @@ pub struct Builder { /// These must be tracked in order to determine which closed cases must be /// copied forwards due to containing unmarked ereports. unmarked_seen_ereports: BTreeSet, + + ereporter_restarts: IdOrdMap, } impl Builder { @@ -155,6 +164,14 @@ impl Builder { })) } + /// Adds a set of ereport restart IDs to the input. + pub fn add_ereport_restarts( + &mut self, + restarts: impl IntoIterator, + ) { + self.ereporter_restarts.extend(restarts) + } + pub fn num_ereports(&self) -> usize { self.new_ereports.len() } @@ -234,6 +251,7 @@ impl Builder { new_ereports: self.new_ereports, open_cases, closed_cases_copied_forward, + ereporter_restarts: self.ereporter_restarts, }; (input, report) diff --git a/nexus/src/app/background/tasks/fm_analysis.rs b/nexus/src/app/background/tasks/fm_analysis.rs index 10173be4021..d054d3efa7a 100644 --- a/nexus/src/app/background/tasks/fm_analysis.rs +++ b/nexus/src/app/background/tasks/fm_analysis.rs @@ -228,6 +228,14 @@ impl FmAnalysis { Ok((input, status::PreparationStatus { errors, report })) } + async fn load_ereporter_restarts( + &mut self, + opctx: &OpContext, + builder: &mut fm::analysis_input::Builder, + ) -> anyhow::Result<()> { + todo!() + } + async fn load_new_ereports( &mut self, opctx: &OpContext, From 82c5cb477f80f3e9f328551c90b65a7716456887 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Mon, 15 Jun 2026 14:54:58 -0700 Subject: [PATCH 13/29] finish loader --- nexus/fm/src/analysis_input.rs | 11 +++++--- nexus/src/app/background/tasks/fm_analysis.rs | 26 ++++++++++++++++++- nexus/types/src/fm/analysis_reports.rs | 2 ++ 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/nexus/fm/src/analysis_input.rs b/nexus/fm/src/analysis_input.rs index 05bda17d7ce..8bb76a0fd6f 100644 --- a/nexus/fm/src/analysis_input.rs +++ b/nexus/fm/src/analysis_input.rs @@ -164,16 +164,20 @@ impl Builder { })) } + pub fn num_ereports(&self) -> usize { + self.new_ereports.len() + } + /// Adds a set of ereport restart IDs to the input. - pub fn add_ereport_restarts( + pub fn add_ereporter_restarts( &mut self, restarts: impl IntoIterator, ) { self.ereporter_restarts.extend(restarts) } - pub fn num_ereports(&self) -> usize { - self.new_ereports.len() + pub fn num_ereporter_restarts(&self) -> usize { + self.ereporter_restarts.len() } /// Finish constructing the [`Input`] and return it, along with a [`Report`] @@ -199,6 +203,7 @@ impl Builder { .iter() .map(|e| *e.id()) .collect(), + num_ereporter_restarts: self.ereporter_restarts.len(), open_cases: BTreeMap::new(), closed_cases_copied_forward: BTreeMap::new(), }; diff --git a/nexus/src/app/background/tasks/fm_analysis.rs b/nexus/src/app/background/tasks/fm_analysis.rs index d054d3efa7a..0ef9c634919 100644 --- a/nexus/src/app/background/tasks/fm_analysis.rs +++ b/nexus/src/app/background/tasks/fm_analysis.rs @@ -220,6 +220,9 @@ impl FmAnalysis { let mut builder = fm::analysis_input::Input::builder(parent_sitrep, inv)?; let mut errors = Vec::new(); + self.load_ereporter_restarts(opctx, &mut builder) + .await + .context("failed to load ereporter restarts")?; self.load_new_ereports(opctx, &mut builder, &mut errors) .await .context("failed to load new ereports")?; @@ -233,7 +236,28 @@ impl FmAnalysis { opctx: &OpContext, builder: &mut fm::analysis_input::Builder, ) -> anyhow::Result<()> { - todo!() + let mut nbatches = 0; + let mut paginator = Paginator::new( + nexus_db_queries::db::datastore::SQL_BATCH_SIZE, + dropshot::PaginationOrder::Ascending, + ); + while let Some(p) = paginator.next() { + nbatches += 1; + let batch = self + .datastore + .ereporter_restart_list(opctx, &p.current_pagparams()) + .await?; + paginator = p.found_batch(&batch, &|e| e.id().into_untyped_uuid()); + builder.add_ereporter_restarts(batch); + } + + slog::debug!( + opctx.log, + "loaded {} ereporter restarts (in {nbatches} batches)", + builder.num_ereporter_restarts(), + ); + + Ok(()) } async fn load_new_ereports( diff --git a/nexus/types/src/fm/analysis_reports.rs b/nexus/types/src/fm/analysis_reports.rs index 249e26f1b8b..765f0246a80 100644 --- a/nexus/types/src/fm/analysis_reports.rs +++ b/nexus/types/src/fm/analysis_reports.rs @@ -227,6 +227,8 @@ pub struct InputReport { /// Cases which have closed, but which have been copied forwards as they /// contain ereports which have not yet been marked seen. pub closed_cases_copied_forward: BTreeMap, + /// Number of entries in the ereporter restart table. + pub num_ereporter_restarts: usize, } #[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] From 3732a2947a83b3f46e7e5d418ac485e63319bb9c Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Mon, 15 Jun 2026 15:27:39 -0700 Subject: [PATCH 14/29] gotta also add it here --- nexus/types/output/analysis_input_report_empty.out | 1 + nexus/types/output/analysis_input_report_same_inv.out | 1 + .../types/output/analysis_input_report_with_cases.out | 1 + nexus/types/src/fm/analysis_reports.rs | 11 +++++++++++ 4 files changed, 14 insertions(+) diff --git a/nexus/types/output/analysis_input_report_empty.out b/nexus/types/output/analysis_input_report_empty.out index f221b3960a0..4b736a4e354 100644 --- a/nexus/types/output/analysis_input_report_empty.out +++ b/nexus/types/output/analysis_input_report_empty.out @@ -2,5 +2,6 @@ fault management analysis inputs -------------------------------- parent sitrep: inventory collection: bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb +total known ereport restart IDs: 0 no new ereports since the parent sitrep no cases copied forward diff --git a/nexus/types/output/analysis_input_report_same_inv.out b/nexus/types/output/analysis_input_report_same_inv.out index 5ed6b60d151..42dd77debd8 100644 --- a/nexus/types/output/analysis_input_report_same_inv.out +++ b/nexus/types/output/analysis_input_report_same_inv.out @@ -3,5 +3,6 @@ fault management analysis inputs parent sitrep: aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa inventory collection: bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb --> same collection as parent sitrep +total known ereport restart IDs: 420 no new ereports since the parent sitrep no cases copied forward diff --git a/nexus/types/output/analysis_input_report_with_cases.out b/nexus/types/output/analysis_input_report_with_cases.out index f3af0fe55ff..c35ca6f3c8b 100644 --- a/nexus/types/output/analysis_input_report_with_cases.out +++ b/nexus/types/output/analysis_input_report_with_cases.out @@ -3,6 +3,7 @@ fault management analysis inputs parent sitrep: aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa inventory collection: bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb --> different from parent sitrep (collection cccccccc-cccc-cccc-cccc-cccccccccccc) +total known ereport restart IDs: 420 new ereports (2 total): * ereport dddddddd-dddd-dddd-dddd-dddddddddddd:3 diff --git a/nexus/types/src/fm/analysis_reports.rs b/nexus/types/src/fm/analysis_reports.rs index 765f0246a80..73491f47f39 100644 --- a/nexus/types/src/fm/analysis_reports.rs +++ b/nexus/types/src/fm/analysis_reports.rs @@ -259,6 +259,7 @@ impl fmt::Display for InputReportMultilineDisplay<'_> { new_ereport_ids, open_cases, closed_cases_copied_forward, + num_ereporter_restarts, }, indent, } = self; @@ -283,6 +284,13 @@ impl fmt::Display for InputReportMultilineDisplay<'_> { )?; } + writeln!( + f, + "{:indent$}total known ereport restart IDs: \ + {num_ereporter_restarts}", + "", + )?; + if !new_ereport_ids.is_empty() { writeln!( f, @@ -440,6 +448,7 @@ mod tests { parent_sitrep_id: Some(parent_sitrep_id), parent_inv_id: Some(parent_inv_id), inv_id, + num_ereporter_restarts: 420, new_ereport_ids, open_cases, closed_cases_copied_forward, @@ -455,6 +464,7 @@ mod tests { parent_sitrep_id: None, parent_inv_id: None, inv_id, + num_ereporter_restarts: 0, new_ereport_ids: BTreeSet::new(), open_cases: BTreeMap::new(), closed_cases_copied_forward: BTreeMap::new(), @@ -473,6 +483,7 @@ mod tests { parent_sitrep_id: Some(parent_sitrep_id), parent_inv_id: Some(inv_id), inv_id, + num_ereporter_restarts: 420, new_ereport_ids: BTreeSet::new(), open_cases: BTreeMap::new(), closed_cases_copied_forward: BTreeMap::new(), From 9f1437b751e2a60aa42edf334c93ae3d44148ae5 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Mon, 15 Jun 2026 15:27:48 -0700 Subject: [PATCH 15/29] data migration test --- .../ereporter_restart_order_v2.rs | 275 ++++++++++++++++++ .../integration_tests/data_migrations/mod.rs | 2 + 2 files changed, 277 insertions(+) create mode 100644 nexus/tests/integration_tests/data_migrations/ereporter_restart_order_v2.rs diff --git a/nexus/tests/integration_tests/data_migrations/ereporter_restart_order_v2.rs b/nexus/tests/integration_tests/data_migrations/ereporter_restart_order_v2.rs new file mode 100644 index 00000000000..46a363f9e78 --- /dev/null +++ b/nexus/tests/integration_tests/data_migrations/ereporter_restart_order_v2.rs @@ -0,0 +1,275 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use super::super::schema::{DataMigrationFns, MigrationContext}; +use chrono::{DateTime, Utc}; +use futures::future::BoxFuture; +use pretty_assertions::assert_eq; +use std::collections::HashMap; +use uuid::Uuid; + +const COLLECTOR: Uuid = Uuid::from_u128(0xeb020000_0000_0000_0000_000000000001); + +// A sled SP restart ID with a single ereport. +const SP_SLED: Uuid = Uuid::from_u128(0xeb020001_0000_0000_0000_000000000001); +// A switch SP reporter with multiple ereports at different times. +const SP_SWITCH: Uuid = Uuid::from_u128(0xeb020001_0000_0000_0000_000000000002); +// A host OS reporter whose slot is NULL in its earliest ereports and then +// resolved to a non-NULL value. The non-NULL slot should be recovered, and +// `time_first_seen` should still come from the earliest (NULL-slot) ereport. +const HOST_NULL_THEN_SLOT: Uuid = + Uuid::from_u128(0xeb020001_0000_0000_0000_000000000003); +// A host OS reporter whose slot number is NULL in *every* ereport. A row for +// this restart ID should still created, with a NULL slot. +const HOST_SLOT_ALL_NULL: Uuid = + Uuid::from_u128(0xeb020001_0000_0000_0000_000000000004); +// A host OS reporter whose slot is known in its earliest ereport and then NULL +// in a later one. The known slot must not be clobbered back to NULL. +const HOST_SLOT_THEN_NULL: Uuid = + Uuid::from_u128(0xeb020001_0000_0000_0000_000000000005); +// A host OS reporter whose only ereport is soft-deleted. +const HOST_ONLY_DELETED: Uuid = + Uuid::from_u128(0xeb020001_0000_0000_0000_000000000006); +// A host OS reporter with a mix of soft-deleted and live ereports, where the +// *earliest* ereport is soft-deleted. The restart ID is included, and +// `time_first_seen` must come from that earliest (soft-deleted) ereport. +const HOST_SOME_DELETED: Uuid = + Uuid::from_u128(0xeb020001_0000_0000_0000_000000000007); + +// Sled IDs for the host OS reporters (host ereports require a non-NULL +// `sled_id`, which is otherwise irrelevant to the test). +const SLED_A: Uuid = Uuid::from_u128(0xeb020002_0000_0000_0000_000000000001); +const SLED_B: Uuid = Uuid::from_u128(0xeb020002_0000_0000_0000_000000000002); +const SLED_C: Uuid = Uuid::from_u128(0xeb020002_0000_0000_0000_000000000003); +const SLED_D: Uuid = Uuid::from_u128(0xeb020002_0000_0000_0000_000000000004); +const SLED_E: Uuid = Uuid::from_u128(0xeb020002_0000_0000_0000_000000000005); + +// Some hard-coded timestamps that will parse as both `TIMESTAMPTZ` literals and +// via `str::parse::>`. +const T_05: &str = "2024-01-01T00:00:05Z"; +const T_10: &str = "2024-01-01T00:00:10Z"; +const T_15: &str = "2024-01-01T00:00:15Z"; +const T_20: &str = "2024-01-01T00:00:20Z"; +const T_25: &str = "2024-01-01T00:00:25Z"; +const T_30: &str = "2024-01-01T00:00:30Z"; +const T_40: &str = "2024-01-01T00:00:40Z"; +const T_50: &str = "2024-01-01T00:00:50Z"; +const T_60: &str = "2024-01-01T00:01:00Z"; +const T_70: &str = "2024-01-01T00:01:10Z"; +const T_120: &str = "2024-01-01T00:02:00Z"; +const T_180: &str = "2024-01-01T00:03:00Z"; +const T_240: &str = "2024-01-01T00:04:00Z"; +const T_270: &str = "2024-01-01T00:04:30Z"; +const T_300: &str = "2024-01-01T00:05:00Z"; + +fn ts(s: &str) -> DateTime { + s.parse().expect("test timestamp should be valid RFC 3339") +} + +pub(crate) fn checks() -> DataMigrationFns { + DataMigrationFns::new().before(before).after(after) +} + +fn before<'a>(ctx: &'a MigrationContext<'a>) -> BoxFuture<'a, ()> { + Box::pin(async move { + ctx.client + .batch_execute(&format!( + " + INSERT INTO omicron.public.ereport ( + restart_id, ena, time_collected, collector_id, report, + reporter, slot_type, slot, sled_id, time_deleted + ) VALUES + ('{SP_SLED}', 1, '{T_10}', '{COLLECTOR}', '{{}}', + 'sp', 'sled', 5, NULL, NULL), + + ('{SP_SWITCH}', 1, '{T_30}', '{COLLECTOR}', '{{}}', + 'sp', 'switch', 1, NULL, NULL), + ('{SP_SWITCH}', 2, '{T_10}', '{COLLECTOR}', '{{}}', + 'sp', 'switch', 1, NULL, NULL), + ('{SP_SWITCH}', 3, '{T_20}', '{COLLECTOR}', '{{}}', + 'sp', 'switch', 1, NULL, NULL), + + ('{HOST_NULL_THEN_SLOT}', 1, '{T_05}', '{COLLECTOR}', '{{}}', + 'host', 'sled', NULL, '{SLED_A}', NULL), + ('{HOST_NULL_THEN_SLOT}', 2, '{T_15}', '{COLLECTOR}', '{{}}', + 'host', 'sled', NULL, '{SLED_A}', NULL), + ('{HOST_NULL_THEN_SLOT}', 3, '{T_25}', '{COLLECTOR}', '{{}}', + 'host', 'sled', 7, '{SLED_A}', NULL), + + ('{HOST_SLOT_ALL_NULL}', 1, '{T_40}', '{COLLECTOR}', '{{}}', + 'host', 'sled', NULL, '{SLED_B}', NULL), + ('{HOST_SLOT_ALL_NULL}', 2, '{T_50}', '{COLLECTOR}', '{{}}', + 'host', 'sled', NULL, '{SLED_B}', NULL), + + ('{HOST_SLOT_THEN_NULL}', 1, '{T_60}', '{COLLECTOR}', '{{}}', + 'host', 'sled', 3, '{SLED_C}', NULL), + ('{HOST_SLOT_THEN_NULL}', 2, '{T_70}', '{COLLECTOR}', '{{}}', + 'host', 'sled', NULL, '{SLED_C}', NULL), + + ('{HOST_ONLY_DELETED}', 1, '{T_120}', '{COLLECTOR}', '{{}}', + 'host', 'sled', 4, '{SLED_D}', '{T_180}'), + + ('{HOST_SOME_DELETED}', 1, '{T_240}', '{COLLECTOR}', '{{}}', + 'host', 'sled', NULL, '{SLED_E}', '{T_300}'), + ('{HOST_SOME_DELETED}', 2, '{T_270}', '{COLLECTOR}', '{{}}', + 'host', 'sled', 8, '{SLED_E}', NULL); + " + )) + .await + .expect("failed to insert test ereports"); + }) +} + +fn after<'a>(ctx: &'a MigrationContext<'a>) -> BoxFuture<'a, ()> { + Box::pin(async move { + let rows = ctx + .client + .query( + &format!( + "SELECT id, reporter::text, slot_type::text, slot, + time_first_seen + FROM omicron.public.ereporter_restart + WHERE id IN ( + '{SP_SLED}', '{SP_SWITCH}', '{HOST_NULL_THEN_SLOT}', + '{HOST_SLOT_ALL_NULL}', '{HOST_SLOT_THEN_NULL}', + '{HOST_ONLY_DELETED}', '{HOST_SOME_DELETED}' + )" + ), + &[], + ) + .await + .expect("failed to query backfilled ereporter_restart rows"); + + assert_eq!(rows.len(), 7, "expected one row per restart ID"); + + let results: HashMap< + Uuid, + (String, String, Option, DateTime), + > = rows + .iter() + .map(|row| { + let id: Uuid = row.get("id"); + let reporter: String = row.get("reporter"); + let slot_type: String = row.get("slot_type"); + let slot: Option = row.get("slot"); + let time_first_seen: DateTime = row.get("time_first_seen"); + (id, (reporter, slot_type, slot, time_first_seen)) + }) + .collect(); + + // SP sled: reports its slot and (only) collection time. + let (reporter, slot_type, slot, first_seen) = &results[&SP_SLED]; + assert_eq!(reporter, "sp"); + assert_eq!(slot_type, "sled"); + assert_eq!(*slot, Some(5)); + assert_eq!(*first_seen, ts(T_10)); + + // SP switch: time_first_seen is the earliest of the three ereports. + let (reporter, slot_type, slot, first_seen) = &results[&SP_SWITCH]; + assert_eq!(reporter, "sp"); + assert_eq!(slot_type, "switch",); + assert_eq!(*slot, Some(1)); + assert_eq!( + *first_seen, + ts(T_10), + "time_first_seen should be the earliest time_collected of \ + multiple ereports" + ); + + // Host, slot number NULL then resolved. The slot should be set to the + // value from the non-NULL value, while time_first_seen comes from the + // earliest (NULL-slot) ereport. + let (reporter, slot_type, slot, first_seen) = + &results[&HOST_NULL_THEN_SLOT]; + assert_eq!(reporter, "host"); + assert_eq!(slot_type, "sled"); + assert_eq!( + *slot, + Some(7), + "a host's NULL slot should be backfilled from a later non-NULL one" + ); + assert_eq!( + *first_seen, + ts(T_05), + "time_first_seen should span the NULL-slot ereports" + ); + + // Host, all NULL: a row is created with a NULL slot. + let (reporter, slot_type, slot, first_seen) = + &results[&HOST_SLOT_ALL_NULL]; + assert_eq!(reporter, "host"); + assert_eq!(slot_type, "sled"); + assert_eq!( + *slot, None, + "a host reporter with no known slot should have a NULL slot" + ); + assert_eq!(*first_seen, ts(T_40), "all-NULL host time_first_seen"); + + // Host, slot known then NULL: the known slot is not clobbered. This is + // an edge case that shouldn't occur in production but which is + // nonetheless worth checking. + let (reporter, slot_type, slot, first_seen) = + &results[&HOST_SLOT_THEN_NULL]; + assert_eq!(reporter, "host"); + assert_eq!(slot_type, "sled"); + assert_eq!( + *slot, + Some(3), + "a known slot must not be clobbered by a later NULL one" + ); + assert_eq!( + *first_seen, + ts(T_60), + "known-then-NULL host time_first_seen" + ); + + // All ereports soft-deleted: there should still be an entry for the + // reporter. + let (reporter, slot_type, slot, first_seen) = + &results[&HOST_SOME_DELETED]; + assert_eq!(reporter, "host",); + assert_eq!(slot_type, "sled"); + assert_eq!(*slot, Some(4)); + assert_eq!( + *first_seen, + ts(T_240), + "time_first_seen should be from the soft-deleted ereport" + ); + + // Mixed deleted/live: included, with the slot recovered from the live + // ereport (8) and time_first_seen from the earliest ereport, even + // though that ereport is soft-deleted. + let (reporter, slot_type, slot, first_seen) = + &results[&HOST_SOME_DELETED]; + assert_eq!(reporter, "host",); + assert_eq!(slot_type, "sled"); + assert_eq!(*slot, Some(8)); + assert_eq!( + *first_seen, + ts(T_240), + "time_first_seen should span the soft-deleted earliest ereport" + ); + + // Clean up test data. + ctx.client + .batch_execute(&format!( + " + DELETE FROM omicron.public.ereporter_restart + WHERE id IN ( + '{SP_SLED}', '{SP_SWITCH}', '{HOST_NULL_THEN_SLOT}', + '{HOST_SLOT_ALL_NULL}', '{HOST_SLOT_THEN_NULL}', + '{HOST_ONLY_DELETED}', '{HOST_SOME_DELETED}' + ); + DELETE FROM omicron.public.ereport + WHERE restart_id IN ( + '{SP_SLED}', '{SP_SWITCH}', '{HOST_NULL_THEN_SLOT}', + '{HOST_SLOT_ALL_NULL}', '{HOST_SLOT_THEN_NULL}', + '{HOST_ONLY_DELETED}', '{HOST_SOME_DELETED}' + ); + " + )) + .await + .expect("failed to clean up ereporter-restart-order-v2 test data"); + }) +} diff --git a/nexus/tests/integration_tests/data_migrations/mod.rs b/nexus/tests/integration_tests/data_migrations/mod.rs index 3f8e9ae56f8..6f4f663e2fc 100644 --- a/nexus/tests/integration_tests/data_migrations/mod.rs +++ b/nexus/tests/integration_tests/data_migrations/mod.rs @@ -36,6 +36,7 @@ mod disk_types; mod drop_uninitialized_svc_enabled_not_online_state; mod ereport_everyone_gets_a_slot; mod ereport_trim_serial_trailing_nulls; +mod ereporter_restart_order_v2; mod fix_leaked_bp_oximeter_read_policy_rows; mod fix_session_token_column_order; mod inv_clear_mupdate_override; @@ -94,6 +95,7 @@ pub(crate) fn get_migration_checks() -> BTreeMap { register!(bgp_unnumbered_peer_cleanup); register!(ereport_trim_serial_trailing_nulls); register!(sled_resource_vmm_state); + register!(ereporter_restart_order_v2); map } From dd76be4e356a8f21eaedf80b836f54b0f06e7162 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Tue, 16 Jun 2026 09:34:48 -0700 Subject: [PATCH 16/29] update omdb command to use restart table as authortiative source --- dev-tools/omdb/src/bin/omdb/db/ereport.rs | 146 +++++++++++++--------- 1 file changed, 86 insertions(+), 60 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/db/ereport.rs b/dev-tools/omdb/src/bin/omdb/db/ereport.rs index d9653772928..7f4dfb8eeef 100644 --- a/dev-tools/omdb/src/bin/omdb/db/ereport.rs +++ b/dev-tools/omdb/src/bin/omdb/db/ereport.rs @@ -9,7 +9,6 @@ use super::check_limit; use crate::Omdb; use crate::helpers::CONNECTION_OPTIONS_HEADING; use crate::helpers::const_max_len; -use crate::helpers::datetime_opt_rfc3339_concise; use crate::helpers::datetime_rfc3339_concise; use crate::helpers::display_option_blank; use crate::helpers::display_option_invalid; @@ -23,20 +22,24 @@ use chrono::Utc; use clap::Args; use clap::Subcommand; use diesel::AggregateExpressionMethods; -use diesel::dsl::{count, min}; +use diesel::dsl::{count, max}; use diesel::prelude::*; use internal_dns_types::names::ServiceName; use nexus_db_lookup::DbConnection; +use nexus_db_model::EreporterRestart; use nexus_db_model::ereport as model; use nexus_db_model::ereport::DbEna; use nexus_db_queries::db; use nexus_db_queries::db::DataStore; use nexus_db_queries::db::queries::ALLOW_FULL_TABLE_SCAN_SQL; use nexus_db_schema::schema::ereport::dsl; +use nexus_db_schema::schema::ereporter_restart::dsl as restart_dsl; use nexus_types::fm::ereport::Ena; use nexus_types::fm::ereport::Reporter; use omicron_uuid_kinds::EreporterRestartUuid; use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::SledUuid; +use std::collections::HashMap; use tabled::Tabled; use uuid::Uuid; @@ -143,9 +146,10 @@ struct ReportersArgs { slot_type: Option, #[clap(long = "slot", short = 's', requires = "slot_type")] - slot: Option, + want_slot: Option, - serial: Option, + #[clap(long = "serial", short = 'S')] + want_serial: Option, } #[derive( @@ -508,69 +512,73 @@ async fn cmd_db_ereporters( datastore: &DataStore, args: &ReportersArgs, ) -> anyhow::Result<()> { - let &ReportersArgs { slot, slot_type, ref serial } = args; + let &ReportersArgs { slot_type, want_slot, ref want_serial } = args; let slot_type = slot_type.map(nexus_db_model::SpType::from); let conn = datastore.pool_connection_for_tests().await?; - let reporters = (*conn).transaction_async({ - let serial = serial.clone(); - async move |conn| { - // Selecting all reporters may require a full table scan, depending - // on filters. + let (restarts, ereport_info) = (*conn) + .transaction_async(async move |conn| { + // The canonical list of reporter restarts comes from the + // `ereporter_restart` table. + let mut query = restart_dsl::ereporter_restart + .select(EreporterRestart::as_select()) + .into_boxed(); + if let Some(slot_type) = slot_type { + query = query.filter(restart_dsl::slot_type.eq(slot_type)); + } + if let Some(slot) = want_slot { + query = query + .filter(restart_dsl::slot.eq(db::model::SqlU16::new(slot))); + } + let restarts = query + .load_async::(&conn) + .await + .context("listing ereporter restarts")?; + + // The `ereporter_restart` table doesn't record the reporter's sled + // ID, VPD identity, or ereport count, so gather those from the + // `ereport` table, grouped by restart ID. The sled ID is part of + // the `GROUP BY` (rather than aggregated) since it is constant for + // a given restart ID; the VPD identity may start out NULL and be + // filled in later, so we take its `MAX` (ignoring NULLs) as we do + // for the slot number. This may require a full table scan. conn.batch_execute_async(ALLOW_FULL_TABLE_SCAN_SQL).await?; - let mut query = dsl::ereport - .group_by(( - dsl::restart_id, - dsl::reporter, - dsl::sled_id, - dsl::slot_type, - dsl::slot, - dsl::serial_number, - dsl::part_number - )) + let ereport_info = dsl::ereport + .group_by((dsl::restart_id, dsl::sled_id)) .select(( dsl::restart_id, - model::Reporter::as_select(), - dsl::serial_number, - dsl::part_number, - min(dsl::time_collected), + dsl::sled_id, + max(dsl::serial_number), + max(dsl::part_number), count(dsl::ena).aggregate_distinct(), )) - .into_boxed(); - - if let Some(slot) = slot { - if slot_type.is_some() { - query = query - .filter(dsl::slot.eq(db::model::SqlU16::new(slot))); - } else { - anyhow::bail!( - "cannot filter reporters by slot without a value for `--type`" - ) - } - } - - if let Some(slot_type) = slot_type { - query = query - .filter(dsl::slot_type.eq(slot_type)); - } + .load_async::<( + Uuid, + Option, + Option, + Option, + i64, + )>(&conn) + .await + .context("querying ereport metadata by restart ID")?; - if let Some(serial) = serial { - query = query.filter(dsl::serial_number.eq(serial.clone())); - } + Ok::<_, anyhow::Error>((restarts, ereport_info)) + }) + .await?; - query - .load_async::<(Uuid, model::Reporter, Option, Option, Option>, i64)>( - &conn, - ) - .await.context("listing reporter entries") - } - }).await?; + // Index the per-restart `ereport` metadata by restart ID. + let mut ereport_info = ereport_info + .into_iter() + .map(|(id, sled_id, serial, part_number, ereports)| { + (id, (sled_id, serial, part_number, ereports)) + }) + .collect::>(); #[derive(Tabled)] #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] struct ReporterRow { - #[tabled(display_with = "datetime_opt_rfc3339_concise")] - first_seen: Option>, + #[tabled(display_with = "datetime_rfc3339_concise")] + first_seen: DateTime, id: Uuid, #[tabled(display_with = "display_option_invalid")] identity: Option, @@ -581,27 +589,45 @@ async fn cmd_db_ereporters( ereports: i64, } - let mut rows = reporters + let mut rows = restarts .into_iter() - .map(|(id, reporter, serial, part_number, first_seen, ereports)| { - let identity = match reporter.try_into() { + .filter_map(|restart| { + let id = restart.id.into_untyped_uuid(); + let (sled_id, serial, part_number, ereports) = + ereport_info.remove(&id).unwrap_or((None, None, None, 0)); + // If we are filtering by serial number, skip any that don't match. + if want_serial.is_some() && serial.as_ref() != want_serial.as_ref() + { + return None; + } + // The reporter identity combines the restart's location (from + // `ereporter_restart`) with the sled ID (from `ereport`). + let identity = match (model::Reporter { + reporter: restart.reporter, + sled_id: sled_id + .map(|sled| SledUuid::from_untyped_uuid(sled).into()), + slot_type: restart.slot_type, + slot: restart.slot, + }) + .try_into() + { Ok(reporter) => Some(reporter), Err(err) => { eprintln!( - "error: encounted an invalid reporter entry for \ + "error: encountered an invalid reporter entry for \ reporter ID {id}: {err}", ); None } }; - ReporterRow { - first_seen, + Some(ReporterRow { + first_seen: restart.time_first_seen, id, identity, serial, part_number, ereports, - } + }) }) .collect::>(); rows.sort_by_key(|row| row.first_seen); From 30d72f2c6b098fd3c640adbd52a6b5c09dcc7e7d Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Tue, 16 Jun 2026 10:23:40 -0700 Subject: [PATCH 17/29] issue warnings for ereports with unknown restart IDs --- nexus/fm/src/analysis_input.rs | 5 ++- nexus/src/app/background/tasks/fm_analysis.rs | 40 ++++++++++++------- 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/nexus/fm/src/analysis_input.rs b/nexus/fm/src/analysis_input.rs index 8bb76a0fd6f..c81697293d9 100644 --- a/nexus/fm/src/analysis_input.rs +++ b/nexus/fm/src/analysis_input.rs @@ -176,8 +176,9 @@ impl Builder { self.ereporter_restarts.extend(restarts) } - pub fn num_ereporter_restarts(&self) -> usize { - self.ereporter_restarts.len() + /// Borrows the map of known ereport reporter restart IDs. + pub fn ereporter_restarts(&self) -> &IdOrdMap { + &self.ereporter_restarts } /// Finish constructing the [`Input`] and return it, along with a [`Report`] diff --git a/nexus/src/app/background/tasks/fm_analysis.rs b/nexus/src/app/background/tasks/fm_analysis.rs index ea326b2b532..50b35f22ec3 100644 --- a/nexus/src/app/background/tasks/fm_analysis.rs +++ b/nexus/src/app/background/tasks/fm_analysis.rs @@ -267,7 +267,7 @@ impl FmAnalysis { slog::debug!( opctx.log, "loaded {} ereporter restarts (in {nbatches} batches)", - builder.num_ereporter_restarts(), + builder.ereporter_restarts().len(), ); Ok(()) @@ -295,21 +295,31 @@ impl FmAnalysis { (e.restart_id.into_untyped_uuid(), e.ena) }); let loaded = batch.len(); + let mut invalid = 0; - builder.add_unmarked_ereports(batch.into_iter().filter_map( - |ereport| { - let ereport = match fm::Ereport::try_from(ereport) { - Ok(ereport) => ereport, - Err(e) => { - invalid += 1; - warnings.push(e.to_string()); - return None; - } - }; - - Some(ereport) - }, - )); + for ereport in batch { + let ereport = match fm::Ereport::try_from(ereport) { + Ok(ereport) => ereport, + Err(e) => { + invalid += 1; + warnings.push(e.to_string()); + continue; + } + }; + + // Check if this is a reporter we know about, and issue a + // warning if it is not. + let id = ereport.id(); + if !builder.ereporter_restarts().contains_key(&id.restart_id) { + let msg = format!( + "ereport {id} has a restart ID not contained in the \ + `ereporter_restart` table" + ); + slog::warn!(&opctx.log, "{msg}"); + warnings.push(msg); + } + builder.add_unmarked_ereports(std::iter::once(ereport)); + } let total = builder.num_ereports(); let new = total - prev_total; From 6b157b243ff737423e6a54aeb88f20d96ea5d64b Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Tue, 16 Jun 2026 10:27:16 -0700 Subject: [PATCH 18/29] post merge test fixup --- nexus/db-queries/src/db/datastore/fm.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nexus/db-queries/src/db/datastore/fm.rs b/nexus/db-queries/src/db/datastore/fm.rs index b538955de08..0be78fd47b5 100644 --- a/nexus/db-queries/src/db/datastore/fm.rs +++ b/nexus/db-queries/src/db/datastore/fm.rs @@ -2399,6 +2399,7 @@ mod tests { let input_report = InputReport { parent_sitrep_id: sitrep.parent_id(), parent_inv_id: None, + num_ereporter_restarts: 0, inv_id: sitrep.inv_id(), new_ereport_ids: Default::default(), open_cases: Default::default(), @@ -3014,6 +3015,7 @@ mod tests { new_ereport_ids: Default::default(), open_cases: Default::default(), closed_cases_copied_forward: Default::default(), + num_ereporter_restarts: 0, }; let analysis_report = AnalysisReport { sitrep_id: ghost_sitrep_id, From c063bd265b750c0ca7815c76a7530e89a40cf76c Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Tue, 16 Jun 2026 10:50:09 -0700 Subject: [PATCH 19/29] nexus-fm now depends on a db type sigh --- Cargo.lock | 1 + nexus/fm/Cargo.toml | 3 +++ nexus/fm/build.rs | 9 +++++++++ 3 files changed, 13 insertions(+) create mode 100644 nexus/fm/build.rs diff --git a/Cargo.lock b/Cargo.lock index 02a6e723877..d178e36a5fe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7209,6 +7209,7 @@ dependencies = [ "nexus-db-model", "nexus-reconfigurator-planning", "nexus-types", + "omicron-rpaths", "omicron-test-utils", "omicron-uuid-kinds", "omicron-workspace-hack", diff --git a/nexus/fm/Cargo.toml b/nexus/fm/Cargo.toml index 3a1b393f07c..7f6545ee698 100644 --- a/nexus/fm/Cargo.toml +++ b/nexus/fm/Cargo.toml @@ -13,6 +13,9 @@ testing = [ "ereport-types", ] +[build-dependencies] +omicron-rpaths.workspace = true + [dependencies] anyhow.workspace = true chrono.workspace = true diff --git a/nexus/fm/build.rs b/nexus/fm/build.rs new file mode 100644 index 00000000000..dbdb6a3c8b0 --- /dev/null +++ b/nexus/fm/build.rs @@ -0,0 +1,9 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +fn main() { + // See omicron-rpaths for documentation. NOTE: This file MUST be kept in + // sync with the other build.rs files in this repository. + omicron_rpaths::configure_default_omicron_rpaths(); +} From 6269baa4af35cf9e1097fbe9815da18b96209ae2 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Tue, 16 Jun 2026 11:17:31 -0700 Subject: [PATCH 20/29] restart query will also do a full scan --- dev-tools/omdb/src/bin/omdb/db/ereport.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dev-tools/omdb/src/bin/omdb/db/ereport.rs b/dev-tools/omdb/src/bin/omdb/db/ereport.rs index 7f4dfb8eeef..6982e323a3b 100644 --- a/dev-tools/omdb/src/bin/omdb/db/ereport.rs +++ b/dev-tools/omdb/src/bin/omdb/db/ereport.rs @@ -518,6 +518,8 @@ async fn cmd_db_ereporters( let conn = datastore.pool_connection_for_tests().await?; let (restarts, ereport_info) = (*conn) .transaction_async(async move |conn| { + conn.batch_execute_async(ALLOW_FULL_TABLE_SCAN_SQL).await?; + // The canonical list of reporter restarts comes from the // `ereporter_restart` table. let mut query = restart_dsl::ereporter_restart @@ -542,7 +544,6 @@ async fn cmd_db_ereporters( // a given restart ID; the VPD identity may start out NULL and be // filled in later, so we take its `MAX` (ignoring NULLs) as we do // for the slot number. This may require a full table scan. - conn.batch_execute_async(ALLOW_FULL_TABLE_SCAN_SQL).await?; let ereport_info = dsl::ereport .group_by((dsl::restart_id, dsl::sled_id)) .select(( From efe895f464240ab506165389173934c22c909737 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Tue, 16 Jun 2026 11:50:31 -0700 Subject: [PATCH 21/29] you have to actually get the rpaths thing correct lol --- Cargo.lock | 1 + nexus/fm/Cargo.toml | 2 ++ 2 files changed, 3 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index d178e36a5fe..011e2dee785 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7213,6 +7213,7 @@ dependencies = [ "omicron-test-utils", "omicron-uuid-kinds", "omicron-workspace-hack", + "pq-sys", "rand 0.9.2", "serde", "serde_json", diff --git a/nexus/fm/Cargo.toml b/nexus/fm/Cargo.toml index 7f6545ee698..e5c6f63109f 100644 --- a/nexus/fm/Cargo.toml +++ b/nexus/fm/Cargo.toml @@ -23,6 +23,8 @@ iddqd.workspace = true nexus-types.workspace = true nexus-db-model.workspace = true omicron-uuid-kinds.workspace = true +# See omicron-rpaths for more about the "pq-sys" dependency. +pq-sys = "*" rand.workspace = true serde.workspace = true serde_json.workspace = true From 8b0140533821c485f0a8647cc24128dcb5b7d030 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Tue, 16 Jun 2026 12:43:35 -0700 Subject: [PATCH 22/29] fix typo in migration test --- .../data_migrations/ereporter_restart_order_v2.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nexus/tests/integration_tests/data_migrations/ereporter_restart_order_v2.rs b/nexus/tests/integration_tests/data_migrations/ereporter_restart_order_v2.rs index 46a363f9e78..a967d8da8f1 100644 --- a/nexus/tests/integration_tests/data_migrations/ereporter_restart_order_v2.rs +++ b/nexus/tests/integration_tests/data_migrations/ereporter_restart_order_v2.rs @@ -227,7 +227,7 @@ fn after<'a>(ctx: &'a MigrationContext<'a>) -> BoxFuture<'a, ()> { // All ereports soft-deleted: there should still be an entry for the // reporter. let (reporter, slot_type, slot, first_seen) = - &results[&HOST_SOME_DELETED]; + &results[&HOST_ONLY_DELETED]; assert_eq!(reporter, "host",); assert_eq!(slot_type, "sled"); assert_eq!(*slot, Some(4)); From 0b96394f0c95a06339e6e1713da0cd5f7dd89731 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Tue, 16 Jun 2026 12:49:23 -0700 Subject: [PATCH 23/29] some commentary improvements --- nexus/db-queries/src/db/datastore/ereport.rs | 25 ++++++++++++++++---- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/ereport.rs b/nexus/db-queries/src/db/datastore/ereport.rs index d5230b77b77..1951ab3873b 100644 --- a/nexus/db-queries/src/db/datastore/ereport.rs +++ b/nexus/db-queries/src/db/datastore/ereport.rs @@ -305,6 +305,9 @@ impl DataStore { .select((dsl::restart_id, dsl::ena)) } + /// Inserts the provided tranche of `ereports` into the `ereport` table, and + /// potentially updates the `ereporter_restart` table if the restart ID of + /// the inserted ereports has not been seen before. pub async fn ereports_insert( &self, opctx: &OpContext, @@ -353,11 +356,22 @@ impl DataStore { /// the restart history table if the restart ID of the provided ereports is /// not already present. /// - /// This is performed in a single atomic CTE, in order to ensure that the - /// earliest `time_collected` timestamp in the `ereports` table for a given - /// restart ID will always match the `time_first_seen` timestamp in the - /// restart history table, even if Nexus crashes midway through inserting - /// ereports. + /// The two operations of inserting the ereports into the `ereport` table + /// and inserting an entry into the `ereporter_restart` table for the + /// restart ID are performed in a single atomic CTE. It's necessary for + /// these two operations to be done atomically, because if they are + /// non-atomic, it is possible for a Nexus to die, or go out to lunch for a + /// bit, between inserting the ereport records and creating the + /// `ereporter_restart` record for a tranche of ereports from a not-yet-seen + /// restart ID. If this happens, another Nexus could insert some more + /// ereports from that restart ID, and try to put something in the restart + /// ID table with a timestamp that is later than the actual first time + /// ereports from that ID were observed. + /// + /// The SQL generated here is output to + /// `tests/output/ereports_insert_host.sql` and + /// `tests/output/ereports_insert_sp.sql`, for host-OS and SP ereports, + /// respectively. fn ereports_insert_query( restart_id: EreporterRestartUuid, time_collected: chrono::DateTime, @@ -436,6 +450,7 @@ impl DataStore { .on_conflict((dsl::restart_id, dsl::ena)) .do_nothing() .returning(dsl::ena); + // Query fragment to insert the reporter restart entry into the // ereporter restart table, or update the existing entry's slot column // if one exists and the slot column is null. The null behavior will be From 4de2c50b2a7c3d9dff269ce232f87524994f6abd Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Tue, 16 Jun 2026 12:50:46 -0700 Subject: [PATCH 24/29] remove some dead code This was intended for use by `omdb`, but it's doing its own slightly different thing. --- nexus/db-queries/src/db/datastore/ereport.rs | 93 -------------------- 1 file changed, 93 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/ereport.rs b/nexus/db-queries/src/db/datastore/ereport.rs index 1951ab3873b..eaff9d51b57 100644 --- a/nexus/db-queries/src/db/datastore/ereport.rs +++ b/nexus/db-queries/src/db/datastore/ereport.rs @@ -14,8 +14,6 @@ use crate::db::model::EreporterType; use crate::db::model::SpMgsSlot; use crate::db::model::SpType; use crate::db::model::SqlU16; -use crate::db::model::SqlU32; -use crate::db::model::ereport as model; use crate::db::model::ereport::DbEna; use crate::db::pagination::{paginated, paginated_multicolumn}; use crate::db::raw_query_builder::QueryBuilder; @@ -23,8 +21,6 @@ use crate::db::raw_query_builder::TypedSqlQuery; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::DateTime; use chrono::Utc; -use diesel::AggregateExpressionMethods; -use diesel::dsl::{count, min}; use diesel::pg::Pg; use diesel::prelude::*; use diesel::query_builder::*; @@ -50,14 +46,6 @@ use uuid::Uuid; type EreportIdTuple = (Uuid, DbEna); -#[derive(Clone, Debug)] -pub struct EreporterRestartBySerial { - pub id: EreporterRestartUuid, - pub first_seen_at: DateTime, - pub reporter_kind: fm::Reporter, - pub ereports: u32, -} - impl DataStore { /// Fetch an ereport by its restart ID and ENA. /// @@ -177,62 +165,6 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } - /// List unique ereporter restarts for the given serial number. - pub async fn ereporter_restart_list_by_serial( - &self, - opctx: &OpContext, - serial: String, - ) -> ListResultVec { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; - - let conn = &*self.pool_connection_authorized(opctx).await?; - let rows = Self::restart_list_by_serial_query(serial.clone()) - .load_async::<(Uuid, model::Reporter, Option>, SqlU32)>( - conn, - ) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; - - let restarts = rows.into_iter().map(|(restart_id, reporter, first_seen, ereports)| { - let first_seen_at = first_seen.expect( - "`min(time_collected)` should never return `NULL`, since the \ - `time_collected` column is not nullable, and the `SELECT` clause \ - should return nothing if the result set is empty" - ); - EreporterRestartBySerial { - id: EreporterRestartUuid::from_untyped_uuid(restart_id), - reporter_kind: reporter.try_into().unwrap(), - first_seen_at, - ereports: ereports.into(), - } - }).collect(); - - Ok(restarts) - } - - fn restart_list_by_serial_query( - serial: String, - ) -> impl RunnableQuery<(Uuid, model::Reporter, Option>, SqlU32)> - { - dsl::ereport - .filter(dsl::serial_number.eq(serial.clone())) - .filter(dsl::time_deleted.is_null()) - .group_by(( - dsl::restart_id, - dsl::reporter, - dsl::slot_type, - dsl::slot, - dsl::sled_id, - )) - .select(( - dsl::restart_id, - model::Reporter::as_select(), - min(dsl::time_collected), - count(dsl::ena).aggregate_distinct(), - )) - .order_by(dsl::restart_id) - } - pub async fn latest_ereport_id( &self, opctx: &OpContext, @@ -706,31 +638,6 @@ mod tests { logctx.cleanup_successful(); } - #[tokio::test] - async fn explain_restart_list_by_serial() { - let logctx = dev::test_setup_log("explain_restart_list_by_serial"); - let db = TestDatabase::new_with_pool(&logctx.log).await; - let pool = db.pool(); - let conn = pool.claim().await.unwrap(); - - let query = DataStore::restart_list_by_serial_query(String::new()); - let explanation = query - .explain_async(&conn) - .await - .expect("Failed to explain query - is it valid SQL?"); - - eprintln!("{explanation}"); - - assert!( - !explanation.contains("FULL SCAN"), - "Found an unexpected FULL SCAN: {}", - explanation - ); - - db.terminate().await; - logctx.cleanup_successful(); - } - #[tokio::test] async fn explain_ereport_fetch_matching_default() { explain_fetch_matching_query( From 875fc5b5b43f8685798cfe5e6b972b2337c01bb9 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Tue, 16 Jun 2026 14:32:09 -0700 Subject: [PATCH 25/29] BLERGH THERES EVEN MORE COPY PASTE MISTAKES IN HERE --- .../data_migrations/ereporter_restart_order_v2.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nexus/tests/integration_tests/data_migrations/ereporter_restart_order_v2.rs b/nexus/tests/integration_tests/data_migrations/ereporter_restart_order_v2.rs index a967d8da8f1..8b6e5fbb0f2 100644 --- a/nexus/tests/integration_tests/data_migrations/ereporter_restart_order_v2.rs +++ b/nexus/tests/integration_tests/data_migrations/ereporter_restart_order_v2.rs @@ -233,7 +233,7 @@ fn after<'a>(ctx: &'a MigrationContext<'a>) -> BoxFuture<'a, ()> { assert_eq!(*slot, Some(4)); assert_eq!( *first_seen, - ts(T_240), + ts(T_180), "time_first_seen should be from the soft-deleted ereport" ); From 59cc9fba273d54244e294200076418ce1dd28baf Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Tue, 16 Jun 2026 14:39:35 -0700 Subject: [PATCH 26/29] UGH I TYPED THE WRONG ONE KMS KMS KMS KMS --- .../data_migrations/ereporter_restart_order_v2.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nexus/tests/integration_tests/data_migrations/ereporter_restart_order_v2.rs b/nexus/tests/integration_tests/data_migrations/ereporter_restart_order_v2.rs index 8b6e5fbb0f2..b3a731a8f54 100644 --- a/nexus/tests/integration_tests/data_migrations/ereporter_restart_order_v2.rs +++ b/nexus/tests/integration_tests/data_migrations/ereporter_restart_order_v2.rs @@ -233,7 +233,7 @@ fn after<'a>(ctx: &'a MigrationContext<'a>) -> BoxFuture<'a, ()> { assert_eq!(*slot, Some(4)); assert_eq!( *first_seen, - ts(T_180), + ts(T_120), "time_first_seen should be from the soft-deleted ereport" ); From f9f2e3482ce00d8802aacb54399a86d83c9c3486 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Tue, 16 Jun 2026 16:12:51 -0700 Subject: [PATCH 27/29] fixup omdb db ereport reporters arguments, some expectorates --- dev-tools/omdb/src/bin/omdb/db/ereport.rs | 10 +++++----- dev-tools/omdb/tests/successes.out | 2 ++ dev-tools/omdb/tests/usage_errors.out | 6 ++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/db/ereport.rs b/dev-tools/omdb/src/bin/omdb/db/ereport.rs index 6982e323a3b..f6ef520bb8c 100644 --- a/dev-tools/omdb/src/bin/omdb/db/ereport.rs +++ b/dev-tools/omdb/src/bin/omdb/db/ereport.rs @@ -146,10 +146,10 @@ struct ReportersArgs { slot_type: Option, #[clap(long = "slot", short = 's', requires = "slot_type")] - want_slot: Option, + slot: Option, - #[clap(long = "serial", short = 'S')] - want_serial: Option, + #[clap(long = "serial")] + serial: Option, } #[derive( @@ -512,7 +512,7 @@ async fn cmd_db_ereporters( datastore: &DataStore, args: &ReportersArgs, ) -> anyhow::Result<()> { - let &ReportersArgs { slot_type, want_slot, ref want_serial } = args; + let &ReportersArgs { slot_type, slot, serial: ref want_serial } = args; let slot_type = slot_type.map(nexus_db_model::SpType::from); let conn = datastore.pool_connection_for_tests().await?; @@ -528,7 +528,7 @@ async fn cmd_db_ereporters( if let Some(slot_type) = slot_type { query = query.filter(restart_dsl::slot_type.eq(slot_type)); } - if let Some(slot) = want_slot { + if let Some(slot) = slot { query = query .filter(restart_dsl::slot.eq(db::model::SqlU16::new(slot))); } diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 11bbb64a8ab..7e8fe461c35 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -710,6 +710,7 @@ task: "fm_analysis" preparation report: parent sitrep: inventory collection: ..................... + total known ereport restart IDs: 0 no new ereports since the parent sitrep no cases copied forward @@ -1401,6 +1402,7 @@ task: "fm_analysis" preparation report: parent sitrep: inventory collection: ..................... + total known ereport restart IDs: 0 no new ereports since the parent sitrep no cases copied forward diff --git a/dev-tools/omdb/tests/usage_errors.out b/dev-tools/omdb/tests/usage_errors.out index c17b9c83901..96b9fcc4801 100644 --- a/dev-tools/omdb/tests/usage_errors.out +++ b/dev-tools/omdb/tests/usage_errors.out @@ -650,15 +650,13 @@ termination: Exited(0) stdout: List ereport reporters -Usage: omdb db ereport reporters [OPTIONS] [SERIAL] - -Arguments: - [SERIAL] +Usage: omdb db ereport reporters [OPTIONS] Options: --log-level log level filter [env: LOG_LEVEL=] [default: warn] -t, --type -s, --slot + --serial --color Color output [default: auto] [possible values: auto, always, never] -h, --help Print help From 8376b52a56c01525fa5f3f4f05ce7489c25df2a0 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Thu, 18 Jun 2026 09:41:59 -0700 Subject: [PATCH 28/29] post merge expectoration --- Cargo.lock | 3 +-- dev-tools/omdb/tests/successes.out | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0c69fec9da9..8bfd9ca8f80 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7161,12 +7161,11 @@ dependencies = [ "ereport-types", "iddqd", "nexus-db-model", - "nexus-inventory", "nexus-reconfigurator-planning", "nexus-types", - "omicron-rpaths", "omicron-common", + "omicron-rpaths", "omicron-test-utils", "omicron-uuid-kinds", "omicron-workspace-hack", diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index bb998a07066..6aeac9f1049 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -710,8 +710,8 @@ task: "fm_analysis" preparation report: parent sitrep: ..................... inventory collection: ..................... - total known ereport restart IDs: 0 + total known ereport restart IDs: 0 no new ereports since the parent sitrep no cases copied forward @@ -1409,8 +1409,8 @@ task: "fm_analysis" preparation report: parent sitrep: ..................... inventory collection: ..................... - total known ereport restart IDs: 0 + total known ereport restart IDs: 0 no new ereports since the parent sitrep no cases copied forward From 3c289b5bf4f064ff3d1e272b04db675e6d9f4990 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Thu, 18 Jun 2026 12:00:22 -0700 Subject: [PATCH 29/29] comment explaining ereports insert return value --- nexus/db-queries/src/db/datastore/ereport.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/nexus/db-queries/src/db/datastore/ereport.rs b/nexus/db-queries/src/db/datastore/ereport.rs index eaff9d51b57..a23b89d7519 100644 --- a/nexus/db-queries/src/db/datastore/ereport.rs +++ b/nexus/db-queries/src/db/datastore/ereport.rs @@ -240,6 +240,21 @@ impl DataStore { /// Inserts the provided tranche of `ereports` into the `ereport` table, and /// potentially updates the `ereporter_restart` table if the restart ID of /// the inserted ereports has not been seen before. + /// + /// # Returns + /// + /// This function returns a tuple containing the number of new `ereport` + /// rows that were created, along with the newest [`EreportId`] for the same + /// reporter index as the inserted ereports. + /// + /// The returned ereport ID is intended to provide the caller with the + /// latest ENA to use in a subsequent request to ingest ereports from the + /// same reporter. In some cases, it may: + /// + /// - be a newer ENA than the highest ENA in the inserted ereports, if + /// additional ereports were inserted concurrently + /// - have a different restart ID than the one provided, if ereports from + /// a newer restart of that reporter were inserted concurrently pub async fn ereports_insert( &self, opctx: &OpContext,