From eb867a459820e75c2dd5c4baf9f12647f0e3fbdc Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Thu, 23 Jun 2022 18:02:15 +0000 Subject: [PATCH 01/15] Add stat framework to Propolis --- Cargo.toml | 4 ++ cli/src/main.rs | 9 ++++ client/src/api.rs | 3 ++ propolis/Cargo.toml | 1 + propolis/src/block/crucible.rs | 7 ++- server/Cargo.toml | 4 ++ server/src/lib/initializer.rs | 3 ++ server/src/lib/lib.rs | 1 + server/src/lib/server.rs | 83 ++++++++++++++++++++++++++++++- server/src/main.rs | 9 +++- server/tests/integration_tests.rs | 9 +++- 11 files changed, 127 insertions(+), 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index d4708b81c..87057993b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,3 +25,7 @@ exclude = [ panic = "abort" [profile.release] panic = "abort" + +# Development +[patch."https://github.com/oxidecomputer/crucible"] +crucible = { path = "../stock/upstairs" } diff --git a/cli/src/main.rs b/cli/src/main.rs index 740e64885..8f09d3edc 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -69,6 +69,10 @@ enum Command { // cloud_init ISO file #[clap(long, action)] cloud_init: Option, + + /// Enable metrics with Oximeter + #[clap(long, action)] + metrics: bool, }, /// Get the properties of a propolis instance @@ -149,6 +153,7 @@ async fn new_instance( memory: u64, disks: Vec, cloud_init_bytes: Option, + metrics: bool, ) -> anyhow::Result<()> { let properties = InstanceProperties { id, @@ -169,6 +174,7 @@ async fn new_instance( disks, migrate: None, cloud_init_bytes, + metrics, }; // Try to create the instance @@ -405,6 +411,7 @@ async fn migrate_instance( src_uuid, }), cloud_init_bytes: None, + metrics: false, }; // Get the source instance ready @@ -475,6 +482,7 @@ async fn main() -> anyhow::Result<()> { memory, crucible_disks, cloud_init, + metrics, } => { let disks = if let Some(crucible_disks) = crucible_disks { parse_json_file(&crucible_disks)? @@ -494,6 +502,7 @@ async fn main() -> anyhow::Result<()> { memory, disks, cloud_init_bytes, + metrics, ) .await? } diff --git a/client/src/api.rs b/client/src/api.rs index 5e463d8c1..67e0bcae0 100644 --- a/client/src/api.rs +++ b/client/src/api.rs @@ -33,6 +33,9 @@ pub struct InstanceEnsureRequest { // base64 encoded cloud-init ISO pub cloud_init_bytes: Option, + + // If the instance should post metrics to Oximeter + pub metrics: bool, } #[derive(Clone, Deserialize, Serialize, JsonSchema)] diff --git a/propolis/Cargo.toml b/propolis/Cargo.toml index ef7873534..b2515e5c8 100644 --- a/propolis/Cargo.toml +++ b/propolis/Cargo.toml @@ -22,6 +22,7 @@ usdt = { version = "0.3.2", default-features = false } tokio = { version = "1", features = ["full"] } futures = "0.3" crucible = { git = "https://github.com/oxidecomputer/crucible", rev = "8314eeddd228ec0d76cefa40c4a41d3e2611ac18", optional = true } +oximeter = { git = "https://github.com/oxidecomputer/omicron", branch = "main" } anyhow = "1" slog = "2.7" serde = { version = "1" } diff --git a/propolis/src/block/crucible.rs b/propolis/src/block/crucible.rs index 673d399cc..5ea554090 100644 --- a/propolis/src/block/crucible.rs +++ b/propolis/src/block/crucible.rs @@ -14,6 +14,7 @@ use crucible::{ crucible_bail, BlockIO, Buffer, CrucibleError, Volume, VolumeConstructionRequest, }; +use oximeter::types::ProducerRegistry; /// Helper function, because Rust couldn't derive the types fn map_crucible_error_to_io(x: CrucibleError) -> std::io::Error { @@ -34,8 +35,9 @@ impl CrucibleBackend { gen: u64, request: VolumeConstructionRequest, read_only: bool, + producer_registry: Arc>>, ) -> Result> { - CrucibleBackend::_create(gen, request, read_only) + CrucibleBackend::_create(gen, request, read_only, producer_registry) .map_err(map_crucible_error_to_io) } @@ -43,12 +45,13 @@ impl CrucibleBackend { gen: u64, request: VolumeConstructionRequest, read_only: bool, + producer_registry: Arc>>, ) -> anyhow::Result, crucible::CrucibleError> { // XXX Crucible uses std::sync::mpsc::Receiver, not // tokio::sync::mpsc::Receiver, so use tokio::task::block_in_place here. // Remove that when Crucible changes over to the tokio mpsc. let volume = Arc::new(tokio::task::block_in_place(|| { - Volume::construct(request) + Volume::construct(request, producer_registry) })?); volume.activate(gen)?; diff --git a/server/Cargo.toml b/server/Cargo.toml index e7be03772..3035b1715 100644 --- a/server/Cargo.toml +++ b/server/Cargo.toml @@ -22,6 +22,7 @@ async-trait = "0.1.53" bit_field = "0.10.1" bitvec = "1.0" bytes = "1.1" +chrono = { version = "0.4.19", features = [ "serde" ] } clap = { version = "3.2", features = ["derive"] } const_format = "0.2" dropshot = { git = "https://github.com/oxidecomputer/dropshot", branch = "main" } @@ -29,6 +30,9 @@ erased-serde = "0.3" futures = "0.3" hyper = "0.14" num_enum = "0.5" +omicron-common = { git = "https://github.com/oxidecomputer/omicron", branch = "main" } +oximeter-producer = { git = "https://github.com/oxidecomputer/omicron", branch = "main" } +oximeter = { git = "https://github.com/oxidecomputer/omicron", branch = "main" } ron = "0.7" thiserror = "1.0" tokio = { version = "1", features = ["full"] } diff --git a/server/src/lib/initializer.rs b/server/src/lib/initializer.rs index 03099162d..f1ea2118d 100644 --- a/server/src/lib/initializer.rs +++ b/server/src/lib/initializer.rs @@ -4,6 +4,7 @@ use std::num::NonZeroUsize; use std::sync::Arc; use std::time::SystemTime; +use oximeter::types::ProducerRegistry; use propolis::block; use propolis::chardev::{self, BlockingSource, Source}; use propolis::common::PAGE_SIZE; @@ -294,12 +295,14 @@ impl<'a> MachineInitializer<'a> { chipset: &RegisteredChipset, disk: &propolis_client::api::DiskRequest, bdf: pci::Bdf, + producer_registry: Arc>>, ) -> Result<(), Error> { info!(self.log, "Creating Crucible disk from {:#?}", disk); let be = propolis::block::CrucibleBackend::create( disk.gen, disk.volume_construction_request.clone(), disk.read_only, + producer_registry, )?; info!(self.log, "Creating ChildRegister"); diff --git a/server/src/lib/lib.rs b/server/src/lib/lib.rs index cb2386b2c..63ec57740 100644 --- a/server/src/lib/lib.rs +++ b/server/src/lib/lib.rs @@ -6,4 +6,5 @@ mod initializer; mod migrate; mod serial; pub mod server; +pub mod stats; pub mod vnc; diff --git a/server/src/lib/server.rs b/server/src/lib/server.rs index 3594cebe3..80e61f425 100644 --- a/server/src/lib/server.rs +++ b/server/src/lib/server.rs @@ -11,9 +11,11 @@ use hyper::upgrade::{self, Upgraded}; use hyper::{header, Body, Response, StatusCode}; use propolis::hw::qemu::ramfb::RamFb; use rfb::server::VncServer; +use oximeter::types::ProducerRegistry; use slog::{error, info, o, Logger}; use std::borrow::Cow; use std::io::{Error, ErrorKind}; +use std::net::SocketAddr; use std::ops::Range; use std::sync::Arc; use thiserror::Error; @@ -36,6 +38,7 @@ use propolis_client::api; use crate::config::Config; use crate::initializer::{build_instance, MachineInitializer}; use crate::serial::Serial; +use crate::stats::{prop_oximeter, PropCountStat, PropStatOuter}; use crate::vnc::PropolisVncServer; use crate::{migrate, vnc}; @@ -87,6 +90,13 @@ pub(crate) struct InstanceContext { serial_task: Option, } + +#[derive(Debug, Clone)] +pub struct InstanceMetrics { + pub(crate) producer_registry: Arc>>, + pub(crate) pso: Arc>>, +} + /// Contextual information accessible from HTTP callbacks. pub struct Context { pub(crate) context: Mutex>, @@ -94,6 +104,9 @@ pub struct Context { config: Config, log: Logger, pub(crate) vnc_server: Arc>>, + // To register with Oximeter, we need to know our own address. + pub(crate) propolis_addr: SocketAddr, + pub instance_metrics: InstanceMetrics, } impl Context { @@ -102,13 +115,21 @@ impl Context { config: Config, vnc_server: VncServer, log: Logger, + propolis_addr: SocketAddr, ) -> Self { + + let instance_metrics = InstanceMetrics { + producer_registry: Arc::new(Mutex::new(None)), + pso: Arc::new(Mutex::new(None)), + }; Context { context: Mutex::new(None), migrate_task: Mutex::new(None), config, log, vnc_server: Arc::new(Mutex::new(vnc_server)), + propolis_addr, + instance_metrics, } } } @@ -234,6 +255,55 @@ async fn instance_ensure( })); } + // Determine if we need to setup the metrics endpoint or not. + if request.metrics { + let prop_count_stat = PropCountStat::new(properties.id.clone()); + let pso = PropStatOuter { + prop_stat_wrap: + Arc::new(std::sync::Mutex::new(prop_count_stat)) + }; + let mut lpso = server_context.instance_metrics.pso.lock().await; + assert!(lpso.is_none()); + *lpso = Some(pso.clone()); + drop(lpso); + + // This is the address where stats will be collected. + let la = server_context.propolis_addr.ip(); + let listen_addr = SocketAddr::new(la, 0); + // XXX if registration fails here, do we give up forever? + match prop_oximeter( + properties.id.clone(), + listen_addr, + rqctx.log.clone() + ).await { + Err(e) => { + error!(rqctx.log, "Failed to register with Oximeter {:?}", e); + }, + Ok(server) => { + info!( + rqctx.log, + "registering metrics with instance uuid: {}", + properties.id, + ); + server.registry().register_producer(pso.clone()).unwrap(); + let mut producer_registry = + server_context. + instance_metrics. + producer_registry. + lock().await; + assert!(producer_registry.is_none()); + *producer_registry = Some(server.registry().clone()); + drop(producer_registry); + // Spawn the metric endpoint. + tokio::spawn(async move { + server.serve_forever().await.unwrap(); + }); + } + } + } else { + info!(rqctx.log, "No metrics registration was requested"); + } + const MB: usize = 1024 * 1024; const GB: usize = 1024 * 1024 * 1024; let memsize = properties.memory as usize * MB; @@ -319,7 +389,12 @@ async fn instance_ensure( ) })?; - init.initialize_crucible(&chipset, disk, bdf)?; + init.initialize_crucible( + &chipset, + disk, + bdf, + server_context.instance_metrics.producer_registry.clone(), + )?; info!(rqctx.log, "Disk {} created successfully", disk.name); } @@ -645,6 +720,12 @@ async fn instance_state_put( HttpError::for_internal_error(format!("Failed to set state: {:?}", err)) })?; + let server_context = rqctx.context(); + let pso = server_context.instance_metrics.pso.lock().await; + if let Some(p) = &*pso { + p.add_activation(); + } + Ok(HttpResponseUpdatedNoContent {}) } diff --git a/server/src/main.rs b/server/src/main.rs index 6783eb1f1..d3ee9ed95 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -82,10 +82,15 @@ async fn main() -> anyhow::Result<()> { let vnc_server = setup_vnc(&log, vnc_addr); let vnc_server_hdl = vnc_server.clone(); - let context = - server::Context::new(config, vnc_server, log.new(slog::o!())); + let context = server::Context::new( + config, + vnc_server, + log.new(slog::o!()), + propolis_addr, + ); info!(log, "Starting server..."); + let server = HttpServerStarter::new( &config_dropshot, server::api(), diff --git a/server/tests/integration_tests.rs b/server/tests/integration_tests.rs index 4ff7313f0..939e912bd 100644 --- a/server/tests/integration_tests.rs +++ b/server/tests/integration_tests.rs @@ -71,7 +71,13 @@ async fn initialize_server(log: &Logger) -> HttpServer { block_devices, vec![], ); - let context = server::Context::new(config, vnc_server, log.new(slog::o!())); + let p_ip = SocketAddr::new(IpAddr::V4(Ipv4Addr::UNSPECIFIED), 0); + let context = server::Context::new( + config, + vnc_server, + log.new(slog::o!()), + p_ip, + ); let config_dropshot = ConfigDropshot { bind_address: "127.0.0.1:0".parse().unwrap(), @@ -125,6 +131,7 @@ mod illumos_integration_tests { nics: vec![], migrate: None, cloud_init_bytes: None, + metrics: false, } } From 56b6352787e03b5d5c757a9a9844d286a420f96a Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Thu, 23 Jun 2022 19:48:40 +0000 Subject: [PATCH 02/15] cargo fmt fixes --- server/src/lib/server.rs | 28 +++++++++++++--------------- server/tests/integration_tests.rs | 8 ++------ 2 files changed, 15 insertions(+), 21 deletions(-) diff --git a/server/src/lib/server.rs b/server/src/lib/server.rs index 80e61f425..65d72b97a 100644 --- a/server/src/lib/server.rs +++ b/server/src/lib/server.rs @@ -9,9 +9,9 @@ use futures::future::Fuse; use futures::{FutureExt, SinkExt, StreamExt}; use hyper::upgrade::{self, Upgraded}; use hyper::{header, Body, Response, StatusCode}; +use oximeter::types::ProducerRegistry; use propolis::hw::qemu::ramfb::RamFb; use rfb::server::VncServer; -use oximeter::types::ProducerRegistry; use slog::{error, info, o, Logger}; use std::borrow::Cow; use std::io::{Error, ErrorKind}; @@ -90,7 +90,6 @@ pub(crate) struct InstanceContext { serial_task: Option, } - #[derive(Debug, Clone)] pub struct InstanceMetrics { pub(crate) producer_registry: Arc>>, @@ -117,7 +116,6 @@ impl Context { log: Logger, propolis_addr: SocketAddr, ) -> Self { - let instance_metrics = InstanceMetrics { producer_registry: Arc::new(Mutex::new(None)), pso: Arc::new(Mutex::new(None)), @@ -259,8 +257,7 @@ async fn instance_ensure( if request.metrics { let prop_count_stat = PropCountStat::new(properties.id.clone()); let pso = PropStatOuter { - prop_stat_wrap: - Arc::new(std::sync::Mutex::new(prop_count_stat)) + prop_stat_wrap: Arc::new(std::sync::Mutex::new(prop_count_stat)), }; let mut lpso = server_context.instance_metrics.pso.lock().await; assert!(lpso.is_none()); @@ -274,23 +271,24 @@ async fn instance_ensure( match prop_oximeter( properties.id.clone(), listen_addr, - rqctx.log.clone() - ).await { + rqctx.log.clone(), + ) + .await + { Err(e) => { error!(rqctx.log, "Failed to register with Oximeter {:?}", e); - }, + } Ok(server) => { info!( rqctx.log, - "registering metrics with instance uuid: {}", - properties.id, + "registering metrics with instance uuid: {}", properties.id, ); server.registry().register_producer(pso.clone()).unwrap(); - let mut producer_registry = - server_context. - instance_metrics. - producer_registry. - lock().await; + let mut producer_registry = server_context + .instance_metrics + .producer_registry + .lock() + .await; assert!(producer_registry.is_none()); *producer_registry = Some(server.registry().clone()); drop(producer_registry); diff --git a/server/tests/integration_tests.rs b/server/tests/integration_tests.rs index 939e912bd..4756ba81f 100644 --- a/server/tests/integration_tests.rs +++ b/server/tests/integration_tests.rs @@ -72,12 +72,8 @@ async fn initialize_server(log: &Logger) -> HttpServer { vec![], ); let p_ip = SocketAddr::new(IpAddr::V4(Ipv4Addr::UNSPECIFIED), 0); - let context = server::Context::new( - config, - vnc_server, - log.new(slog::o!()), - p_ip, - ); + let context = + server::Context::new(config, vnc_server, log.new(slog::o!()), p_ip); let config_dropshot = ConfigDropshot { bind_address: "127.0.0.1:0".parse().unwrap(), From d131552a889685f22bebe51ca6c55a1ec35031c4 Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Fri, 24 Jun 2022 00:06:59 +0000 Subject: [PATCH 03/15] Make oximeter optional for standalone --- propolis/Cargo.toml | 2 +- server/Cargo.toml | 2 +- standalone/Cargo.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/propolis/Cargo.toml b/propolis/Cargo.toml index b2515e5c8..d27d6767e 100644 --- a/propolis/Cargo.toml +++ b/propolis/Cargo.toml @@ -22,7 +22,7 @@ usdt = { version = "0.3.2", default-features = false } tokio = { version = "1", features = ["full"] } futures = "0.3" crucible = { git = "https://github.com/oxidecomputer/crucible", rev = "8314eeddd228ec0d76cefa40c4a41d3e2611ac18", optional = true } -oximeter = { git = "https://github.com/oxidecomputer/omicron", branch = "main" } +oximeter = { git = "https://github.com/oxidecomputer/omicron", branch = "main", optional = true } anyhow = "1" slog = "2.7" serde = { version = "1" } diff --git a/server/Cargo.toml b/server/Cargo.toml index 3035b1715..889143fdf 100644 --- a/server/Cargo.toml +++ b/server/Cargo.toml @@ -44,7 +44,7 @@ serde = "1.0" serde_derive = "1.0" serde_json = "1.0" slog = "2.7" -propolis = { path = "../propolis", features = ["crucible"], default-features = false } +propolis = { path = "../propolis", features = ["crucible", "oximeter"], default-features = false } propolis-client = { path = "../client" } rfb = { git = "https://github.com/oxidecomputer/rfb", branch = "main" } uuid = "1.0.0" diff --git a/standalone/Cargo.toml b/standalone/Cargo.toml index d84dd1905..1d9075e44 100644 --- a/standalone/Cargo.toml +++ b/standalone/Cargo.toml @@ -13,7 +13,7 @@ libc = "0.2" toml = "0.5" serde = "1.0" serde_derive = "1.0" -propolis = { path = "../propolis", features = ["crucible"], default-features = false } +propolis = { path = "../propolis", features = ["crucible", "oximeter"], default-features = false } erased-serde = "0.3" serde_json = "1.0" slog = "2.7" From 9adebeba9ae86a8c44c16b490b6c07f58d66e505 Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Mon, 27 Jun 2022 16:02:53 +0000 Subject: [PATCH 04/15] Include stats.rs file for propolis-server --- server/src/lib/stats.rs | 162 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 162 insertions(+) create mode 100644 server/src/lib/stats.rs diff --git a/server/src/lib/stats.rs b/server/src/lib/stats.rs new file mode 100644 index 000000000..bb94794c1 --- /dev/null +++ b/server/src/lib/stats.rs @@ -0,0 +1,162 @@ +// Copyright 2022 Oxide Computer Company +use anyhow::anyhow; +use dropshot::{ConfigDropshot, ConfigLogging, ConfigLoggingLevel}; +use omicron_common::api::internal::nexus::ProducerEndpoint; +use oximeter::{ + types::{Cumulative, Sample}, + Metric, MetricsError, Producer, Target, +}; +use oximeter_producer::{Config, Server}; +use slog::{error, info, Logger}; + +use std::net::IpAddr; +use std::net::SocketAddr; +use std::str::FromStr; +use std::sync::{Arc, Mutex}; +use uuid::Uuid; + +// These structs are used to construct the desired stats for Oximeter. +#[derive(Debug, Copy, Clone, Target)] +struct InstanceUuid { + pub uuid: Uuid, +} +#[derive(Debug, Default, Copy, Clone, Metric)] +pub struct Rebooted { + /// Count of times instance was rebooted + #[datum] + pub count: Cumulative, +} + +// All the counter stats in one struct. +#[derive(Clone, Debug)] +pub struct PropCountStat { + stat_name: InstanceUuid, + run_count: Rebooted, +} + +impl PropCountStat { + pub fn new(uuid: Uuid) -> Self { + PropCountStat { + stat_name: InstanceUuid { uuid: uuid }, + run_count: Default::default(), + } + } + pub fn uuid(&self) -> Uuid { + return self.stat_name.uuid; + } +} + +// This struct wraps the stat struct in an Arc/Mutex so the worker tasks can +// share it with the producer trait. +#[derive(Clone, Debug)] +pub struct PropStatOuter { + pub prop_stat_wrap: Arc>, +} + +impl PropStatOuter { + // When an operation happens that we wish to record in Oximeter, + // one of these methods will be called. Each method will get the + // correct field of PropCountStat to record the update. + pub fn add_activation(&self) { + let mut pso = self.prop_stat_wrap.lock().unwrap(); + let datum = pso.run_count.datum_mut(); + *datum += 1; + } +} + +// This trait is what is called to update the data to send to Oximeter. +// It is called on whatever interval was specified when setting up the +// connection to Oximeter. Since we get a lock in here (and on every +// IO, don't call this too frequently, for some value of frequently that +// I'm not sure of. +impl Producer for PropStatOuter { + fn produce( + &mut self, + ) -> Result + 'static>, MetricsError> { + let pso = self.prop_stat_wrap.lock().unwrap(); + + let mut data = Vec::with_capacity(1); + let name = pso.stat_name; + + data.push(Sample::new(&name, &pso.run_count)); + + // Yield the available samples. + Ok(Box::new(data.into_iter())) + } +} + +/// Setup Oximeter +/// This starts a dropshot server, and then registers the PropStatOuter +/// producer with Oximeter. +/// Once registered, we return the server to the caller. +pub async fn prop_oximeter( + id: Uuid, + my_address: SocketAddr, + plog: Logger, +) -> anyhow::Result { + // XXX Replace with proper oximeter registration + let ra = IpAddr::from_str("fd00:1122:3344:101::3").unwrap(); + let registration_address = SocketAddr::new(ra, 12221); + + info!( + plog, + "Attempt to register {:?} with Nexus/Oximeter at {:?}", + my_address, + registration_address + ); + + let dropshot_config = ConfigDropshot { + bind_address: my_address, + request_body_max_bytes: 2048, + tls: None, + }; + + // XXX change this level + let logging_config = + ConfigLogging::StderrTerminal { level: ConfigLoggingLevel::Error }; + let log = logging_config + .to_logger("propolis-metric-server") + .map_err(|error| anyhow!("failed to create logger: {}", error))?; + + let server_info = ProducerEndpoint { + id, + address: my_address, + base_route: "/collect".to_string(), + interval: tokio::time::Duration::from_secs(30), + }; + + let config = Config { + server_info, + registration_address, + dropshot_config, + logging_config, + }; + + // If the server is not responding when propolis starts, keep trying. + // XXX What is the retry idea here? If we cant start the server and + // register with Oximeter, what do we do? + let mut retry_print_timeout = 0; + loop { + let server = Server::start(&config).await; + match server { + Ok(server) => { + info!( + log, + "connected {:?} to oximeter {:?}", + my_address, + registration_address + ); + return Ok(server); + } + Err(e) => { + if retry_print_timeout == 0 { + error!(log, "Can't connect to oximeter server:\n{}", e); + retry_print_timeout = 1; + } + // Retry every 10 seconds, but only print once a minute + retry_print_timeout = (retry_print_timeout + 1) % 7; + tokio::time::sleep(tokio::time::Duration::from_secs(10)).await; + } + } + } +} From 5e9550d08293d2323e01a538f65464fb08b34264 Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Mon, 27 Jun 2022 23:07:05 +0000 Subject: [PATCH 05/15] Point to crucible metric branch --- Cargo.toml | 4 ---- client/Cargo.toml | 2 +- propolis/Cargo.toml | 2 +- standalone/Cargo.toml | 1 + 4 files changed, 3 insertions(+), 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 87057993b..d4708b81c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,7 +25,3 @@ exclude = [ panic = "abort" [profile.release] panic = "abort" - -# Development -[patch."https://github.com/oxidecomputer/crucible"] -crucible = { path = "../stock/upstairs" } diff --git a/client/Cargo.toml b/client/Cargo.toml index 4ca4030dd..f727b4a6b 100644 --- a/client/Cargo.toml +++ b/client/Cargo.toml @@ -15,4 +15,4 @@ serde_json = "1.0" slog = { version = "2.5", features = [ "max_level_trace", "release_max_level_debug" ] } thiserror = "1.0" uuid = { version = "1.0.0", features = [ "serde", "v4" ] } -crucible = { git = "https://github.com/oxidecomputer/crucible", rev = "fed3e8ca7762130ee146fc516a4ef6eed2b91629" } +crucible = { git = "https://github.com/oxidecomputer/crucible", branch = "producemetric" } diff --git a/propolis/Cargo.toml b/propolis/Cargo.toml index b86f094c3..7955b5c1c 100644 --- a/propolis/Cargo.toml +++ b/propolis/Cargo.toml @@ -21,7 +21,7 @@ viona_api = { path = "../viona-api" } usdt = { version = "0.3.2", default-features = false } tokio = { version = "1", features = ["full"] } futures = "0.3" -crucible = { git = "https://github.com/oxidecomputer/crucible", rev = "fed3e8ca7762130ee146fc516a4ef6eed2b91629", optional = true } +crucible = { git = "https://github.com/oxidecomputer/crucible", branch = "producemetric", optional = true } oximeter = { git = "https://github.com/oxidecomputer/omicron", branch = "main", optional = true } anyhow = "1" slog = "2.7" diff --git a/standalone/Cargo.toml b/standalone/Cargo.toml index 3f328d214..815c3f271 100644 --- a/standalone/Cargo.toml +++ b/standalone/Cargo.toml @@ -24,3 +24,4 @@ slog-term = "2.7" default = ["dtrace-probes"] dtrace-probes = ["propolis/dtrace-probes"] crucible = ["propolis/crucible"] +oximeter = ["propolis/oximeter"] From 62641b4a4bc31b300d3565e907522c58033a708e Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Mon, 27 Jun 2022 23:18:18 +0000 Subject: [PATCH 06/15] Polishing before PR --- cli/src/main.rs | 1 + server/src/lib/server.rs | 1 - server/src/lib/stats.rs | 6 +----- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index 8f09d3edc..a56af2268 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -145,6 +145,7 @@ fn create_logger(opt: &Opt) -> Logger { Logger::root(drain, o!()) } +#[allow(clippy::too_many_arguments)] async fn new_instance( client: &Client, name: String, diff --git a/server/src/lib/server.rs b/server/src/lib/server.rs index ada8b2a7d..2afa58ab4 100644 --- a/server/src/lib/server.rs +++ b/server/src/lib/server.rs @@ -273,7 +273,6 @@ async fn instance_ensure( // This is the address where stats will be collected. let la = server_context.propolis_addr.ip(); let listen_addr = SocketAddr::new(la, 0); - // XXX if registration fails here, do we give up forever? match prop_oximeter( properties.id.clone(), listen_addr, diff --git a/server/src/lib/stats.rs b/server/src/lib/stats.rs index bb94794c1..75506af2f 100644 --- a/server/src/lib/stats.rs +++ b/server/src/lib/stats.rs @@ -94,7 +94,7 @@ pub async fn prop_oximeter( my_address: SocketAddr, plog: Logger, ) -> anyhow::Result { - // XXX Replace with proper oximeter registration + // TODO: Replace with proper oximeter registration let ra = IpAddr::from_str("fd00:1122:3344:101::3").unwrap(); let registration_address = SocketAddr::new(ra, 12221); @@ -111,7 +111,6 @@ pub async fn prop_oximeter( tls: None, }; - // XXX change this level let logging_config = ConfigLogging::StderrTerminal { level: ConfigLoggingLevel::Error }; let log = logging_config @@ -132,9 +131,6 @@ pub async fn prop_oximeter( logging_config, }; - // If the server is not responding when propolis starts, keep trying. - // XXX What is the retry idea here? If we cant start the server and - // register with Oximeter, what do we do? let mut retry_print_timeout = 0; loop { let server = Server::start(&config).await; From 0bdf5f5aa043a5c9083dded9f547410ca8c7d15e Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Tue, 28 Jun 2022 03:25:35 +0000 Subject: [PATCH 07/15] Just count resets --- server/src/lib/server.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/server/src/lib/server.rs b/server/src/lib/server.rs index 2afa58ab4..d1a400f91 100644 --- a/server/src/lib/server.rs +++ b/server/src/lib/server.rs @@ -737,8 +737,11 @@ async fn instance_state_put( let server_context = rqctx.context(); let pso = server_context.instance_metrics.pso.lock().await; - if let Some(p) = &*pso { - p.add_activation(); + + if state == propolis::instance::ReqState::Reset { + if let Some(p) = &*pso { + p.add_activation(); + } } Ok(HttpResponseUpdatedNoContent {}) From 3c411acedc1d4695fed081303321a57c51036687 Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Wed, 29 Jun 2022 18:10:34 +0000 Subject: [PATCH 08/15] More comments --- server/src/lib/stats.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/server/src/lib/stats.rs b/server/src/lib/stats.rs index 75506af2f..3722024fe 100644 --- a/server/src/lib/stats.rs +++ b/server/src/lib/stats.rs @@ -28,6 +28,11 @@ pub struct Rebooted { } // All the counter stats in one struct. +// To create additional stats that Oximeter will collect, add fields to this +// structure. See Oximeter for details, but each fields should be +// constructed similar to "run_count". In addition, a new method should +// be added to the PropStatOuter impl that will be called when the new +// field has changed. #[derive(Clone, Debug)] pub struct PropCountStat { stat_name: InstanceUuid, From 8cedc2c5edc9b3b65b8d1e7e23c27599d4298e0e Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Thu, 30 Jun 2022 02:58:59 +0000 Subject: [PATCH 09/15] Optional metric registration comes from Nexus --- cli/src/main.rs | 8 ++++---- client/src/api.rs | 4 ++-- server/src/lib/server.rs | 4 +++- server/src/lib/stats.rs | 9 +++------ server/tests/integration_tests.rs | 2 +- 5 files changed, 13 insertions(+), 14 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index a56af2268..401c98d29 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -70,9 +70,9 @@ enum Command { #[clap(long, action)] cloud_init: Option, - /// Enable metrics with Oximeter + /// Register address to enable metrics with Oximeter #[clap(long, action)] - metrics: bool, + metrics: Option, }, /// Get the properties of a propolis instance @@ -154,7 +154,7 @@ async fn new_instance( memory: u64, disks: Vec, cloud_init_bytes: Option, - metrics: bool, + metrics: Option, ) -> anyhow::Result<()> { let properties = InstanceProperties { id, @@ -412,7 +412,7 @@ async fn migrate_instance( src_uuid, }), cloud_init_bytes: None, - metrics: false, + metrics: None, }; // Get the source instance ready diff --git a/client/src/api.rs b/client/src/api.rs index 48d12dd47..a693107a7 100644 --- a/client/src/api.rs +++ b/client/src/api.rs @@ -34,8 +34,8 @@ pub struct InstanceEnsureRequest { // base64 encoded cloud-init ISO pub cloud_init_bytes: Option, - // If the instance should post metrics to Oximeter - pub metrics: bool, + // If provided, register here to post metrics to Oximeter + pub metrics: Option, } #[derive(Clone, Deserialize, Serialize, JsonSchema)] diff --git a/server/src/lib/server.rs b/server/src/lib/server.rs index d1a400f91..097243193 100644 --- a/server/src/lib/server.rs +++ b/server/src/lib/server.rs @@ -260,7 +260,7 @@ async fn instance_ensure( } // Determine if we need to setup the metrics endpoint or not. - if request.metrics { + if request.metrics.is_some() { let prop_count_stat = PropCountStat::new(properties.id.clone()); let pso = PropStatOuter { prop_stat_wrap: Arc::new(std::sync::Mutex::new(prop_count_stat)), @@ -273,9 +273,11 @@ async fn instance_ensure( // This is the address where stats will be collected. let la = server_context.propolis_addr.ip(); let listen_addr = SocketAddr::new(la, 0); + let register_addr = request.metrics.unwrap(); match prop_oximeter( properties.id.clone(), listen_addr, + register_addr, rqctx.log.clone(), ) .await diff --git a/server/src/lib/stats.rs b/server/src/lib/stats.rs index 3722024fe..2d9f084e9 100644 --- a/server/src/lib/stats.rs +++ b/server/src/lib/stats.rs @@ -9,9 +9,7 @@ use oximeter::{ use oximeter_producer::{Config, Server}; use slog::{error, info, Logger}; -use std::net::IpAddr; use std::net::SocketAddr; -use std::str::FromStr; use std::sync::{Arc, Mutex}; use uuid::Uuid; @@ -94,15 +92,14 @@ impl Producer for PropStatOuter { /// This starts a dropshot server, and then registers the PropStatOuter /// producer with Oximeter. /// Once registered, we return the server to the caller. +/// By returning the server to the caller, we allow the ProducerRegister +/// to be cloned and passed to any library that wishes to record metrics. pub async fn prop_oximeter( id: Uuid, my_address: SocketAddr, + registration_address: SocketAddr, plog: Logger, ) -> anyhow::Result { - // TODO: Replace with proper oximeter registration - let ra = IpAddr::from_str("fd00:1122:3344:101::3").unwrap(); - let registration_address = SocketAddr::new(ra, 12221); - info!( plog, "Attempt to register {:?} with Nexus/Oximeter at {:?}", diff --git a/server/tests/integration_tests.rs b/server/tests/integration_tests.rs index 4756ba81f..04e74c04d 100644 --- a/server/tests/integration_tests.rs +++ b/server/tests/integration_tests.rs @@ -127,7 +127,7 @@ mod illumos_integration_tests { nics: vec![], migrate: None, cloud_init_bytes: None, - metrics: false, + metrics: None, } } From 395d4db3495b847ba2e43aeff92b44f1ec75bbb9 Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Fri, 1 Jul 2022 00:45:36 +0000 Subject: [PATCH 10/15] PR comments addressed --- server/src/lib/server.rs | 2 +- server/src/lib/stats.rs | 30 +++++++++++++++--------------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/server/src/lib/server.rs b/server/src/lib/server.rs index a6b5e6030..2c065168c 100644 --- a/server/src/lib/server.rs +++ b/server/src/lib/server.rs @@ -746,7 +746,7 @@ async fn instance_state_put( if state == propolis::instance::ReqState::Reset { if let Some(p) = &*pso { - p.add_activation(); + p.count_reset(); } } diff --git a/server/src/lib/stats.rs b/server/src/lib/stats.rs index 2d9f084e9..3c1e495d8 100644 --- a/server/src/lib/stats.rs +++ b/server/src/lib/stats.rs @@ -13,28 +13,31 @@ use std::net::SocketAddr; use std::sync::{Arc, Mutex}; use uuid::Uuid; -// These structs are used to construct the desired stats for Oximeter. +// How frequently Oximeter will collect metrics. +const OXIMETER_STAT_INTERVAL: u64 = 30; + +// These structs are used to construct the desired metrics for Oximeter. #[derive(Debug, Copy, Clone, Target)] struct InstanceUuid { pub uuid: Uuid, } #[derive(Debug, Default, Copy, Clone, Metric)] -pub struct Rebooted { +pub struct Reset { /// Count of times instance was rebooted #[datum] pub count: Cumulative, } -// All the counter stats in one struct. -// To create additional stats that Oximeter will collect, add fields to this +// All the counter metrics in one struct. +// To create additional metrics that Oximeter will collect, add fields to this // structure. See Oximeter for details, but each fields should be -// constructed similar to "run_count". In addition, a new method should -// be added to the PropStatOuter impl that will be called when the new -// field has changed. +// constructed similar to "run_count". A new method should be added to the +// PropStatOuter impl that will be called when the new field has changed. +// The produce method should be updated as well. #[derive(Clone, Debug)] pub struct PropCountStat { stat_name: InstanceUuid, - run_count: Rebooted, + run_count: Reset, } impl PropCountStat { @@ -60,18 +63,15 @@ impl PropStatOuter { // When an operation happens that we wish to record in Oximeter, // one of these methods will be called. Each method will get the // correct field of PropCountStat to record the update. - pub fn add_activation(&self) { + pub fn count_reset(&self) { let mut pso = self.prop_stat_wrap.lock().unwrap(); let datum = pso.run_count.datum_mut(); *datum += 1; } } -// This trait is what is called to update the data to send to Oximeter. -// It is called on whatever interval was specified when setting up the -// connection to Oximeter. Since we get a lock in here (and on every -// IO, don't call this too frequently, for some value of frequently that -// I'm not sure of. +// This trait is what is called to update the data that is collected +// by Oximeter every OXIMETER_STAT_INTERVAL seconds. impl Producer for PropStatOuter { fn produce( &mut self, @@ -123,7 +123,7 @@ pub async fn prop_oximeter( id, address: my_address, base_route: "/collect".to_string(), - interval: tokio::time::Duration::from_secs(30), + interval: tokio::time::Duration::from_secs(OXIMETER_STAT_INTERVAL), }; let config = Config { From 6630d543f5ffdac01e799e2a06d6b40732721751 Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Fri, 1 Jul 2022 20:22:28 +0000 Subject: [PATCH 11/15] propolis-server decides if there will be metrics Control of metric server is now determined at propolis-server start time, though the actual metrics endpoint won't start until the instance itself is started. An optional metric_addr arg if provided will indicate that a metrics endpoint will later be started. Remove metric field from InstanceProperties Updated smf manifest to support metric_addr config option. --- cli/src/main.rs | 9 --------- client/src/api.rs | 3 --- server/src/lib/server.rs | 29 +++++++++++++++++++++-------- server/src/main.rs | 20 ++++++++++++++++++-- server/tests/integration_tests.rs | 4 +--- smf/propolis-server/manifest.xml | 6 +++++- 6 files changed, 45 insertions(+), 26 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index 401c98d29..b62b3f899 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -69,10 +69,6 @@ enum Command { // cloud_init ISO file #[clap(long, action)] cloud_init: Option, - - /// Register address to enable metrics with Oximeter - #[clap(long, action)] - metrics: Option, }, /// Get the properties of a propolis instance @@ -154,7 +150,6 @@ async fn new_instance( memory: u64, disks: Vec, cloud_init_bytes: Option, - metrics: Option, ) -> anyhow::Result<()> { let properties = InstanceProperties { id, @@ -175,7 +170,6 @@ async fn new_instance( disks, migrate: None, cloud_init_bytes, - metrics, }; // Try to create the instance @@ -412,7 +406,6 @@ async fn migrate_instance( src_uuid, }), cloud_init_bytes: None, - metrics: None, }; // Get the source instance ready @@ -483,7 +476,6 @@ async fn main() -> anyhow::Result<()> { memory, crucible_disks, cloud_init, - metrics, } => { let disks = if let Some(crucible_disks) = crucible_disks { parse_json_file(&crucible_disks)? @@ -503,7 +495,6 @@ async fn main() -> anyhow::Result<()> { memory, disks, cloud_init_bytes, - metrics, ) .await? } diff --git a/client/src/api.rs b/client/src/api.rs index a693107a7..fccc688b0 100644 --- a/client/src/api.rs +++ b/client/src/api.rs @@ -33,9 +33,6 @@ pub struct InstanceEnsureRequest { // base64 encoded cloud-init ISO pub cloud_init_bytes: Option, - - // If provided, register here to post metrics to Oximeter - pub metrics: Option, } #[derive(Clone, Deserialize, Serialize, JsonSchema)] diff --git a/server/src/lib/server.rs b/server/src/lib/server.rs index 2c065168c..81d6543a8 100644 --- a/server/src/lib/server.rs +++ b/server/src/lib/server.rs @@ -102,6 +102,17 @@ pub struct InstanceMetrics { pub(crate) pso: Arc>>, } +#[derive(Debug, Clone)] +pub struct InstanceMetricsConfig { + pub propolis_addr: SocketAddr, + pub metric_addr: SocketAddr, +} +impl InstanceMetricsConfig { + pub fn new(propolis_addr: SocketAddr, metric_addr: SocketAddr) -> Self { + InstanceMetricsConfig { propolis_addr, metric_addr } + } +} + /// Contextual information accessible from HTTP callbacks. pub struct Context { pub(crate) context: Mutex>, @@ -110,8 +121,8 @@ pub struct Context { log: Logger, pub(crate) vnc_server: Arc>>, pub(crate) use_reservoir: bool, - // To register with Oximeter, we need to know our own address. - pub(crate) propolis_addr: SocketAddr, + // To register with Oximeter. + pub(crate) metric_config: Option, pub instance_metrics: InstanceMetrics, } @@ -122,7 +133,7 @@ impl Context { vnc_server: VncServer, use_reservoir: bool, log: Logger, - propolis_addr: SocketAddr, + metric_config: Option, ) -> Self { let instance_metrics = InstanceMetrics { producer_registry: Arc::new(Mutex::new(None)), @@ -135,7 +146,7 @@ impl Context { log, vnc_server: Arc::new(Mutex::new(vnc_server)), use_reservoir, - propolis_addr, + metric_config, instance_metrics, } } @@ -263,7 +274,7 @@ async fn instance_ensure( } // Determine if we need to setup the metrics endpoint or not. - if request.metrics.is_some() { + if server_context.metric_config.is_some() { let prop_count_stat = PropCountStat::new(properties.id.clone()); let pso = PropStatOuter { prop_stat_wrap: Arc::new(std::sync::Mutex::new(prop_count_stat)), @@ -274,9 +285,11 @@ async fn instance_ensure( drop(lpso); // This is the address where stats will be collected. - let la = server_context.propolis_addr.ip(); - let listen_addr = SocketAddr::new(la, 0); - let register_addr = request.metrics.unwrap(); + let propolis_addr = + server_context.metric_config.as_ref().unwrap().propolis_addr.ip(); + let listen_addr = SocketAddr::new(propolis_addr, 0); + let register_addr = + server_context.metric_config.as_ref().unwrap().metric_addr; match prop_oximeter( properties.id.clone(), listen_addr, diff --git a/server/src/main.rs b/server/src/main.rs index b6d24ed68..df6c156c6 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -15,6 +15,7 @@ use slog::info; use std::net::{IpAddr, Ipv4Addr, SocketAddr}; use std::path::PathBuf; +use propolis_server::server::InstanceMetricsConfig; use propolis_server::vnc::setup_vnc; use propolis_server::{config, server}; @@ -32,6 +33,10 @@ enum Args { #[clap(name = "PROPOLIS_IP:PORT", action)] propolis_addr: SocketAddr, + /// IP:Port for the Oximeter register address, which is Nexus. + #[clap(long, action)] + metric_addr: Option, + #[clap( name = "VNC_IP:PORT", default_value_t = SocketAddr::new(IpAddr::V4(Ipv4Addr::UNSPECIFIED), 5900), @@ -62,7 +67,7 @@ async fn main() -> anyhow::Result<()> { match args { Args::OpenApi => run_openapi() .map_err(|e| anyhow!("Cannot generate OpenAPI spec: {}", e)), - Args::Run { cfg, propolis_addr, vnc_addr } => { + Args::Run { cfg, propolis_addr, metric_addr, vnc_addr } => { let config = config::parse(&cfg)?; // Dropshot configuration. @@ -82,12 +87,23 @@ async fn main() -> anyhow::Result<()> { let vnc_server_hdl = vnc_server.clone(); let use_reservoir = config::reservoir_decide(&log); + let metric_config = if metric_addr.is_some() { + let imc = InstanceMetricsConfig::new( + propolis_addr, + metric_addr.unwrap(), + ); + info!(log, "Metrics server will use {:?}", imc); + Some(imc) + } else { + None + }; + let context = server::Context::new( config, vnc_server, use_reservoir, log.new(slog::o!()), - propolis_addr, + metric_config, ); info!(log, "Starting server..."); diff --git a/server/tests/integration_tests.rs b/server/tests/integration_tests.rs index 8a2c192d3..afd66654d 100644 --- a/server/tests/integration_tests.rs +++ b/server/tests/integration_tests.rs @@ -72,14 +72,13 @@ async fn initialize_server(log: &Logger) -> HttpServer { vec![], ); - let p_ip = SocketAddr::new(IpAddr::V4(Ipv4Addr::UNSPECIFIED), 0); let use_reservoir = propolis_server::config::reservoir_decide(log); let context = server::Context::new( config, vnc_server, use_reservoir, log.new(slog::o!()), - p_ip, + None, ); let config_dropshot = ConfigDropshot { @@ -134,7 +133,6 @@ mod illumos_integration_tests { nics: vec![], migrate: None, cloud_init_bytes: None, - metrics: None, } } diff --git a/smf/propolis-server/manifest.xml b/smf/propolis-server/manifest.xml index 22b5c4e8b..5f14eacf8 100644 --- a/smf/propolis-server/manifest.xml +++ b/smf/propolis-server/manifest.xml @@ -15,7 +15,7 @@ @@ -23,6 +23,10 @@ + + + + From bc3f45e52a014f9825e71faaaaea1da7e0c52092 Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Sat, 2 Jul 2022 00:44:10 +0000 Subject: [PATCH 12/15] Remove some Arc Mutex layers. No need to wrap things in Arc Mutex, and I don't have to keep track of producer_register outside of instance_ensure(). --- propolis/src/block/crucible.rs | 4 +-- server/src/lib/initializer.rs | 2 +- server/src/lib/server.rs | 60 ++++++++++++++++------------------ 3 files changed, 32 insertions(+), 34 deletions(-) diff --git a/propolis/src/block/crucible.rs b/propolis/src/block/crucible.rs index 9aae01286..15731e8d5 100644 --- a/propolis/src/block/crucible.rs +++ b/propolis/src/block/crucible.rs @@ -36,7 +36,7 @@ impl CrucibleBackend { gen: u64, request: VolumeConstructionRequest, read_only: bool, - producer_registry: Arc>>, + producer_registry: Option, ) -> Result> { CrucibleBackend::_create(gen, request, read_only, producer_registry) .map_err(map_crucible_error_to_io) @@ -46,7 +46,7 @@ impl CrucibleBackend { gen: u64, request: VolumeConstructionRequest, read_only: bool, - producer_registry: Arc>>, + producer_registry: Option, ) -> anyhow::Result, crucible::CrucibleError> { // XXX Crucible uses std::sync::mpsc::Receiver, not // tokio::sync::mpsc::Receiver, so use tokio::task::block_in_place here. diff --git a/server/src/lib/initializer.rs b/server/src/lib/initializer.rs index 0e636af7f..73b33eae9 100644 --- a/server/src/lib/initializer.rs +++ b/server/src/lib/initializer.rs @@ -291,7 +291,7 @@ impl<'a> MachineInitializer<'a> { chipset: &RegisteredChipset, disk: &propolis_client::api::DiskRequest, bdf: pci::Bdf, - producer_registry: Arc>>, + producer_registry: Option, ) -> Result, Error> { info!(self.log, "Creating Crucible disk from {:#?}", disk); let be = propolis::block::CrucibleBackend::create( diff --git a/server/src/lib/server.rs b/server/src/lib/server.rs index 81d6543a8..b04c696a6 100644 --- a/server/src/lib/server.rs +++ b/server/src/lib/server.rs @@ -10,7 +10,6 @@ use futures::stream::{SplitSink, SplitStream}; use futures::{FutureExt, SinkExt, StreamExt}; use hyper::upgrade::{self, Upgraded}; use hyper::{header, Body, Response, StatusCode}; -use oximeter::types::ProducerRegistry; use propolis::hw::qemu::ramfb::RamFb; use rfb::server::VncServer; use slog::{error, info, o, Logger}; @@ -96,12 +95,6 @@ pub(crate) struct InstanceContext { Mutex>>, } -#[derive(Debug, Clone)] -pub struct InstanceMetrics { - pub(crate) producer_registry: Arc>>, - pub(crate) pso: Arc>>, -} - #[derive(Debug, Clone)] pub struct InstanceMetricsConfig { pub propolis_addr: SocketAddr, @@ -123,7 +116,7 @@ pub struct Context { pub(crate) use_reservoir: bool, // To register with Oximeter. pub(crate) metric_config: Option, - pub instance_metrics: InstanceMetrics, + pub instance_metrics: Mutex>, } impl Context { @@ -135,10 +128,6 @@ impl Context { log: Logger, metric_config: Option, ) -> Self { - let instance_metrics = InstanceMetrics { - producer_registry: Arc::new(Mutex::new(None)), - pso: Arc::new(Mutex::new(None)), - }; Context { context: Mutex::new(None), migrate_task: Mutex::new(None), @@ -147,7 +136,7 @@ impl Context { vnc_server: Arc::new(Mutex::new(vnc_server)), use_reservoir, metric_config, - instance_metrics, + instance_metrics: Mutex::new(None), } } } @@ -273,16 +262,18 @@ async fn instance_ensure( })); } + // If anyone outside Propolis wishes to register for metrics, this + // will hold the producer registry they can use. + let mut producer_registry = None; + // Determine if we need to setup the metrics endpoint or not. + // If we do, we will then populate producer_registry with something. if server_context.metric_config.is_some() { + // Create some propolis level metrics. let prop_count_stat = PropCountStat::new(properties.id.clone()); let pso = PropStatOuter { prop_stat_wrap: Arc::new(std::sync::Mutex::new(prop_count_stat)), }; - let mut lpso = server_context.instance_metrics.pso.lock().await; - assert!(lpso.is_none()); - *lpso = Some(pso.clone()); - drop(lpso); // This is the address where stats will be collected. let propolis_addr = @@ -290,6 +281,7 @@ async fn instance_ensure( let listen_addr = SocketAddr::new(propolis_addr, 0); let register_addr = server_context.metric_config.as_ref().unwrap().metric_addr; + match prop_oximeter( properties.id.clone(), listen_addr, @@ -306,15 +298,21 @@ async fn instance_ensure( rqctx.log, "registering metrics with instance uuid: {}", properties.id, ); + // Register the propolis level instance metrics. server.registry().register_producer(pso.clone()).unwrap(); - let mut producer_registry = server_context - .instance_metrics - .producer_registry - .lock() - .await; - assert!(producer_registry.is_none()); - *producer_registry = Some(server.registry().clone()); - drop(producer_registry); + + // Now that our metrics are registered, attach them to + // the server context so they can be updated. + let mut im = server_context.instance_metrics.lock().await; + *im = Some(pso.clone()); + drop(im); + + // Clone the producer_registry that we can pass to any + // other library that may want to register their own + // metrics. Doing it this way means propolis does not have + // to know what metrics they register. + producer_registry = Some(server.registry().clone()); + // Spawn the metric endpoint. tokio::spawn(async move { server.serve_forever().await.unwrap(); @@ -417,7 +415,7 @@ async fn instance_ensure( &chipset, disk, bdf, - server_context.instance_metrics.producer_registry.clone(), + producer_registry.clone(), )?; info!(rqctx.log, "Disk {} created successfully", disk.name); @@ -754,12 +752,12 @@ async fn instance_state_put( HttpError::for_internal_error(format!("Failed to set state: {:?}", err)) })?; - let server_context = rqctx.context(); - let pso = server_context.instance_metrics.pso.lock().await; - + // Update the metrics counter when we apply a reset if state == propolis::instance::ReqState::Reset { - if let Some(p) = &*pso { - p.count_reset(); + let server_context = rqctx.context(); + let instance_metrics = server_context.instance_metrics.lock().await; + if let Some(im) = &*instance_metrics { + im.count_reset(); } } From cb57337ba2f0b12286cfd70ecb1ea59f4ade4223 Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Sat, 2 Jul 2022 01:58:48 +0000 Subject: [PATCH 13/15] Update Crucible rev --- client/Cargo.toml | 2 +- propolis/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/client/Cargo.toml b/client/Cargo.toml index f727b4a6b..bf0b40f15 100644 --- a/client/Cargo.toml +++ b/client/Cargo.toml @@ -15,4 +15,4 @@ serde_json = "1.0" slog = { version = "2.5", features = [ "max_level_trace", "release_max_level_debug" ] } thiserror = "1.0" uuid = { version = "1.0.0", features = [ "serde", "v4" ] } -crucible = { git = "https://github.com/oxidecomputer/crucible", branch = "producemetric" } +crucible = { git = "https://github.com/oxidecomputer/crucible", rev = "2add0de8489f1d4de901bfe98fc28b0a6efcc3ea" } diff --git a/propolis/Cargo.toml b/propolis/Cargo.toml index 7955b5c1c..f86c59969 100644 --- a/propolis/Cargo.toml +++ b/propolis/Cargo.toml @@ -21,7 +21,7 @@ viona_api = { path = "../viona-api" } usdt = { version = "0.3.2", default-features = false } tokio = { version = "1", features = ["full"] } futures = "0.3" -crucible = { git = "https://github.com/oxidecomputer/crucible", branch = "producemetric", optional = true } +crucible = { git = "https://github.com/oxidecomputer/crucible", rev = "2add0de8489f1d4de901bfe98fc28b0a6efcc3ea", optional = true } oximeter = { git = "https://github.com/oxidecomputer/omicron", branch = "main", optional = true } anyhow = "1" slog = "2.7" From 5f4e2808f783f025e1a4fddf32f10a40837df432 Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Mon, 11 Jul 2022 20:41:57 +0000 Subject: [PATCH 14/15] PR comments --- propolis/Cargo.toml | 12 ++++++++++-- standalone/Cargo.toml | 3 +-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/propolis/Cargo.toml b/propolis/Cargo.toml index f86c59969..34cb6ed35 100644 --- a/propolis/Cargo.toml +++ b/propolis/Cargo.toml @@ -21,8 +21,6 @@ viona_api = { path = "../viona-api" } usdt = { version = "0.3.2", default-features = false } tokio = { version = "1", features = ["full"] } futures = "0.3" -crucible = { git = "https://github.com/oxidecomputer/crucible", rev = "2add0de8489f1d4de901bfe98fc28b0a6efcc3ea", optional = true } -oximeter = { git = "https://github.com/oxidecomputer/omicron", branch = "main", optional = true } anyhow = "1" slog = "2.7" serde = { version = "1" } @@ -31,6 +29,16 @@ erased-serde = "0.3" serde_json = "1.0" uuid = "1.0.0" +[dependencies.crucible] +git = "https://github.com/oxidecomputer/crucible" +rev = "2add0de8489f1d4de901bfe98fc28b0a6efcc3ea" +optional = true + +[dependencies.oximeter] +git = "https://github.com/oxidecomputer/omicron" +branch = "main" +optional = true + [dev-dependencies] crossbeam-channel = "0.5" tempfile = "3.2" diff --git a/standalone/Cargo.toml b/standalone/Cargo.toml index e230400b6..d883acd11 100644 --- a/standalone/Cargo.toml +++ b/standalone/Cargo.toml @@ -26,5 +26,4 @@ slog-term = "2.7" [features] default = ["dtrace-probes"] dtrace-probes = ["propolis/dtrace-probes"] -crucible = ["propolis/crucible"] -oximeter = ["propolis/oximeter"] +crucible = ["propolis/crucible", "propolis/oximeter"] From d67de50005a0129d2e9bdb24fde4c4328fef7649 Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Mon, 11 Jul 2022 22:30:06 +0000 Subject: [PATCH 15/15] cargo fmt --- server/src/lib/initializer.rs | 10 +++++++++- server/src/lib/server.rs | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/server/src/lib/initializer.rs b/server/src/lib/initializer.rs index 4b22bf828..809eca564 100644 --- a/server/src/lib/initializer.rs +++ b/server/src/lib/initializer.rs @@ -137,7 +137,15 @@ impl<'a> MachineInitializer<'a> { spec: &'a InstanceSpec, producer_registry: Option, ) -> Self { - MachineInitializer { log, machine, mctx, disp, inv, spec, producer_registry } + MachineInitializer { + log, + machine, + mctx, + disp, + inv, + spec, + producer_registry, + } } pub fn initialize_rom>( diff --git a/server/src/lib/server.rs b/server/src/lib/server.rs index 2e172ec5a..613e34a4c 100644 --- a/server/src/lib/server.rs +++ b/server/src/lib/server.rs @@ -32,8 +32,8 @@ use propolis_client::{api, instance_spec}; use crate::config::Config; use crate::initializer::{build_instance, MachineInitializer}; use crate::serial::Serial; -use crate::stats::{prop_oximeter, PropCountStat, PropStatOuter}; use crate::spec::SpecBuilder; +use crate::stats::{prop_oximeter, PropCountStat, PropStatOuter}; use crate::vnc::PropolisVncServer; use crate::{migrate, vnc}; use uuid::Uuid;