From 5394ceb953141e9f9d22160064408d8a9bddc0ab Mon Sep 17 00:00:00 2001 From: Dennis Felsing Date: Sun, 24 May 2026 02:05:55 +0000 Subject: [PATCH] storage-types: redact credentials in ContentSource Debug Use SensitiveUrl for ContentSource::Http and skip the AwsConnection field in Debug so reconcile log lines no longer leak url passwords or inline AWS credentials. Also narrow the storage_state reconcile log to ids only. Carry SensitiveUrl through HttpOneshotSource and HttpObject so the `tracing::info!(?object, "found objects")` line in the HTTP oneshot source also redacts. Move URL-userinfo Basic auth into a Basic auth header before handing the URL to reqwest, so reqwest::Error::Display cannot leak the password on connect/DNS failures. Co-Authored-By: Claude Opus 4.7 (1M context) --- Cargo.lock | 1 + .../src/coord/sequencer/inner/copy_from.rs | 4 +- src/storage-operators/Cargo.toml | 1 + .../src/oneshot_source/http_source.rs | 49 ++++++++++++++----- src/storage-types/src/oneshot_sources.rs | 9 ++-- src/storage/src/storage_state.rs | 7 ++- 6 files changed, 54 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 30e85446cec91..8b41b1d27ce65 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8170,6 +8170,7 @@ dependencies = [ "tokio-util", "tracing", "url", + "urlencoding", "uuid", ] diff --git a/src/adapter/src/coord/sequencer/inner/copy_from.rs b/src/adapter/src/coord/sequencer/inner/copy_from.rs index 7e89505be4e41..481afc097d154 100644 --- a/src/adapter/src/coord/sequencer/inner/copy_from.rs +++ b/src/adapter/src/coord/sequencer/inner/copy_from.rs @@ -143,7 +143,9 @@ impl Coordinator { .retire(Err(AdapterError::Unstructured(anyhow::anyhow!("{err}")))); } } - mz_storage_types::oneshot_sources::ContentSource::Http { url } + mz_storage_types::oneshot_sources::ContentSource::Http { + url: mz_ore::url::SensitiveUrl(url), + } } CopyFromSource::AwsS3 { uri, diff --git a/src/storage-operators/Cargo.toml b/src/storage-operators/Cargo.toml index 5abd2e858d35b..7d918ca78cfa1 100644 --- a/src/storage-operators/Cargo.toml +++ b/src/storage-operators/Cargo.toml @@ -56,6 +56,7 @@ tokio-stream.workspace = true tokio-util = { workspace = true, features = ["io"] } tracing.workspace = true url.workspace = true +urlencoding.workspace = true uuid = { workspace = true, features = ["v4"] } [features] diff --git a/src/storage-operators/src/oneshot_source/http_source.rs b/src/storage-operators/src/oneshot_source/http_source.rs index ab2f76ac6a2c5..086efa58eb1ca 100644 --- a/src/storage-operators/src/oneshot_source/http_source.rs +++ b/src/storage-operators/src/oneshot_source/http_source.rs @@ -16,10 +16,10 @@ use bytes::Bytes; use derivative::Derivative; use futures::TryStreamExt; use futures::stream::{BoxStream, StreamExt}; +use mz_ore::url::SensitiveUrl; use reqwest::Client; use reqwest::dns::{Addrs, Name, Resolve, Resolving}; use serde::{Deserialize, Serialize}; -use url::Url; use crate::oneshot_source::util::IntoRangeHeaderValue; use crate::oneshot_source::{ @@ -87,20 +87,49 @@ fn check_not_redirect(response: &reqwest::Response) -> Result<(), StorageErrorX> pub struct HttpOneshotSource { #[derivative(Debug = "ignore")] client: Client, - origin: Url, + origin: SensitiveUrl, } impl HttpOneshotSource { - pub fn new(client: Client, origin: Url) -> Self { + pub fn new(client: Client, origin: SensitiveUrl) -> Self { HttpOneshotSource { client, origin } } } +/// Build a request, transferring any userinfo on `url` to a `Basic` auth header +/// so the password does not surface via `reqwest::Error::Display` (which +/// renders the URL using `Url::Display` and would otherwise leak it on +/// connect/DNS failures). +/// +/// `Url::username` and `Url::password` return values in their +/// percent-encoded form, but `RequestBuilder::basic_auth` base64-encodes its +/// arguments verbatim. Decode here so credentials containing characters that +/// must be percent-encoded in a URL (`@`, spaces, `:`, ...) authenticate +/// correctly. +fn build_request( + client: &Client, + method: reqwest::Method, + url: &SensitiveUrl, +) -> reqwest::RequestBuilder { + let mut url = url.0.clone(); + let user = String::from_utf8_lossy(&urlencoding::decode_binary(url.username().as_bytes())) + .into_owned(); + let pass = url + .password() + .map(|p| String::from_utf8_lossy(&urlencoding::decode_binary(p.as_bytes())).into_owned()); + if user.is_empty() && pass.is_none() { + return client.request(method, url); + } + let _ = url.set_password(None); + let _ = url.set_username(""); + client.request(method, url).basic_auth(user, pass) +} + /// Object returned from an [`HttpOneshotSource`]. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct HttpObject { - /// [`Url`] to access the file. - url: Url, + /// [`SensitiveUrl`] to access the file. + url: SensitiveUrl, /// Name of the file. filename: String, /// Size of this file reported by the [`Content-Length`](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Length) header @@ -207,9 +236,7 @@ impl OneshotSource for HttpOneshotSource { // To get metadata about a file we'll first try issuing a `HEAD` request, which // canonically is the right thing do. - let response = self - .client - .head(self.origin.clone()) + let response = build_request(&self.client, reqwest::Method::HEAD, &self.origin) .send() .await .context("HEAD request")?; @@ -223,9 +250,7 @@ impl OneshotSource for HttpOneshotSource { Err(err) => { tracing::warn!(status = ?err.status(), "HEAD request failed"); - let response = self - .client - .get(self.origin.clone()) + let response = build_request(&self.client, reqwest::Method::GET, &self.origin) .send() .await .context("GET request")?; @@ -296,7 +321,7 @@ impl OneshotSource for HttpOneshotSource { // TODO(cf1): Validate our checksum. let initial_response = async move { - let mut request = self.client.get(object.url); + let mut request = build_request(&self.client, reqwest::Method::GET, &object.url); if let Some(range) = &range { let value = range.into_range_header_value(); diff --git a/src/storage-types/src/oneshot_sources.rs b/src/storage-types/src/oneshot_sources.rs index 46d699ec81525..379793886f8f1 100644 --- a/src/storage-types/src/oneshot_sources.rs +++ b/src/storage-types/src/oneshot_sources.rs @@ -9,14 +9,15 @@ //! Types for oneshot sources. +use derivative::Derivative; use mz_expr::SafeMfpPlan; +use mz_ore::url::SensitiveUrl; use mz_pgcopy::CopyCsvFormatParams; use mz_repr::{CatalogItemId, RelationDesc}; use mz_timely_util::builder_async::PressOnDropButton; use serde::{Deserialize, Serialize}; use tokio::sync::mpsc::UnboundedReceiver; -use url::Url; use crate::connections::aws::AwsConnection; @@ -39,12 +40,14 @@ pub struct OneshotIngestionRequest { pub shape: ContentShape, } -#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)] +#[derive(Derivative, Clone, PartialEq, Eq, Deserialize, Serialize)] +#[derivative(Debug)] pub enum ContentSource { Http { - url: Url, + url: SensitiveUrl, }, AwsS3 { + #[derivative(Debug = "ignore")] connection: AwsConnection, connection_id: CatalogItemId, uri: String, diff --git a/src/storage/src/storage_state.rs b/src/storage/src/storage_state.rs index 2811d34d0cb19..47f0221894c4e 100644 --- a/src/storage/src/storage_state.rs +++ b/src/storage/src/storage_state.rs @@ -1057,7 +1057,12 @@ impl<'w> Worker<'w> { } } StorageCommand::RunOneshotIngestion(ingestion) => { - info!(%worker_id, ?ingestion, "reconcile: received RunOneshotIngestion command"); + info!( + %worker_id, + ingestion_id = %ingestion.ingestion_id, + collection_id = %ingestion.collection_id, + "reconcile: received RunOneshotIngestion command", + ); create_oneshot_ingestions.insert(ingestion.ingestion_id); } StorageCommand::CancelOneshotIngestion(uuid) => {