From b29d8bc239c3817de774e318a7152f859b663d2e Mon Sep 17 00:00:00 2001 From: Li Jiajia Date: Sat, 11 Apr 2026 16:37:44 +0800 Subject: [PATCH 01/23] feat(auth): implement SigV4 authentication for REST catalog --- CMakeLists.txt | 1 + .../IcebergThirdpartyToolchain.cmake | 18 + src/iceberg/catalog/rest/CMakeLists.txt | 19 + .../catalog/rest/auth/auth_manager_internal.h | 7 + .../catalog/rest/auth/auth_managers.cc | 33 +- .../catalog/rest/auth/auth_properties.h | 14 + src/iceberg/catalog/rest/auth/auth_session.cc | 4 +- src/iceberg/catalog/rest/auth/auth_session.h | 11 +- .../catalog/rest/auth/sigv4_auth_manager.cc | 309 +++++++++++ .../catalog/rest/auth/sigv4_auth_manager.h | 129 +++++ src/iceberg/catalog/rest/http_client.cc | 58 ++- src/iceberg/test/CMakeLists.txt | 5 + src/iceberg/test/auth_manager_test.cc | 18 +- src/iceberg/test/sigv4_auth_test.cc | 487 ++++++++++++++++++ 14 files changed, 1087 insertions(+), 26 deletions(-) create mode 100644 src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc create mode 100644 src/iceberg/catalog/rest/auth/sigv4_auth_manager.h create mode 100644 src/iceberg/test/sigv4_auth_test.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index b03e586b0..bdd019fc6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -53,6 +53,7 @@ option(ICEBERG_SQL_SQLITE "Build the SQLite connector for the SQL catalog" OFF) option(ICEBERG_SQL_POSTGRESQL "Build the PostgreSQL connector for the SQL catalog" OFF) option(ICEBERG_SQL_MYSQL "Build the MySQL connector for the SQL catalog" OFF) option(ICEBERG_S3 "Build with S3 support" OFF) +option(ICEBERG_BUILD_SIGV4 "Build SigV4 authentication support (requires AWS SDK)" OFF) option(ICEBERG_ENABLE_ASAN "Enable Address Sanitizer" OFF) option(ICEBERG_ENABLE_UBSAN "Enable Undefined Behavior Sanitizer" OFF) diff --git a/cmake_modules/IcebergThirdpartyToolchain.cmake b/cmake_modules/IcebergThirdpartyToolchain.cmake index 152af0cb9..fd1235faf 100644 --- a/cmake_modules/IcebergThirdpartyToolchain.cmake +++ b/cmake_modules/IcebergThirdpartyToolchain.cmake @@ -633,3 +633,21 @@ endif() if(ICEBERG_BUILD_SQL_CATALOG) resolve_sql_catalog_dependencies() endif() + +# ---------------------------------------------------------------------- +# AWS SDK for C++ + +function(resolve_aws_sdk_dependency) + find_package(AWSSDK REQUIRED COMPONENTS core) + list(APPEND ICEBERG_SYSTEM_DEPENDENCIES AWSSDK) + set(ICEBERG_SYSTEM_DEPENDENCIES + ${ICEBERG_SYSTEM_DEPENDENCIES} + PARENT_SCOPE) +endfunction() + +if(ICEBERG_BUILD_SIGV4) + if(NOT ICEBERG_BUILD_REST) + message(FATAL_ERROR "ICEBERG_BUILD_SIGV4 requires ICEBERG_BUILD_REST to be ON") + endif() + resolve_aws_sdk_dependency() +endif() diff --git a/src/iceberg/catalog/rest/CMakeLists.txt b/src/iceberg/catalog/rest/CMakeLists.txt index 8fb2e93c0..8230b7a40 100644 --- a/src/iceberg/catalog/rest/CMakeLists.txt +++ b/src/iceberg/catalog/rest/CMakeLists.txt @@ -35,6 +35,10 @@ set(ICEBERG_REST_SOURCES rest_util.cc types.cc) +if(ICEBERG_BUILD_SIGV4) + list(APPEND ICEBERG_REST_SOURCES auth/sigv4_auth_manager.cc) +endif() + set(ICEBERG_REST_STATIC_BUILD_INTERFACE_LIBS) set(ICEBERG_REST_SHARED_BUILD_INTERFACE_LIBS) set(ICEBERG_REST_STATIC_INSTALL_INTERFACE_LIBS) @@ -53,6 +57,13 @@ list(APPEND "$,iceberg::iceberg_shared,iceberg::iceberg_static>" "$,iceberg::cpr,cpr::cpr>") +if(ICEBERG_BUILD_SIGV4) + list(APPEND ICEBERG_REST_STATIC_BUILD_INTERFACE_LIBS aws-cpp-sdk-core) + list(APPEND ICEBERG_REST_SHARED_BUILD_INTERFACE_LIBS aws-cpp-sdk-core) + list(APPEND ICEBERG_REST_STATIC_INSTALL_INTERFACE_LIBS aws-cpp-sdk-core) + list(APPEND ICEBERG_REST_SHARED_INSTALL_INTERFACE_LIBS aws-cpp-sdk-core) +endif() + add_iceberg_lib(iceberg_rest SOURCES ${ICEBERG_REST_SOURCES} @@ -65,4 +76,12 @@ add_iceberg_lib(iceberg_rest SHARED_INSTALL_INTERFACE_LIBS ${ICEBERG_REST_SHARED_INSTALL_INTERFACE_LIBS}) +if(ICEBERG_BUILD_SIGV4) + foreach(LIB iceberg_rest_static iceberg_rest_shared) + if(TARGET ${LIB}) + target_compile_definitions(${LIB} PUBLIC ICEBERG_BUILD_SIGV4) + endif() + endforeach() +endif() + iceberg_install_all_headers(iceberg/catalog/rest) diff --git a/src/iceberg/catalog/rest/auth/auth_manager_internal.h b/src/iceberg/catalog/rest/auth/auth_manager_internal.h index 051d05505..783fb2e70 100644 --- a/src/iceberg/catalog/rest/auth/auth_manager_internal.h +++ b/src/iceberg/catalog/rest/auth/auth_manager_internal.h @@ -47,4 +47,11 @@ Result> MakeOAuth2Manager( std::string_view name, const std::unordered_map& properties); +#ifdef ICEBERG_BUILD_SIGV4 +/// \brief Create a SigV4 authentication manager with a delegate. +Result> MakeSigV4AuthManager( + std::string_view name, + const std::unordered_map& properties); +#endif + } // namespace iceberg::rest::auth diff --git a/src/iceberg/catalog/rest/auth/auth_managers.cc b/src/iceberg/catalog/rest/auth/auth_managers.cc index f55885d75..0ff3a5623 100644 --- a/src/iceberg/catalog/rest/auth/auth_managers.cc +++ b/src/iceberg/catalog/rest/auth/auth_managers.cc @@ -22,6 +22,9 @@ #include #include "iceberg/catalog/rest/auth/auth_manager_internal.h" +#ifdef ICEBERG_BUILD_SIGV4 +# include "iceberg/catalog/rest/auth/sigv4_auth_manager.h" +#endif #include "iceberg/catalog/rest/auth/auth_properties.h" #include "iceberg/util/string_util.h" @@ -62,11 +65,15 @@ std::string InferAuthType( } AuthManagerRegistry CreateDefaultRegistry() { - return { + AuthManagerRegistry registry = { {AuthProperties::kAuthTypeNone, MakeNoopAuthManager}, {AuthProperties::kAuthTypeBasic, MakeBasicAuthManager}, {AuthProperties::kAuthTypeOAuth2, MakeOAuth2Manager}, }; +#ifdef ICEBERG_BUILD_SIGV4 + registry[AuthProperties::kAuthTypeSigV4] = MakeSigV4AuthManager; +#endif + return registry; } // Get the global registry of auth manager factories. @@ -98,4 +105,28 @@ Result> AuthManagers::Load( return it->second(name, properties); } +#ifdef ICEBERG_BUILD_SIGV4 +Result> MakeSigV4AuthManager( + std::string_view name, + const std::unordered_map& properties) { + // Determine the delegate auth type. Default to OAuth2 if not specified. + std::string delegate_type = AuthProperties::kAuthTypeOAuth2; + auto it = properties.find(AuthProperties::kSigV4DelegateAuthType); + if (it != properties.end() && !it->second.empty()) { + delegate_type = StringUtils::ToLower(it->second); + } + + // Prevent circular delegation (sigv4 -> sigv4 -> ...). + ICEBERG_PRECHECK(delegate_type != AuthProperties::kAuthTypeSigV4, + "Cannot delegate a SigV4 auth manager to another SigV4 auth manager"); + + // Load the delegate auth manager. + auto delegate_props = properties; + delegate_props[AuthProperties::kAuthType] = delegate_type; + + ICEBERG_ASSIGN_OR_RAISE(auto delegate, AuthManagers::Load(name, delegate_props)); + return std::make_unique(std::move(delegate)); +} +#endif + } // namespace iceberg::rest::auth diff --git a/src/iceberg/catalog/rest/auth/auth_properties.h b/src/iceberg/catalog/rest/auth/auth_properties.h index 05a7ea2c6..f5de44ea8 100644 --- a/src/iceberg/catalog/rest/auth/auth_properties.h +++ b/src/iceberg/catalog/rest/auth/auth_properties.h @@ -59,6 +59,20 @@ class ICEBERG_REST_EXPORT AuthProperties : public ConfigBase { inline static const std::string kSigV4DelegateAuthType = "rest.auth.sigv4.delegate-auth-type"; + // ---- SigV4 AWS credential entries ---- + + /// AWS region for SigV4 signing. + inline static const std::string kSigV4SigningRegion = "rest.signing-region"; + /// AWS service name for SigV4 signing. + inline static const std::string kSigV4SigningName = "rest.signing-name"; + inline static const std::string kSigV4SigningNameDefault = "execute-api"; + /// Static access key ID for SigV4 signing. + inline static const std::string kSigV4AccessKeyId = "rest.access-key-id"; + /// Static secret access key for SigV4 signing. + inline static const std::string kSigV4SecretAccessKey = "rest.secret-access-key"; + /// Optional session token for SigV4 signing. + inline static const std::string kSigV4SessionToken = "rest.session-token"; + // ---- OAuth2 entries ---- inline static Entry kToken{"token", ""}; diff --git a/src/iceberg/catalog/rest/auth/auth_session.cc b/src/iceberg/catalog/rest/auth/auth_session.cc index 31688eedf..82b85e5e6 100644 --- a/src/iceberg/catalog/rest/auth/auth_session.cc +++ b/src/iceberg/catalog/rest/auth/auth_session.cc @@ -43,7 +43,9 @@ class DefaultAuthSession : public AuthSession { explicit DefaultAuthSession(std::unordered_map headers) : headers_(std::move(headers)) {} - Status Authenticate(std::unordered_map& headers) override { + Status Authenticate( + std::unordered_map& headers, + [[maybe_unused]] const HTTPRequestContext& request_context) override { for (const auto& [key, value] : headers_) { headers.try_emplace(key, value); } diff --git a/src/iceberg/catalog/rest/auth/auth_session.h b/src/iceberg/catalog/rest/auth/auth_session.h index 5cccacec9..e19a9d984 100644 --- a/src/iceberg/catalog/rest/auth/auth_session.h +++ b/src/iceberg/catalog/rest/auth/auth_session.h @@ -23,6 +23,7 @@ #include #include +#include "iceberg/catalog/rest/endpoint.h" #include "iceberg/catalog/rest/iceberg_rest_export.h" #include "iceberg/catalog/rest/type_fwd.h" #include "iceberg/result.h" @@ -32,6 +33,13 @@ namespace iceberg::rest::auth { +/// \brief Context about the HTTP request being authenticated. +struct ICEBERG_REST_EXPORT HTTPRequestContext { + HttpMethod method = HttpMethod::kGet; + std::string url; + std::string body; +}; + /// \brief An authentication session that can authenticate outgoing HTTP requests. class ICEBERG_REST_EXPORT AuthSession { public: @@ -50,7 +58,8 @@ class ICEBERG_REST_EXPORT AuthSession { /// - NotAuthorized: Not authenticated (401) /// - IOError: Network or connection errors when reaching auth server /// - RestError: HTTP errors from authentication service - virtual Status Authenticate(std::unordered_map& headers) = 0; + virtual Status Authenticate(std::unordered_map& headers, + const HTTPRequestContext& request_context) = 0; /// \brief Close the session and release any resources. /// diff --git a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc new file mode 100644 index 000000000..96603dbdb --- /dev/null +++ b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc @@ -0,0 +1,309 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/catalog/rest/auth/sigv4_auth_manager.h" + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "iceberg/catalog/rest/auth/auth_properties.h" +#include "iceberg/catalog/rest/endpoint.h" +#include "iceberg/util/macros.h" +#include "iceberg/util/string_util.h" + +namespace iceberg::rest::auth { + +namespace { + +/// \brief Ensures AWS SDK is initialized exactly once per process. +/// ShutdownAPI is intentionally never called (leak-by-design) to avoid +/// static destruction order issues with objects that may outlive shutdown. +class AwsSdkGuard { + public: + static void EnsureInitialized() { + static AwsSdkGuard instance; + (void)instance; + } + + private: + AwsSdkGuard() { + Aws::SDKOptions options; + Aws::InitAPI(options); + } +}; + +Aws::Http::HttpMethod ToAwsMethod(HttpMethod method) { + switch (method) { + case HttpMethod::kGet: + return Aws::Http::HttpMethod::HTTP_GET; + case HttpMethod::kPost: + return Aws::Http::HttpMethod::HTTP_POST; + case HttpMethod::kPut: + return Aws::Http::HttpMethod::HTTP_PUT; + case HttpMethod::kDelete: + return Aws::Http::HttpMethod::HTTP_DELETE; + case HttpMethod::kHead: + return Aws::Http::HttpMethod::HTTP_HEAD; + } + return Aws::Http::HttpMethod::HTTP_GET; +} + +std::unordered_map MergeProperties( + const std::unordered_map& base, + const std::unordered_map& overrides) { + auto merged = base; + for (const auto& [key, value] : overrides) { + merged.insert_or_assign(key, value); + } + return merged; +} + +} // namespace + +// ---- SigV4AuthSession ---- + +SigV4AuthSession::SigV4AuthSession( + std::shared_ptr delegate, std::string signing_region, + std::string signing_name, + std::shared_ptr credentials_provider) + : delegate_(std::move(delegate)), + signing_region_(std::move(signing_region)), + signing_name_(std::move(signing_name)), + credentials_provider_(std::move(credentials_provider)), + signer_(std::make_unique( + credentials_provider_, signing_name_.c_str(), signing_region_.c_str(), + Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Always, + /*urlEscapePath=*/false)) {} + +SigV4AuthSession::~SigV4AuthSession() = default; + +Status SigV4AuthSession::Authenticate( + std::unordered_map& headers, + const HTTPRequestContext& request_context) { + // 1. Delegate authenticates first (e.g., adds OAuth2 Bearer token). + ICEBERG_RETURN_UNEXPECTED(delegate_->Authenticate(headers, request_context)); + + auto original_headers = headers; + + // 2. Relocate Authorization header (case-insensitive) so SigV4 takes precedence. + std::unordered_map signing_headers; + for (const auto& [name, value] : headers) { + if (StringUtils::EqualsIgnoreCase(name, "Authorization")) { + signing_headers[std::string(kRelocatedHeaderPrefix) + name] = value; + } else { + signing_headers[name] = value; + } + } + + // 3. Build AWS SDK request. + Aws::Http::URI aws_uri(request_context.url.c_str()); + auto aws_request = std::make_shared( + aws_uri, ToAwsMethod(request_context.method)); + + for (const auto& [name, value] : signing_headers) { + aws_request->SetHeaderValue(Aws::String(name.c_str()), Aws::String(value.c_str())); + } + + // 4. Set body content hash (matching Java's RESTSigV4AuthSession). + // Empty body: set EMPTY_BODY_SHA256 explicitly (Java line 118-121 workaround). + // Non-empty body: set body stream; the signer (PayloadSigningPolicy::Always) + // computes the real hex hash. Step 7 converts hex to Base64 after signing. + if (request_context.body.empty()) { + aws_request->SetHeaderValue("x-amz-content-sha256", Aws::String(kEmptyBodySha256)); + } else { + auto body_stream = + Aws::MakeShared("SigV4Body", request_context.body); + aws_request->AddContentBody(body_stream); + } + + // 5. Sign. + if (!signer_->SignRequest(*aws_request)) { + return std::unexpected( + Error{ErrorKind::kAuthenticationFailed, "SigV4 signing failed"}); + } + + // 6. Extract signed headers, relocating conflicts with originals. + headers.clear(); + auto signed_headers = aws_request->GetHeaders(); + for (auto it = signed_headers.begin(); it != signed_headers.end(); ++it) { + std::string name_str(it->first.c_str(), it->first.size()); + std::string value_str(it->second.c_str(), it->second.size()); + + for (const auto& [orig_name, orig_value] : original_headers) { + if (StringUtils::EqualsIgnoreCase(orig_name, name_str) && orig_value != value_str) { + headers[std::string(kRelocatedHeaderPrefix) + orig_name] = orig_value; + break; + } + } + + headers[name_str] = value_str; + } + + // 7. Convert body hash from hex to Base64 (matching Java's SignerChecksumParams + // output). Only convert if the value is a valid hex SHA256 (64 hex chars). + if (!request_context.body.empty()) { + auto it = headers.find("x-amz-content-sha256"); + if (it != headers.end() && it->second.size() == 64 && + it->second != std::string(kEmptyBodySha256)) { + auto decoded = Aws::Utils::HashingUtils::HexDecode(Aws::String(it->second.c_str())); + it->second = std::string(Aws::Utils::HashingUtils::Base64Encode(decoded).c_str()); + } + } + + return {}; +} + +Status SigV4AuthSession::Close() { return delegate_->Close(); } + +// ---- SigV4AuthManager ---- + +SigV4AuthManager::SigV4AuthManager(std::unique_ptr delegate) + : delegate_(std::move(delegate)) {} + +SigV4AuthManager::~SigV4AuthManager() = default; + +Result> SigV4AuthManager::InitSession( + HttpClient& init_client, + const std::unordered_map& properties) { + AwsSdkGuard::EnsureInitialized(); + ICEBERG_ASSIGN_OR_RAISE(auto delegate_session, + delegate_->InitSession(init_client, properties)); + return WrapSession(std::move(delegate_session), properties); +} + +Result> SigV4AuthManager::CatalogSession( + HttpClient& shared_client, + const std::unordered_map& properties) { + AwsSdkGuard::EnsureInitialized(); + catalog_properties_ = properties; + ICEBERG_ASSIGN_OR_RAISE(auto delegate_session, + delegate_->CatalogSession(shared_client, properties)); + return WrapSession(std::move(delegate_session), properties); +} + +Result> SigV4AuthManager::ContextualSession( + const std::unordered_map& context, + std::shared_ptr parent) { + auto* sigv4_parent = dynamic_cast(parent.get()); + ICEBERG_PRECHECK(sigv4_parent != nullptr, "Parent session is not SigV4"); + + ICEBERG_ASSIGN_OR_RAISE(auto delegate_session, delegate_->ContextualSession( + context, sigv4_parent->delegate())); + + auto merged = MergeProperties(catalog_properties_, context); + return WrapSession(std::move(delegate_session), merged); +} + +Result> SigV4AuthManager::TableSession( + const TableIdentifier& table, + const std::unordered_map& properties, + std::shared_ptr parent) { + auto* sigv4_parent = dynamic_cast(parent.get()); + ICEBERG_PRECHECK(sigv4_parent != nullptr, "Parent session is not SigV4"); + + ICEBERG_ASSIGN_OR_RAISE( + auto delegate_session, + delegate_->TableSession(table, properties, sigv4_parent->delegate())); + + auto merged = MergeProperties(catalog_properties_, properties); + return WrapSession(std::move(delegate_session), merged); +} + +Status SigV4AuthManager::Close() { return delegate_->Close(); } + +Result> +SigV4AuthManager::MakeCredentialsProvider( + const std::unordered_map& properties) { + auto access_key_it = properties.find(AuthProperties::kSigV4AccessKeyId); + auto secret_key_it = properties.find(AuthProperties::kSigV4SecretAccessKey); + bool has_ak = access_key_it != properties.end() && !access_key_it->second.empty(); + bool has_sk = secret_key_it != properties.end() && !secret_key_it->second.empty(); + + // Reject partial credentials — providing only one of AK/SK is a misconfiguration. + ICEBERG_PRECHECK( + has_ak == has_sk, "Both '{}' and '{}' must be set together, or neither", + AuthProperties::kSigV4AccessKeyId, AuthProperties::kSigV4SecretAccessKey); + + if (has_ak) { + auto session_token_it = properties.find(AuthProperties::kSigV4SessionToken); + if (session_token_it != properties.end() && !session_token_it->second.empty()) { + Aws::Auth::AWSCredentials credentials(access_key_it->second.c_str(), + secret_key_it->second.c_str(), + session_token_it->second.c_str()); + return std::make_shared(credentials); + } + Aws::Auth::AWSCredentials credentials(access_key_it->second.c_str(), + secret_key_it->second.c_str()); + return std::make_shared(credentials); + } + + return std::make_shared(); +} + +std::string SigV4AuthManager::ResolveSigningRegion( + const std::unordered_map& properties) { + auto it = properties.find(AuthProperties::kSigV4SigningRegion); + if (it != properties.end() && !it->second.empty()) { + return it->second; + } + auto legacy_it = properties.find(AuthProperties::kSigV4Region); + if (legacy_it != properties.end() && !legacy_it->second.empty()) { + return legacy_it->second; + } + if (const char* env = std::getenv("AWS_REGION")) { + return std::string(env); + } + if (const char* env = std::getenv("AWS_DEFAULT_REGION")) { + return std::string(env); + } + return "us-east-1"; +} + +std::string SigV4AuthManager::ResolveSigningName( + const std::unordered_map& properties) { + auto it = properties.find(AuthProperties::kSigV4SigningName); + if (it != properties.end() && !it->second.empty()) { + return it->second; + } + auto legacy_it = properties.find(AuthProperties::kSigV4Service); + if (legacy_it != properties.end() && !legacy_it->second.empty()) { + return legacy_it->second; + } + return AuthProperties::kSigV4SigningNameDefault; +} + +Result> SigV4AuthManager::WrapSession( + std::shared_ptr delegate_session, + const std::unordered_map& properties) { + auto region = ResolveSigningRegion(properties); + auto service = ResolveSigningName(properties); + ICEBERG_ASSIGN_OR_RAISE(auto credentials, MakeCredentialsProvider(properties)); + return std::make_shared(std::move(delegate_session), + std::move(region), std::move(service), + std::move(credentials)); +} + +} // namespace iceberg::rest::auth diff --git a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.h b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.h new file mode 100644 index 000000000..7ee9aa7bc --- /dev/null +++ b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.h @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include +#include +#include + +#include "iceberg/catalog/rest/auth/auth_manager.h" +#include "iceberg/catalog/rest/auth/auth_session.h" +#include "iceberg/catalog/rest/iceberg_rest_export.h" +#include "iceberg/result.h" + +namespace Aws::Auth { +class AWSCredentialsProvider; +} // namespace Aws::Auth + +namespace Aws::Client { +class AWSAuthV4Signer; +} // namespace Aws::Client + +namespace iceberg::rest::auth { + +/// \brief An AuthSession that signs requests with AWS SigV4. +/// +/// The request is first authenticated by the delegate AuthSession (e.g., OAuth2), +/// then signed with SigV4. In case of conflicting headers, the Authorization header +/// set by the delegate is relocated with an "Original-" prefix, then included in +/// the canonical headers to sign. +/// +/// See https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_sigv.html +/// +/// Thread safety: Authenticate() is NOT thread-safe. Each session should be used +/// from a single thread, or callers must synchronize externally. +class ICEBERG_REST_EXPORT SigV4AuthSession : public AuthSession { + public: + /// SHA-256 hash of empty string, used for requests with no body. + static constexpr std::string_view kEmptyBodySha256 = + "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"; + + /// Prefix prepended to relocated headers that conflict with SigV4-signed headers. + static constexpr std::string_view kRelocatedHeaderPrefix = "Original-"; + + SigV4AuthSession( + std::shared_ptr delegate, std::string signing_region, + std::string signing_name, + std::shared_ptr credentials_provider); + + ~SigV4AuthSession() override; + + Status Authenticate(std::unordered_map& headers, + const HTTPRequestContext& request_context) override; + + Status Close() override; + + const std::shared_ptr& delegate() const { return delegate_; } + + private: + std::shared_ptr delegate_; + std::string signing_region_; + std::string signing_name_; + std::shared_ptr credentials_provider_; + /// Shared signer instance, matching Java's single Aws4Signer per manager. + std::unique_ptr signer_; +}; + +/// \brief An AuthManager that produces SigV4AuthSession instances. +/// +/// Wraps a delegate AuthManager to handle double authentication (e.g., OAuth2 + SigV4). +/// +/// Thread safety: CatalogSession() must be called before ContextualSession() or +/// TableSession(). Concurrent calls are NOT safe — callers must synchronize externally. +class ICEBERG_REST_EXPORT SigV4AuthManager : public AuthManager { + public: + explicit SigV4AuthManager(std::unique_ptr delegate); + ~SigV4AuthManager() override; + + Result> InitSession( + HttpClient& init_client, + const std::unordered_map& properties) override; + + Result> CatalogSession( + HttpClient& shared_client, + const std::unordered_map& properties) override; + + Result> ContextualSession( + const std::unordered_map& context, + std::shared_ptr parent) override; + + Result> TableSession( + const TableIdentifier& table, + const std::unordered_map& properties, + std::shared_ptr parent) override; + + Status Close() override; + + private: + static Result> + MakeCredentialsProvider(const std::unordered_map& properties); + static std::string ResolveSigningRegion( + const std::unordered_map& properties); + static std::string ResolveSigningName( + const std::unordered_map& properties); + Result> WrapSession( + std::shared_ptr delegate_session, + const std::unordered_map& properties); + + std::unique_ptr delegate_; + std::unordered_map catalog_properties_; +}; + +} // namespace iceberg::rest::auth diff --git a/src/iceberg/catalog/rest/http_client.cc b/src/iceberg/catalog/rest/http_client.cc index 2e383b0ae..f7d7c80b0 100644 --- a/src/iceberg/catalog/rest/http_client.cc +++ b/src/iceberg/catalog/rest/http_client.cc @@ -19,6 +19,8 @@ #include "iceberg/catalog/rest/http_client.h" +#include + #include #include @@ -72,12 +74,12 @@ constexpr std::string_view kRestExceptionType = "RESTException"; Result BuildHeaders( const std::unordered_map& request_headers, const std::unordered_map& default_headers, - auth::AuthSession& session) { + auth::AuthSession& session, const auth::HTTPRequestContext& request_context) { std::unordered_map headers(default_headers); for (const auto& [key, val] : request_headers) { headers.insert_or_assign(key, val); } - ICEBERG_RETURN_UNEXPECTED(session.Authenticate(headers)); + ICEBERG_RETURN_UNEXPECTED(session.Authenticate(headers, request_context)); return cpr::Header(headers.begin(), headers.end()); } @@ -91,6 +93,24 @@ cpr::Parameters GetParameters( return cpr_params; } +/// \brief Append URL-encoded query parameters to a URL, sorted by key. +std::string AppendQueryString( + const std::string& base_url, + const std::unordered_map& params) { + if (params.empty()) return base_url; + std::map sorted(params.begin(), params.end()); + std::string url = base_url + "?"; + bool first = true; + for (const auto& [k, v] : sorted) { + if (!first) url += "&"; + auto ek = EncodeString(k); + auto ev = EncodeString(v); + url += (ek ? *ek : k) + "=" + (ev ? *ev : v); + first = false; + } + return url; +} + /// \brief Checks if the HTTP status code indicates a successful response. bool IsSuccessful(int32_t status_code) { return status_code == 200 // OK @@ -149,8 +169,10 @@ Result HttpClient::Get( const std::string& path, const std::unordered_map& params, const std::unordered_map& headers, const ErrorHandler& error_handler, auth::AuthSession& session) { - ICEBERG_ASSIGN_OR_RAISE(auto all_headers, - BuildHeaders(headers, default_headers_, session)); + ICEBERG_ASSIGN_OR_RAISE( + auto all_headers, + BuildHeaders(headers, default_headers_, session, + {HttpMethod::kGet, AppendQueryString(path, params), ""})); cpr::Response response = cpr::Get(cpr::Url{path}, GetParameters(params), all_headers, *connection_pool_); @@ -164,8 +186,9 @@ Result HttpClient::Post( const std::string& path, const std::string& body, const std::unordered_map& headers, const ErrorHandler& error_handler, auth::AuthSession& session) { - ICEBERG_ASSIGN_OR_RAISE(auto all_headers, - BuildHeaders(headers, default_headers_, session)); + ICEBERG_ASSIGN_OR_RAISE( + auto all_headers, + BuildHeaders(headers, default_headers_, session, {HttpMethod::kPost, path, body})); cpr::Response response = cpr::Post(cpr::Url{path}, cpr::Body{body}, all_headers, *connection_pool_); @@ -182,16 +205,20 @@ Result HttpClient::PostForm( const ErrorHandler& error_handler, auth::AuthSession& session) { std::unordered_map form_headers(headers); form_headers.insert_or_assign(kHeaderContentType, kMimeTypeFormUrlEncoded); - ICEBERG_ASSIGN_OR_RAISE(auto all_headers, - BuildHeaders(form_headers, default_headers_, session)); std::vector pair_list; pair_list.reserve(form_data.size()); for (const auto& [key, val] : form_data) { pair_list.emplace_back(key, val); } + // Use cpr's own encoding as the signing body to ensure consistency with the + // actual payload sent over the wire. + cpr::Payload payload(pair_list.begin(), pair_list.end()); + std::string encoded_body = payload.GetContent(); + ICEBERG_ASSIGN_OR_RAISE(auto all_headers, + BuildHeaders(form_headers, default_headers_, session, + {HttpMethod::kPost, path, encoded_body})); cpr::Response response = - cpr::Post(cpr::Url{path}, cpr::Payload(pair_list.begin(), pair_list.end()), - all_headers, *connection_pool_); + cpr::Post(cpr::Url{path}, std::move(payload), all_headers, *connection_pool_); ICEBERG_RETURN_UNEXPECTED(HandleFailureResponse(response, error_handler)); HttpResponse http_response; @@ -202,8 +229,9 @@ Result HttpClient::PostForm( Result HttpClient::Head( const std::string& path, const std::unordered_map& headers, const ErrorHandler& error_handler, auth::AuthSession& session) { - ICEBERG_ASSIGN_OR_RAISE(auto all_headers, - BuildHeaders(headers, default_headers_, session)); + ICEBERG_ASSIGN_OR_RAISE( + auto all_headers, + BuildHeaders(headers, default_headers_, session, {HttpMethod::kHead, path, ""})); cpr::Response response = cpr::Head(cpr::Url{path}, all_headers, *connection_pool_); ICEBERG_RETURN_UNEXPECTED(HandleFailureResponse(response, error_handler)); @@ -216,8 +244,10 @@ Result HttpClient::Delete( const std::string& path, const std::unordered_map& params, const std::unordered_map& headers, const ErrorHandler& error_handler, auth::AuthSession& session) { - ICEBERG_ASSIGN_OR_RAISE(auto all_headers, - BuildHeaders(headers, default_headers_, session)); + ICEBERG_ASSIGN_OR_RAISE( + auto all_headers, + BuildHeaders(headers, default_headers_, session, + {HttpMethod::kDelete, AppendQueryString(path, params), ""})); cpr::Response response = cpr::Delete(cpr::Url{path}, GetParameters(params), all_headers, *connection_pool_); diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt index 2d56d7f35..b2ed9a7cc 100644 --- a/src/iceberg/test/CMakeLists.txt +++ b/src/iceberg/test/CMakeLists.txt @@ -290,6 +290,11 @@ if(ICEBERG_BUILD_REST) rest_json_serde_test.cc rest_util_test.cc) + if(ICEBERG_BUILD_SIGV4) + add_rest_iceberg_test(sigv4_auth_test SOURCES sigv4_auth_test.cc) + target_link_libraries(sigv4_auth_test PRIVATE aws-cpp-sdk-core) + endif() + if(ICEBERG_BUILD_REST_INTEGRATION_TESTS) add_rest_iceberg_test(rest_catalog_integration_test SOURCES diff --git a/src/iceberg/test/auth_manager_test.cc b/src/iceberg/test/auth_manager_test.cc index 40c0f8607..35e08a615 100644 --- a/src/iceberg/test/auth_manager_test.cc +++ b/src/iceberg/test/auth_manager_test.cc @@ -79,7 +79,7 @@ TEST_F(AuthManagerTest, LoadNoopAuthManagerExplicit) { ASSERT_THAT(session_result, IsOk()); std::unordered_map headers; - EXPECT_THAT(session_result.value()->Authenticate(headers), IsOk()); + EXPECT_THAT(session_result.value()->Authenticate(headers, {}), IsOk()); EXPECT_TRUE(headers.empty()); } @@ -123,7 +123,7 @@ TEST_F(AuthManagerTest, LoadBasicAuthManager) { ASSERT_THAT(session_result, IsOk()); std::unordered_map headers; - EXPECT_THAT(session_result.value()->Authenticate(headers), IsOk()); + EXPECT_THAT(session_result.value()->Authenticate(headers, {}), IsOk()); // base64("admin:secret") == "YWRtaW46c2VjcmV0" EXPECT_EQ(headers["Authorization"], "Basic YWRtaW46c2VjcmV0"); } @@ -142,7 +142,7 @@ TEST_F(AuthManagerTest, BasicAuthTypeCaseInsensitive) { ASSERT_THAT(session_result, IsOk()) << "Failed for auth type: " << auth_type; std::unordered_map headers; - EXPECT_THAT(session_result.value()->Authenticate(headers), IsOk()); + EXPECT_THAT(session_result.value()->Authenticate(headers, {}), IsOk()); // base64("user:pass") == "dXNlcjpwYXNz" EXPECT_EQ(headers["Authorization"], "Basic dXNlcjpwYXNz"); } @@ -188,7 +188,7 @@ TEST_F(AuthManagerTest, BasicAuthSpecialCharacters) { ASSERT_THAT(session_result, IsOk()); std::unordered_map headers; - EXPECT_THAT(session_result.value()->Authenticate(headers), IsOk()); + EXPECT_THAT(session_result.value()->Authenticate(headers, {}), IsOk()); // base64("user@domain.com:p@ss:w0rd!") == "dXNlckBkb21haW4uY29tOnBAc3M6dzByZCE=" EXPECT_EQ(headers["Authorization"], "Basic dXNlckBkb21haW4uY29tOnBAc3M6dzByZCE="); } @@ -220,7 +220,7 @@ TEST_F(AuthManagerTest, RegisterCustomAuthManager) { ASSERT_THAT(session_result, IsOk()); std::unordered_map headers; - EXPECT_THAT(session_result.value()->Authenticate(headers), IsOk()); + EXPECT_THAT(session_result.value()->Authenticate(headers, {}), IsOk()); EXPECT_EQ(headers["X-Custom-Auth"], "custom-value"); } @@ -238,7 +238,7 @@ TEST_F(AuthManagerTest, OAuth2StaticToken) { ASSERT_THAT(session_result, IsOk()); std::unordered_map headers; - EXPECT_THAT(session_result.value()->Authenticate(headers), IsOk()); + EXPECT_THAT(session_result.value()->Authenticate(headers, {}), IsOk()); EXPECT_EQ(headers["Authorization"], "Bearer my-static-token"); } @@ -255,7 +255,7 @@ TEST_F(AuthManagerTest, OAuth2InferredFromToken) { ASSERT_THAT(session_result, IsOk()); std::unordered_map headers; - EXPECT_THAT(session_result.value()->Authenticate(headers), IsOk()); + EXPECT_THAT(session_result.value()->Authenticate(headers, {}), IsOk()); EXPECT_EQ(headers["Authorization"], "Bearer inferred-token"); } @@ -274,7 +274,7 @@ TEST_F(AuthManagerTest, OAuth2MissingCredentials) { // Session should have no auth headers std::unordered_map headers; - ASSERT_TRUE(session_result.value()->Authenticate(headers).has_value()); + ASSERT_TRUE(session_result.value()->Authenticate(headers, {}).has_value()); EXPECT_EQ(headers.find("Authorization"), headers.end()); } @@ -295,7 +295,7 @@ TEST_F(AuthManagerTest, OAuth2TokenTakesPriorityOverCredential) { ASSERT_THAT(session_result, IsOk()); std::unordered_map headers; - ASSERT_THAT(session_result.value()->Authenticate(headers), IsOk()); + ASSERT_THAT(session_result.value()->Authenticate(headers, {}), IsOk()); EXPECT_EQ(headers["Authorization"], "Bearer my-static-token"); } diff --git a/src/iceberg/test/sigv4_auth_test.cc b/src/iceberg/test/sigv4_auth_test.cc new file mode 100644 index 000000000..159339d03 --- /dev/null +++ b/src/iceberg/test/sigv4_auth_test.cc @@ -0,0 +1,487 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include +#include + +#include +#include +#include + +#include "iceberg/catalog/rest/auth/auth_managers.h" +#include "iceberg/catalog/rest/auth/auth_properties.h" +#include "iceberg/catalog/rest/auth/auth_session.h" +#include "iceberg/catalog/rest/auth/sigv4_auth_manager.h" +#include "iceberg/catalog/rest/http_client.h" +#include "iceberg/table_identifier.h" +#include "iceberg/test/matchers.h" + +namespace iceberg::rest::auth { + +class SigV4AuthTest : public ::testing::Test { + protected: + static void SetUpTestSuite() { + static bool initialized = false; + if (!initialized) { + Aws::SDKOptions options; + Aws::InitAPI(options); + initialized = true; + } + } + + HttpClient client_{{}}; + + std::unordered_map MakeSigV4Properties() { + return { + {AuthProperties::kAuthType, "sigv4"}, + {AuthProperties::kSigV4SigningRegion, "us-east-1"}, + {AuthProperties::kSigV4SigningName, "execute-api"}, + {AuthProperties::kSigV4AccessKeyId, "AKIAIOSFODNN7EXAMPLE"}, + {AuthProperties::kSigV4SecretAccessKey, + "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"}, + }; + } +}; + +TEST_F(SigV4AuthTest, LoadSigV4AuthManager) { + auto properties = MakeSigV4Properties(); + auto manager_result = AuthManagers::Load("test-catalog", properties); + ASSERT_THAT(manager_result, IsOk()); +} + +TEST_F(SigV4AuthTest, CatalogSessionProducesSession) { + auto properties = MakeSigV4Properties(); + auto manager_result = AuthManagers::Load("test-catalog", properties); + ASSERT_THAT(manager_result, IsOk()); + + auto session_result = manager_result.value()->CatalogSession(client_, properties); + ASSERT_THAT(session_result, IsOk()); +} + +TEST_F(SigV4AuthTest, AuthenticateAddsAuthorizationHeader) { + auto properties = MakeSigV4Properties(); + auto manager_result = AuthManagers::Load("test-catalog", properties); + ASSERT_THAT(manager_result, IsOk()); + + auto session_result = manager_result.value()->CatalogSession(client_, properties); + ASSERT_THAT(session_result, IsOk()); + + std::unordered_map headers; + HTTPRequestContext ctx{HttpMethod::kGet, "https://example.com/v1/config", ""}; + ASSERT_THAT(session_result.value()->Authenticate(headers, ctx), IsOk()); + + EXPECT_NE(headers.find("authorization"), headers.end()); + EXPECT_TRUE(headers["authorization"].starts_with("AWS4-HMAC-SHA256")); + EXPECT_NE(headers.find("x-amz-date"), headers.end()); +} + +TEST_F(SigV4AuthTest, AuthenticateWithPostBody) { + auto properties = MakeSigV4Properties(); + auto manager_result = AuthManagers::Load("test-catalog", properties); + ASSERT_THAT(manager_result, IsOk()); + + auto session_result = manager_result.value()->CatalogSession(client_, properties); + ASSERT_THAT(session_result, IsOk()); + + std::unordered_map headers; + headers["Content-Type"] = "application/json"; + HTTPRequestContext ctx{HttpMethod::kPost, "https://example.com/v1/namespaces", + R"({"namespace":["ns1"]})"}; + ASSERT_THAT(session_result.value()->Authenticate(headers, ctx), IsOk()); + + EXPECT_NE(headers.find("authorization"), headers.end()); + EXPECT_TRUE(headers["authorization"].starts_with("AWS4-HMAC-SHA256")); +} + +TEST_F(SigV4AuthTest, DelegateAuthorizationHeaderRelocated) { + auto properties = MakeSigV4Properties(); + properties[AuthProperties::kToken.key()] = "my-oauth-token"; + properties[AuthProperties::kSigV4DelegateAuthType] = "oauth2"; + + auto manager_result = AuthManagers::Load("test-catalog", properties); + ASSERT_THAT(manager_result, IsOk()); + + auto session_result = manager_result.value()->CatalogSession(client_, properties); + ASSERT_THAT(session_result, IsOk()); + + std::unordered_map headers; + HTTPRequestContext ctx{HttpMethod::kGet, "https://example.com/v1/config", ""}; + ASSERT_THAT(session_result.value()->Authenticate(headers, ctx), IsOk()); + + EXPECT_NE(headers.find("authorization"), headers.end()); + EXPECT_TRUE(headers["authorization"].starts_with("AWS4-HMAC-SHA256")); + EXPECT_NE(headers.find("original-authorization"), headers.end()); + EXPECT_EQ(headers["original-authorization"], "Bearer my-oauth-token"); +} + +TEST_F(SigV4AuthTest, AuthenticateWithSessionToken) { + auto properties = MakeSigV4Properties(); + properties[AuthProperties::kSigV4SessionToken] = "FwoGZXIvYXdzEBYaDHqa0"; + + auto manager_result = AuthManagers::Load("test-catalog", properties); + ASSERT_THAT(manager_result, IsOk()); + + auto session_result = manager_result.value()->CatalogSession(client_, properties); + ASSERT_THAT(session_result, IsOk()); + + std::unordered_map headers; + HTTPRequestContext ctx{HttpMethod::kGet, "https://example.com/v1/config", ""}; + ASSERT_THAT(session_result.value()->Authenticate(headers, ctx), IsOk()); + + EXPECT_NE(headers.find("authorization"), headers.end()); + EXPECT_NE(headers.find("x-amz-security-token"), headers.end()); + EXPECT_EQ(headers["x-amz-security-token"], "FwoGZXIvYXdzEBYaDHqa0"); +} + +TEST_F(SigV4AuthTest, CustomSigningNameAndRegion) { + auto properties = MakeSigV4Properties(); + properties[AuthProperties::kSigV4SigningRegion] = "eu-west-1"; + properties[AuthProperties::kSigV4SigningName] = "custom-service"; + + auto manager_result = AuthManagers::Load("test-catalog", properties); + ASSERT_THAT(manager_result, IsOk()); + + auto session_result = manager_result.value()->CatalogSession(client_, properties); + ASSERT_THAT(session_result, IsOk()); + + std::unordered_map headers; + HTTPRequestContext ctx{HttpMethod::kGet, "https://example.com/v1/config", ""}; + ASSERT_THAT(session_result.value()->Authenticate(headers, ctx), IsOk()); + + auto auth_it = headers.find("authorization"); + ASSERT_NE(auth_it, headers.end()); + EXPECT_TRUE(auth_it->second.find("eu-west-1") != std::string::npos); + EXPECT_TRUE(auth_it->second.find("custom-service") != std::string::npos); +} + +TEST_F(SigV4AuthTest, AuthTypeCaseInsensitive) { + for (const auto& auth_type : {"SIGV4", "SigV4", "sigV4"}) { + auto properties = MakeSigV4Properties(); + properties[AuthProperties::kAuthType] = auth_type; + EXPECT_THAT(AuthManagers::Load("test-catalog", properties), IsOk()) + << "Failed for auth type: " << auth_type; + } +} + +TEST_F(SigV4AuthTest, DelegateDefaultsToOAuth2NoAuth) { + auto properties = MakeSigV4Properties(); + auto manager_result = AuthManagers::Load("test-catalog", properties); + ASSERT_THAT(manager_result, IsOk()); + + auto session_result = manager_result.value()->CatalogSession(client_, properties); + ASSERT_THAT(session_result, IsOk()); + + std::unordered_map headers; + HTTPRequestContext ctx{HttpMethod::kGet, "https://example.com/v1/config", ""}; + ASSERT_THAT(session_result.value()->Authenticate(headers, ctx), IsOk()); + + EXPECT_EQ(headers.find("original-authorization"), headers.end()); +} + +TEST_F(SigV4AuthTest, TableSessionInheritsProperties) { + auto properties = MakeSigV4Properties(); + auto manager_result = AuthManagers::Load("test-catalog", properties); + ASSERT_THAT(manager_result, IsOk()); + + auto catalog_session = manager_result.value()->CatalogSession(client_, properties); + ASSERT_THAT(catalog_session, IsOk()); + + iceberg::TableIdentifier table_id{iceberg::Namespace{{"ns1"}}, "table1"}; + std::unordered_map table_props; + auto table_session = manager_result.value()->TableSession(table_id, table_props, + catalog_session.value()); + ASSERT_THAT(table_session, IsOk()); + + std::unordered_map headers; + HTTPRequestContext ctx{HttpMethod::kGet, "https://example.com/v1/ns1/tables/table1", + ""}; + ASSERT_THAT(table_session.value()->Authenticate(headers, ctx), IsOk()); + EXPECT_NE(headers.find("authorization"), headers.end()); +} + +// ---------- Tests ported from Java TestRESTSigV4AuthSession ---------- + +// Java: authenticateWithoutBody +TEST_F(SigV4AuthTest, AuthenticateWithoutBodyDetailedHeaders) { + auto properties = MakeSigV4Properties(); + auto manager_result = AuthManagers::Load("test-catalog", properties); + ASSERT_THAT(manager_result, IsOk()); + + auto session_result = manager_result.value()->CatalogSession(client_, properties); + ASSERT_THAT(session_result, IsOk()); + + std::unordered_map headers; + headers["Content-Type"] = "application/json"; + HTTPRequestContext ctx{HttpMethod::kGet, "http://localhost:8080/path", ""}; + ASSERT_THAT(session_result.value()->Authenticate(headers, ctx), IsOk()); + + // Original header preserved + EXPECT_EQ(headers["content-type"], "application/json"); + + // Host header generated by the signer + EXPECT_NE(headers.find("host"), headers.end()); + + // SigV4 headers + auto auth_it = headers.find("authorization"); + ASSERT_NE(auth_it, headers.end()); + EXPECT_TRUE(auth_it->second.starts_with("AWS4-HMAC-SHA256 Credential=")); + + EXPECT_TRUE(auth_it->second.find("content-type") != std::string::npos); + EXPECT_TRUE(auth_it->second.find("host") != std::string::npos); + EXPECT_TRUE(auth_it->second.find("x-amz-content-sha256") != std::string::npos); + EXPECT_TRUE(auth_it->second.find("x-amz-date") != std::string::npos); + + // Empty body SHA256 hash + EXPECT_EQ(headers["x-amz-content-sha256"], SigV4AuthSession::kEmptyBodySha256); + + // X-Amz-Date present + EXPECT_NE(headers.find("x-amz-date"), headers.end()); +} + +// Java: authenticateWithBody +TEST_F(SigV4AuthTest, AuthenticateWithBodyDetailedHeaders) { + auto properties = MakeSigV4Properties(); + auto manager_result = AuthManagers::Load("test-catalog", properties); + ASSERT_THAT(manager_result, IsOk()); + + auto session_result = manager_result.value()->CatalogSession(client_, properties); + ASSERT_THAT(session_result, IsOk()); + + std::unordered_map headers; + headers["Content-Type"] = "application/json"; + std::string body = R"({"namespace":["ns1"]})"; + HTTPRequestContext ctx{HttpMethod::kPost, "http://localhost:8080/path", body}; + ASSERT_THAT(session_result.value()->Authenticate(headers, ctx), IsOk()); + + // SigV4 Authorization header + auto auth_it = headers.find("authorization"); + ASSERT_NE(auth_it, headers.end()); + EXPECT_TRUE(auth_it->second.starts_with("AWS4-HMAC-SHA256 Credential=")); + + // x-amz-content-sha256 should be Base64-encoded body SHA256 (matching Java) + auto sha_it = headers.find("x-amz-content-sha256"); + ASSERT_NE(sha_it, headers.end()); + EXPECT_NE(sha_it->second, SigV4AuthSession::kEmptyBodySha256); + + EXPECT_EQ(sha_it->second.size(), 44) + << "Expected Base64 SHA256, got: " << sha_it->second; +} + +// Java: authenticateConflictingAuthorizationHeader +TEST_F(SigV4AuthTest, ConflictingAuthorizationHeaderIncludedInSignedHeaders) { + auto properties = MakeSigV4Properties(); + properties[AuthProperties::kToken.key()] = "my-oauth-token"; + properties[AuthProperties::kSigV4DelegateAuthType] = "oauth2"; + + auto manager_result = AuthManagers::Load("test-catalog", properties); + ASSERT_THAT(manager_result, IsOk()); + + auto session_result = manager_result.value()->CatalogSession(client_, properties); + ASSERT_THAT(session_result, IsOk()); + + std::unordered_map headers; + headers["Content-Type"] = "application/json"; + HTTPRequestContext ctx{HttpMethod::kGet, "http://localhost:8080/path", ""}; + ASSERT_THAT(session_result.value()->Authenticate(headers, ctx), IsOk()); + + // SigV4 Authorization header + auto auth_it = headers.find("authorization"); + ASSERT_NE(auth_it, headers.end()); + EXPECT_TRUE(auth_it->second.starts_with("AWS4-HMAC-SHA256 Credential=")); + + // Relocated delegate header should be in SignedHeaders + EXPECT_TRUE(auth_it->second.find("original-authorization") != std::string::npos) + << "SignedHeaders should include 'original-authorization', got: " + << auth_it->second; + + // Relocated Authorization present + auto orig_it = headers.find("original-authorization"); + ASSERT_NE(orig_it, headers.end()); + EXPECT_EQ(orig_it->second, "Bearer my-oauth-token"); +} + +// Java: authenticateConflictingSigv4Headers +TEST_F(SigV4AuthTest, ConflictingSigV4HeadersRelocated) { + auto delegate = AuthSession::MakeDefault({ + {"x-amz-content-sha256", "fake-sha256"}, + {"X-Amz-Date", "fake-date"}, + {"Content-Type", "application/json"}, + }); + auto credentials = + std::make_shared(Aws::Auth::AWSCredentials( + "AKIAIOSFODNN7EXAMPLE", "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY")); + auto session = std::make_shared(delegate, "us-east-1", "execute-api", + credentials); + + std::unordered_map headers; + HTTPRequestContext ctx{HttpMethod::kGet, "http://localhost:8080/path", ""}; + ASSERT_THAT(session->Authenticate(headers, ctx), IsOk()); + + // The real x-amz-content-sha256 should be the empty body hash (signer overwrites fake) + EXPECT_EQ(headers["x-amz-content-sha256"], SigV4AuthSession::kEmptyBodySha256); + + // The fake values should be relocated since the signer produced different values + auto orig_sha_it = headers.find("Original-x-amz-content-sha256"); + ASSERT_NE(orig_sha_it, headers.end()); + EXPECT_EQ(orig_sha_it->second, "fake-sha256"); + + auto orig_date_it = headers.find("Original-X-Amz-Date"); + ASSERT_NE(orig_date_it, headers.end()); + EXPECT_EQ(orig_date_it->second, "fake-date"); + + // SigV4 Authorization present + EXPECT_NE(headers.find("authorization"), headers.end()); +} + +// Java: close (TestRESTSigV4AuthSession) +TEST_F(SigV4AuthTest, SessionCloseDelegatesToInner) { + auto delegate = AuthSession::MakeDefault({}); + auto credentials = std::make_shared( + Aws::Auth::AWSCredentials("id", "secret")); + auto session = std::make_shared(delegate, "us-east-1", "execute-api", + credentials); + + // Close should succeed without error + EXPECT_THAT(session->Close(), IsOk()); +} + +// ---------- Tests ported from Java TestRESTSigV4AuthManager ---------- + +// Java: createCustomDelegate +TEST_F(SigV4AuthTest, CreateCustomDelegateNone) { + std::unordered_map properties = { + {AuthProperties::kAuthType, "sigv4"}, + {AuthProperties::kSigV4DelegateAuthType, "none"}, + {AuthProperties::kSigV4SigningRegion, "us-west-2"}, + {AuthProperties::kSigV4AccessKeyId, "id"}, + {AuthProperties::kSigV4SecretAccessKey, "secret"}, + }; + + auto manager_result = AuthManagers::Load("test-catalog", properties); + ASSERT_THAT(manager_result, IsOk()); + + auto session_result = manager_result.value()->CatalogSession(client_, properties); + ASSERT_THAT(session_result, IsOk()); + + // Authenticate should work with noop delegate + std::unordered_map headers; + HTTPRequestContext ctx{HttpMethod::kGet, "https://example.com/v1/config", ""}; + ASSERT_THAT(session_result.value()->Authenticate(headers, ctx), IsOk()); + + EXPECT_NE(headers.find("authorization"), headers.end()); + + EXPECT_EQ(headers.find("original-authorization"), headers.end()); + EXPECT_EQ(headers.find("original-authorization"), headers.end()); +} + +// Java: createInvalidCustomDelegate +TEST_F(SigV4AuthTest, CreateInvalidCustomDelegateSigV4Circular) { + std::unordered_map properties = { + {AuthProperties::kAuthType, "sigv4"}, + {AuthProperties::kSigV4DelegateAuthType, "sigv4"}, + {AuthProperties::kSigV4SigningRegion, "us-east-1"}, + {AuthProperties::kSigV4AccessKeyId, "id"}, + {AuthProperties::kSigV4SecretAccessKey, "secret"}, + }; + + auto result = AuthManagers::Load("test-catalog", properties); + EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument)); + EXPECT_THAT(result, + HasErrorMessage("Cannot delegate a SigV4 auth manager to another SigV4")); +} + +// Java: contextualSession +TEST_F(SigV4AuthTest, ContextualSessionOverridesProperties) { + auto properties = MakeSigV4Properties(); + properties[AuthProperties::kSigV4SigningRegion] = "us-west-2"; + + auto manager_result = AuthManagers::Load("test-catalog", properties); + ASSERT_THAT(manager_result, IsOk()); + + auto catalog_session = manager_result.value()->CatalogSession(client_, properties); + ASSERT_THAT(catalog_session, IsOk()); + + // Context overrides region and credentials + std::unordered_map context = { + {AuthProperties::kSigV4AccessKeyId, "id2"}, + {AuthProperties::kSigV4SecretAccessKey, "secret2"}, + {AuthProperties::kSigV4SigningRegion, "eu-west-1"}, + }; + + auto ctx_session = + manager_result.value()->ContextualSession(context, catalog_session.value()); + ASSERT_THAT(ctx_session, IsOk()); + + std::unordered_map headers; + HTTPRequestContext req_ctx{HttpMethod::kGet, "https://example.com/v1/config", ""}; + ASSERT_THAT(ctx_session.value()->Authenticate(headers, req_ctx), IsOk()); + + auto auth_it = headers.find("authorization"); + ASSERT_NE(auth_it, headers.end()); + + EXPECT_TRUE(auth_it->second.find("eu-west-1") != std::string::npos) + << "Expected eu-west-1 in Authorization, got: " << auth_it->second; +} + +// Java: tableSession (with property override) +TEST_F(SigV4AuthTest, TableSessionOverridesProperties) { + auto properties = MakeSigV4Properties(); + properties[AuthProperties::kSigV4SigningRegion] = "us-west-2"; + + auto manager_result = AuthManagers::Load("test-catalog", properties); + ASSERT_THAT(manager_result, IsOk()); + + auto catalog_session = manager_result.value()->CatalogSession(client_, properties); + ASSERT_THAT(catalog_session, IsOk()); + + // Table properties override region and credentials + std::unordered_map table_props = { + {AuthProperties::kSigV4AccessKeyId, "table-key-id"}, + {AuthProperties::kSigV4SecretAccessKey, "table-secret"}, + {AuthProperties::kSigV4SigningRegion, "ap-southeast-1"}, + }; + + iceberg::TableIdentifier table_id{iceberg::Namespace{{"db1"}}, "table1"}; + auto table_session = manager_result.value()->TableSession(table_id, table_props, + catalog_session.value()); + ASSERT_THAT(table_session, IsOk()); + + std::unordered_map headers; + HTTPRequestContext req_ctx{HttpMethod::kGet, "https://example.com/v1/db1/tables/table1", + ""}; + ASSERT_THAT(table_session.value()->Authenticate(headers, req_ctx), IsOk()); + + auto auth_it = headers.find("authorization"); + ASSERT_NE(auth_it, headers.end()); + + EXPECT_TRUE(auth_it->second.find("ap-southeast-1") != std::string::npos) + << "Expected ap-southeast-1 in Authorization, got: " << auth_it->second; +} + +// Java: close (TestRESTSigV4AuthManager) +TEST_F(SigV4AuthTest, ManagerCloseDelegatesToInner) { + auto properties = MakeSigV4Properties(); + auto manager_result = AuthManagers::Load("test-catalog", properties); + ASSERT_THAT(manager_result, IsOk()); + + // Close should succeed without error + EXPECT_THAT(manager_result.value()->Close(), IsOk()); +} + +} // namespace iceberg::rest::auth From e20ef0f3c1ac595ee737739a087dd507a5bf4e35 Mon Sep 17 00:00:00 2001 From: Li Jiajia Date: Sat, 11 Apr 2026 19:01:45 +0800 Subject: [PATCH 02/23] fix(ci): enable SigV4 build in cpp-linter workflow --- .github/workflows/cpp-linter.yml | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cpp-linter.yml b/.github/workflows/cpp-linter.yml index 4225d662e..fe90d92c4 100644 --- a/.github/workflows/cpp-linter.yml +++ b/.github/workflows/cpp-linter.yml @@ -43,7 +43,17 @@ jobs: shell: bash run: | sudo apt-get update - sudo apt-get install -y libcurl4-openssl-dev libsqlite3-dev libpq-dev default-libmysqlclient-dev + sudo apt-get install -y libcurl4-openssl-dev libsqlite3-dev libpq-dev default-libmysqlclient-dev ninja-build + - name: Cache vcpkg packages + uses: actions/cache@v4 + id: vcpkg-cache + with: + path: /usr/local/share/vcpkg/installed + key: vcpkg-x64-linux-aws-sdk-cpp-core-${{ hashFiles('.github/workflows/cpp-linter.yml') }} + - name: Install AWS SDK via vcpkg + if: steps.vcpkg-cache.outputs.cache-hit != 'true' + shell: bash + run: vcpkg install aws-sdk-cpp[core]:x64-linux - name: Run build env: CC: gcc-14 @@ -54,7 +64,9 @@ jobs: -DICEBERG_BUILD_SQL_CATALOG=ON \ -DICEBERG_SQL_SQLITE=ON \ -DICEBERG_SQL_POSTGRESQL=ON \ - -DICEBERG_SQL_MYSQL=ON + -DICEBERG_SQL_MYSQL=ON \ + -DICEBERG_BUILD_SIGV4=ON \ + -DCMAKE_TOOLCHAIN_FILE=/usr/local/share/vcpkg/scripts/buildsystems/vcpkg.cmake cmake --build . - uses: cpp-linter/cpp-linter-action@0f6d1b8d7e38b584cbee606eb23d850c217d54f8 # v2.15.1 id: linter From ffad548c1a6a0ab65a2f0151f3f8fbe96eb0b9eb Mon Sep 17 00:00:00 2001 From: Li Jiajia Date: Tue, 14 Apr 2026 11:13:58 +0800 Subject: [PATCH 03/23] address review feedback --- src/iceberg/catalog/rest/auth/auth_managers.cc | 4 +++- .../catalog/rest/auth/sigv4_auth_manager.cc | 16 ++++++---------- .../catalog/rest/auth/sigv4_auth_manager.h | 1 - 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/src/iceberg/catalog/rest/auth/auth_managers.cc b/src/iceberg/catalog/rest/auth/auth_managers.cc index 0ff3a5623..67d6f9634 100644 --- a/src/iceberg/catalog/rest/auth/auth_managers.cc +++ b/src/iceberg/catalog/rest/auth/auth_managers.cc @@ -118,7 +118,9 @@ Result> MakeSigV4AuthManager( // Prevent circular delegation (sigv4 -> sigv4 -> ...). ICEBERG_PRECHECK(delegate_type != AuthProperties::kAuthTypeSigV4, - "Cannot delegate a SigV4 auth manager to another SigV4 auth manager"); + "Cannot delegate a SigV4 auth manager to another SigV4 auth " + "manager (delegate_type='{}')", + delegate_type); // Load the delegate auth manager. auto delegate_props = properties; diff --git a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc index 96603dbdb..1e81fcb0f 100644 --- a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc +++ b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc @@ -31,6 +31,7 @@ #include "iceberg/catalog/rest/auth/auth_properties.h" #include "iceberg/catalog/rest/endpoint.h" +#include "iceberg/util/checked_cast.h" #include "iceberg/util/macros.h" #include "iceberg/util/string_util.h" @@ -207,8 +208,7 @@ Result> SigV4AuthManager::CatalogSession( Result> SigV4AuthManager::ContextualSession( const std::unordered_map& context, std::shared_ptr parent) { - auto* sigv4_parent = dynamic_cast(parent.get()); - ICEBERG_PRECHECK(sigv4_parent != nullptr, "Parent session is not SigV4"); + auto sigv4_parent = internal::checked_pointer_cast(std::move(parent)); ICEBERG_ASSIGN_OR_RAISE(auto delegate_session, delegate_->ContextualSession( context, sigv4_parent->delegate())); @@ -221,8 +221,7 @@ Result> SigV4AuthManager::TableSession( const TableIdentifier& table, const std::unordered_map& properties, std::shared_ptr parent) { - auto* sigv4_parent = dynamic_cast(parent.get()); - ICEBERG_PRECHECK(sigv4_parent != nullptr, "Parent session is not SigV4"); + auto sigv4_parent = internal::checked_pointer_cast(std::move(parent)); ICEBERG_ASSIGN_OR_RAISE( auto delegate_session, @@ -248,15 +247,12 @@ SigV4AuthManager::MakeCredentialsProvider( AuthProperties::kSigV4AccessKeyId, AuthProperties::kSigV4SecretAccessKey); if (has_ak) { + Aws::Auth::AWSCredentials credentials(access_key_it->second.c_str(), + secret_key_it->second.c_str()); auto session_token_it = properties.find(AuthProperties::kSigV4SessionToken); if (session_token_it != properties.end() && !session_token_it->second.empty()) { - Aws::Auth::AWSCredentials credentials(access_key_it->second.c_str(), - secret_key_it->second.c_str(), - session_token_it->second.c_str()); - return std::make_shared(credentials); + credentials.SetSessionToken(session_token_it->second.c_str()); } - Aws::Auth::AWSCredentials credentials(access_key_it->second.c_str(), - secret_key_it->second.c_str()); return std::make_shared(credentials); } diff --git a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.h b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.h index 7ee9aa7bc..8b546e4ee 100644 --- a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.h +++ b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.h @@ -77,7 +77,6 @@ class ICEBERG_REST_EXPORT SigV4AuthSession : public AuthSession { std::string signing_region_; std::string signing_name_; std::shared_ptr credentials_provider_; - /// Shared signer instance, matching Java's single Aws4Signer per manager. std::unique_ptr signer_; }; From 0bde6e981e99556029286cb8ea888c23a3e29cbe Mon Sep 17 00:00:00 2001 From: Li Jiajia Date: Tue, 14 Apr 2026 11:31:40 +0800 Subject: [PATCH 04/23] add single-arg Authenticate() overload --- src/iceberg/catalog/rest/auth/auth_session.h | 5 +++++ src/iceberg/test/auth_manager_test.cc | 18 +++++++++--------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/iceberg/catalog/rest/auth/auth_session.h b/src/iceberg/catalog/rest/auth/auth_session.h index e19a9d984..3ca2bc6b5 100644 --- a/src/iceberg/catalog/rest/auth/auth_session.h +++ b/src/iceberg/catalog/rest/auth/auth_session.h @@ -61,6 +61,11 @@ class ICEBERG_REST_EXPORT AuthSession { virtual Status Authenticate(std::unordered_map& headers, const HTTPRequestContext& request_context) = 0; + /// \brief Convenience overload for callers that don't need a request context. + Status Authenticate(std::unordered_map& headers) { + return Authenticate(headers, HTTPRequestContext{}); + } + /// \brief Close the session and release any resources. /// /// This method is called when the session is no longer needed. For stateful diff --git a/src/iceberg/test/auth_manager_test.cc b/src/iceberg/test/auth_manager_test.cc index 35e08a615..40c0f8607 100644 --- a/src/iceberg/test/auth_manager_test.cc +++ b/src/iceberg/test/auth_manager_test.cc @@ -79,7 +79,7 @@ TEST_F(AuthManagerTest, LoadNoopAuthManagerExplicit) { ASSERT_THAT(session_result, IsOk()); std::unordered_map headers; - EXPECT_THAT(session_result.value()->Authenticate(headers, {}), IsOk()); + EXPECT_THAT(session_result.value()->Authenticate(headers), IsOk()); EXPECT_TRUE(headers.empty()); } @@ -123,7 +123,7 @@ TEST_F(AuthManagerTest, LoadBasicAuthManager) { ASSERT_THAT(session_result, IsOk()); std::unordered_map headers; - EXPECT_THAT(session_result.value()->Authenticate(headers, {}), IsOk()); + EXPECT_THAT(session_result.value()->Authenticate(headers), IsOk()); // base64("admin:secret") == "YWRtaW46c2VjcmV0" EXPECT_EQ(headers["Authorization"], "Basic YWRtaW46c2VjcmV0"); } @@ -142,7 +142,7 @@ TEST_F(AuthManagerTest, BasicAuthTypeCaseInsensitive) { ASSERT_THAT(session_result, IsOk()) << "Failed for auth type: " << auth_type; std::unordered_map headers; - EXPECT_THAT(session_result.value()->Authenticate(headers, {}), IsOk()); + EXPECT_THAT(session_result.value()->Authenticate(headers), IsOk()); // base64("user:pass") == "dXNlcjpwYXNz" EXPECT_EQ(headers["Authorization"], "Basic dXNlcjpwYXNz"); } @@ -188,7 +188,7 @@ TEST_F(AuthManagerTest, BasicAuthSpecialCharacters) { ASSERT_THAT(session_result, IsOk()); std::unordered_map headers; - EXPECT_THAT(session_result.value()->Authenticate(headers, {}), IsOk()); + EXPECT_THAT(session_result.value()->Authenticate(headers), IsOk()); // base64("user@domain.com:p@ss:w0rd!") == "dXNlckBkb21haW4uY29tOnBAc3M6dzByZCE=" EXPECT_EQ(headers["Authorization"], "Basic dXNlckBkb21haW4uY29tOnBAc3M6dzByZCE="); } @@ -220,7 +220,7 @@ TEST_F(AuthManagerTest, RegisterCustomAuthManager) { ASSERT_THAT(session_result, IsOk()); std::unordered_map headers; - EXPECT_THAT(session_result.value()->Authenticate(headers, {}), IsOk()); + EXPECT_THAT(session_result.value()->Authenticate(headers), IsOk()); EXPECT_EQ(headers["X-Custom-Auth"], "custom-value"); } @@ -238,7 +238,7 @@ TEST_F(AuthManagerTest, OAuth2StaticToken) { ASSERT_THAT(session_result, IsOk()); std::unordered_map headers; - EXPECT_THAT(session_result.value()->Authenticate(headers, {}), IsOk()); + EXPECT_THAT(session_result.value()->Authenticate(headers), IsOk()); EXPECT_EQ(headers["Authorization"], "Bearer my-static-token"); } @@ -255,7 +255,7 @@ TEST_F(AuthManagerTest, OAuth2InferredFromToken) { ASSERT_THAT(session_result, IsOk()); std::unordered_map headers; - EXPECT_THAT(session_result.value()->Authenticate(headers, {}), IsOk()); + EXPECT_THAT(session_result.value()->Authenticate(headers), IsOk()); EXPECT_EQ(headers["Authorization"], "Bearer inferred-token"); } @@ -274,7 +274,7 @@ TEST_F(AuthManagerTest, OAuth2MissingCredentials) { // Session should have no auth headers std::unordered_map headers; - ASSERT_TRUE(session_result.value()->Authenticate(headers, {}).has_value()); + ASSERT_TRUE(session_result.value()->Authenticate(headers).has_value()); EXPECT_EQ(headers.find("Authorization"), headers.end()); } @@ -295,7 +295,7 @@ TEST_F(AuthManagerTest, OAuth2TokenTakesPriorityOverCredential) { ASSERT_THAT(session_result, IsOk()); std::unordered_map headers; - ASSERT_THAT(session_result.value()->Authenticate(headers, {}), IsOk()); + ASSERT_THAT(session_result.value()->Authenticate(headers), IsOk()); EXPECT_EQ(headers["Authorization"], "Bearer my-static-token"); } From b30f5d6d60bbee1804659d693508291c02270797 Mon Sep 17 00:00:00 2001 From: Li Jiajia Date: Tue, 14 Apr 2026 13:34:33 +0800 Subject: [PATCH 05/23] sigv4 x-amz-content-sha256 must be Base64 in canonical headers --- .../catalog/rest/auth/sigv4_auth_manager.cc | 70 ++++++++++--------- 1 file changed, 36 insertions(+), 34 deletions(-) diff --git a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc index 1e81fcb0f..0d52534c0 100644 --- a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc +++ b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc @@ -82,6 +82,25 @@ std::unordered_map MergeProperties( return merged; } +/// SigV4 signer reproducing Java RESTSigV4AuthSession's +/// SignerChecksumParams(SHA256, X_AMZ_CONTENT_SHA256) output: canonical +/// headers carry Base64(SHA256(body)), canonical request trailer uses hex. +class RestSigV4Signer : public Aws::Client::AWSAuthV4Signer { + public: + RestSigV4Signer(const std::shared_ptr& creds, + const char* service_name, const Aws::String& region) + : Aws::Client::AWSAuthV4Signer(creds, service_name, region, + PayloadSigningPolicy::Always, + /*urlEscapePath=*/false) { + // AWSAuthV4Signer normally overwrites x-amz-content-sha256 with the hex + // body hash right before canonicalization, which would clobber the Base64 + // value the caller pre-sets. Clearing this flag skips that overwrite so + // canonical headers see the caller's Base64, while ComputePayloadHash + // still feeds hex into the canonical request trailer. + m_includeSha256HashHeader = false; + } +}; + } // namespace // ---- SigV4AuthSession ---- @@ -94,22 +113,18 @@ SigV4AuthSession::SigV4AuthSession( signing_region_(std::move(signing_region)), signing_name_(std::move(signing_name)), credentials_provider_(std::move(credentials_provider)), - signer_(std::make_unique( - credentials_provider_, signing_name_.c_str(), signing_region_.c_str(), - Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Always, - /*urlEscapePath=*/false)) {} + signer_(std::make_unique( + credentials_provider_, signing_name_.c_str(), signing_region_.c_str())) {} SigV4AuthSession::~SigV4AuthSession() = default; Status SigV4AuthSession::Authenticate( std::unordered_map& headers, const HTTPRequestContext& request_context) { - // 1. Delegate authenticates first (e.g., adds OAuth2 Bearer token). ICEBERG_RETURN_UNEXPECTED(delegate_->Authenticate(headers, request_context)); - auto original_headers = headers; - // 2. Relocate Authorization header (case-insensitive) so SigV4 takes precedence. + // Relocate any delegate-set Authorization so SigV4 takes precedence. std::unordered_map signing_headers; for (const auto& [name, value] : headers) { if (StringUtils::EqualsIgnoreCase(name, "Authorization")) { @@ -119,59 +134,46 @@ Status SigV4AuthSession::Authenticate( } } - // 3. Build AWS SDK request. Aws::Http::URI aws_uri(request_context.url.c_str()); auto aws_request = std::make_shared( aws_uri, ToAwsMethod(request_context.method)); - for (const auto& [name, value] : signing_headers) { aws_request->SetHeaderValue(Aws::String(name.c_str()), Aws::String(value.c_str())); } - // 4. Set body content hash (matching Java's RESTSigV4AuthSession). - // Empty body: set EMPTY_BODY_SHA256 explicitly (Java line 118-121 workaround). - // Non-empty body: set body stream; the signer (PayloadSigningPolicy::Always) - // computes the real hex hash. Step 7 converts hex to Base64 after signing. + // Empty body uses hex EMPTY_BODY_SHA256 (Java workaround for the signer + // producing an invalid checksum for empty bodies); non-empty body uses + // Base64(SHA256(body)). See RestSigV4Signer doc for why this value survives + // signing to land in the canonical headers unchanged. if (request_context.body.empty()) { aws_request->SetHeaderValue("x-amz-content-sha256", Aws::String(kEmptyBodySha256)); } else { auto body_stream = Aws::MakeShared("SigV4Body", request_context.body); aws_request->AddContentBody(body_stream); + auto sha256 = Aws::Utils::HashingUtils::CalculateSHA256( + Aws::String(request_context.body.data(), request_context.body.size())); + aws_request->SetHeaderValue("x-amz-content-sha256", + Aws::Utils::HashingUtils::Base64Encode(sha256)); } - // 5. Sign. if (!signer_->SignRequest(*aws_request)) { return std::unexpected( Error{ErrorKind::kAuthenticationFailed, "SigV4 signing failed"}); } - // 6. Extract signed headers, relocating conflicts with originals. + // Merge signed headers back; relocate any original value that conflicts. headers.clear(); - auto signed_headers = aws_request->GetHeaders(); - for (auto it = signed_headers.begin(); it != signed_headers.end(); ++it) { - std::string name_str(it->first.c_str(), it->first.size()); - std::string value_str(it->second.c_str(), it->second.size()); - + for (const auto& [aws_name, aws_value] : aws_request->GetHeaders()) { + std::string name(aws_name.c_str(), aws_name.size()); + std::string value(aws_value.c_str(), aws_value.size()); for (const auto& [orig_name, orig_value] : original_headers) { - if (StringUtils::EqualsIgnoreCase(orig_name, name_str) && orig_value != value_str) { + if (StringUtils::EqualsIgnoreCase(orig_name, name) && orig_value != value) { headers[std::string(kRelocatedHeaderPrefix) + orig_name] = orig_value; break; } } - - headers[name_str] = value_str; - } - - // 7. Convert body hash from hex to Base64 (matching Java's SignerChecksumParams - // output). Only convert if the value is a valid hex SHA256 (64 hex chars). - if (!request_context.body.empty()) { - auto it = headers.find("x-amz-content-sha256"); - if (it != headers.end() && it->second.size() == 64 && - it->second != std::string(kEmptyBodySha256)) { - auto decoded = Aws::Utils::HashingUtils::HexDecode(Aws::String(it->second.c_str())); - it->second = std::string(Aws::Utils::HashingUtils::Base64Encode(decoded).c_str()); - } + headers[std::move(name)] = std::move(value); } return {}; From e7a2d8eb5438dd4a9a519ad2ef49010d4b38a886 Mon Sep 17 00:00:00 2001 From: Li Jiajia Date: Tue, 14 Apr 2026 22:51:27 +0800 Subject: [PATCH 06/23] adopt request-in/request-out Authenticate interface --- .../catalog/rest/auth/auth_properties.h | 10 -- src/iceberg/catalog/rest/auth/auth_session.cc | 9 +- src/iceberg/catalog/rest/auth/auth_session.h | 27 ++-- .../catalog/rest/auth/sigv4_auth_manager.cc | 68 ++++----- .../catalog/rest/auth/sigv4_auth_manager.h | 3 +- src/iceberg/catalog/rest/http_client.cc | 57 +++++--- src/iceberg/test/auth_manager_test.cc | 56 +++---- src/iceberg/test/sigv4_auth_test.cc | 138 ++++++++++-------- 8 files changed, 192 insertions(+), 176 deletions(-) diff --git a/src/iceberg/catalog/rest/auth/auth_properties.h b/src/iceberg/catalog/rest/auth/auth_properties.h index f5de44ea8..f6dfc4ae8 100644 --- a/src/iceberg/catalog/rest/auth/auth_properties.h +++ b/src/iceberg/catalog/rest/auth/auth_properties.h @@ -54,23 +54,13 @@ class ICEBERG_REST_EXPORT AuthProperties : public ConfigBase { // ---- SigV4 entries ---- - inline static const std::string kSigV4Region = "rest.auth.sigv4.region"; - inline static const std::string kSigV4Service = "rest.auth.sigv4.service"; inline static const std::string kSigV4DelegateAuthType = "rest.auth.sigv4.delegate-auth-type"; - - // ---- SigV4 AWS credential entries ---- - - /// AWS region for SigV4 signing. inline static const std::string kSigV4SigningRegion = "rest.signing-region"; - /// AWS service name for SigV4 signing. inline static const std::string kSigV4SigningName = "rest.signing-name"; inline static const std::string kSigV4SigningNameDefault = "execute-api"; - /// Static access key ID for SigV4 signing. inline static const std::string kSigV4AccessKeyId = "rest.access-key-id"; - /// Static secret access key for SigV4 signing. inline static const std::string kSigV4SecretAccessKey = "rest.secret-access-key"; - /// Optional session token for SigV4 signing. inline static const std::string kSigV4SessionToken = "rest.session-token"; // ---- OAuth2 entries ---- diff --git a/src/iceberg/catalog/rest/auth/auth_session.cc b/src/iceberg/catalog/rest/auth/auth_session.cc index 82b85e5e6..7c5b9b289 100644 --- a/src/iceberg/catalog/rest/auth/auth_session.cc +++ b/src/iceberg/catalog/rest/auth/auth_session.cc @@ -43,13 +43,12 @@ class DefaultAuthSession : public AuthSession { explicit DefaultAuthSession(std::unordered_map headers) : headers_(std::move(headers)) {} - Status Authenticate( - std::unordered_map& headers, - [[maybe_unused]] const HTTPRequestContext& request_context) override { + Result Authenticate(const HTTPRequest& request) override { + HTTPRequest authenticated = request; for (const auto& [key, value] : headers_) { - headers.try_emplace(key, value); + authenticated.headers.try_emplace(key, value); } - return {}; + return authenticated; } private: diff --git a/src/iceberg/catalog/rest/auth/auth_session.h b/src/iceberg/catalog/rest/auth/auth_session.h index 3ca2bc6b5..d3fa9ea2c 100644 --- a/src/iceberg/catalog/rest/auth/auth_session.h +++ b/src/iceberg/catalog/rest/auth/auth_session.h @@ -33,10 +33,14 @@ namespace iceberg::rest::auth { -/// \brief Context about the HTTP request being authenticated. -struct ICEBERG_REST_EXPORT HTTPRequestContext { +/// \brief An outgoing HTTP request passed through an AuthSession. Mirrors the +/// HTTPRequest type used by the Java reference implementation so signing +/// implementations like SigV4 can operate on method, url, headers, and body +/// as a single value. +struct ICEBERG_REST_EXPORT HTTPRequest { HttpMethod method = HttpMethod::kGet; std::string url; + std::unordered_map headers; std::string body; }; @@ -45,26 +49,21 @@ class ICEBERG_REST_EXPORT AuthSession { public: virtual ~AuthSession() = default; - /// \brief Authenticate the given request headers. + /// \brief Authenticate an outgoing HTTP request. /// - /// This method adds authentication information (e.g., Authorization header) - /// to the provided headers map. The implementation should be idempotent. + /// Returns a new request with authentication information (e.g., an + /// Authorization header) added. Implementations must be idempotent and must + /// not mutate the input request. /// - /// \param[in,out] headers The headers map to add authentication information to. - /// \return Status indicating success or one of the following errors: + /// \param request The request to authenticate. + /// \return The authenticated request on success, or one of: /// - AuthenticationFailed: General authentication failure (invalid credentials, /// etc.) /// - TokenExpired: Authentication token has expired and needs refresh /// - NotAuthorized: Not authenticated (401) /// - IOError: Network or connection errors when reaching auth server /// - RestError: HTTP errors from authentication service - virtual Status Authenticate(std::unordered_map& headers, - const HTTPRequestContext& request_context) = 0; - - /// \brief Convenience overload for callers that don't need a request context. - Status Authenticate(std::unordered_map& headers) { - return Authenticate(headers, HTTPRequestContext{}); - } + virtual Result Authenticate(const HTTPRequest& request) = 0; /// \brief Close the session and release any resources. /// diff --git a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc index 0d52534c0..bca6c8f55 100644 --- a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc +++ b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc @@ -82,9 +82,8 @@ std::unordered_map MergeProperties( return merged; } -/// SigV4 signer reproducing Java RESTSigV4AuthSession's -/// SignerChecksumParams(SHA256, X_AMZ_CONTENT_SHA256) output: canonical -/// headers carry Base64(SHA256(body)), canonical request trailer uses hex. +/// Matches Java RESTSigV4AuthSession: canonical headers carry +/// Base64(SHA256(body)), canonical request trailer uses hex. class RestSigV4Signer : public Aws::Client::AWSAuthV4Signer { public: RestSigV4Signer(const std::shared_ptr& creds, @@ -92,11 +91,9 @@ class RestSigV4Signer : public Aws::Client::AWSAuthV4Signer { : Aws::Client::AWSAuthV4Signer(creds, service_name, region, PayloadSigningPolicy::Always, /*urlEscapePath=*/false) { - // AWSAuthV4Signer normally overwrites x-amz-content-sha256 with the hex - // body hash right before canonicalization, which would clobber the Base64 - // value the caller pre-sets. Clearing this flag skips that overwrite so - // canonical headers see the caller's Base64, while ComputePayloadHash - // still feeds hex into the canonical request trailer. + // Skip the signer's hex overwrite of x-amz-content-sha256 so canonical + // headers see the caller's Base64; ComputePayloadHash still feeds hex + // into the canonical request trailer. m_includeSha256HashHeader = false; } }; @@ -118,15 +115,13 @@ SigV4AuthSession::SigV4AuthSession( SigV4AuthSession::~SigV4AuthSession() = default; -Status SigV4AuthSession::Authenticate( - std::unordered_map& headers, - const HTTPRequestContext& request_context) { - ICEBERG_RETURN_UNEXPECTED(delegate_->Authenticate(headers, request_context)); - auto original_headers = headers; +Result SigV4AuthSession::Authenticate(const HTTPRequest& request) { + ICEBERG_ASSIGN_OR_RAISE(auto delegate_request, delegate_->Authenticate(request)); + const auto& original_headers = delegate_request.headers; // Relocate any delegate-set Authorization so SigV4 takes precedence. std::unordered_map signing_headers; - for (const auto& [name, value] : headers) { + for (const auto& [name, value] : original_headers) { if (StringUtils::EqualsIgnoreCase(name, "Authorization")) { signing_headers[std::string(kRelocatedHeaderPrefix) + name] = value; } else { @@ -134,25 +129,23 @@ Status SigV4AuthSession::Authenticate( } } - Aws::Http::URI aws_uri(request_context.url.c_str()); + Aws::Http::URI aws_uri(delegate_request.url.c_str()); auto aws_request = std::make_shared( - aws_uri, ToAwsMethod(request_context.method)); + aws_uri, ToAwsMethod(delegate_request.method)); for (const auto& [name, value] : signing_headers) { aws_request->SetHeaderValue(Aws::String(name.c_str()), Aws::String(value.c_str())); } // Empty body uses hex EMPTY_BODY_SHA256 (Java workaround for the signer - // producing an invalid checksum for empty bodies); non-empty body uses - // Base64(SHA256(body)). See RestSigV4Signer doc for why this value survives - // signing to land in the canonical headers unchanged. - if (request_context.body.empty()) { + // producing an invalid checksum on empty bodies); non-empty uses Base64. + if (delegate_request.body.empty()) { aws_request->SetHeaderValue("x-amz-content-sha256", Aws::String(kEmptyBodySha256)); } else { auto body_stream = - Aws::MakeShared("SigV4Body", request_context.body); + Aws::MakeShared("SigV4Body", delegate_request.body); aws_request->AddContentBody(body_stream); auto sha256 = Aws::Utils::HashingUtils::CalculateSHA256( - Aws::String(request_context.body.data(), request_context.body.size())); + Aws::String(delegate_request.body.data(), delegate_request.body.size())); aws_request->SetHeaderValue("x-amz-content-sha256", Aws::Utils::HashingUtils::Base64Encode(sha256)); } @@ -162,21 +155,25 @@ Status SigV4AuthSession::Authenticate( Error{ErrorKind::kAuthenticationFailed, "SigV4 signing failed"}); } - // Merge signed headers back; relocate any original value that conflicts. - headers.clear(); + // Fill headers with the signed set, relocating any conflicting originals. + HTTPRequest signed_request{.method = delegate_request.method, + .url = std::move(delegate_request.url), + .headers = {}, + .body = std::move(delegate_request.body)}; for (const auto& [aws_name, aws_value] : aws_request->GetHeaders()) { std::string name(aws_name.c_str(), aws_name.size()); std::string value(aws_value.c_str(), aws_value.size()); for (const auto& [orig_name, orig_value] : original_headers) { if (StringUtils::EqualsIgnoreCase(orig_name, name) && orig_value != value) { - headers[std::string(kRelocatedHeaderPrefix) + orig_name] = orig_value; + signed_request.headers[std::string(kRelocatedHeaderPrefix) + orig_name] = + orig_value; break; } } - headers[std::move(name)] = std::move(value); + signed_request.headers[std::move(name)] = std::move(value); } - return {}; + return signed_request; } Status SigV4AuthSession::Close() { return delegate_->Close(); } @@ -243,7 +240,6 @@ SigV4AuthManager::MakeCredentialsProvider( bool has_ak = access_key_it != properties.end() && !access_key_it->second.empty(); bool has_sk = secret_key_it != properties.end() && !secret_key_it->second.empty(); - // Reject partial credentials — providing only one of AK/SK is a misconfiguration. ICEBERG_PRECHECK( has_ak == has_sk, "Both '{}' and '{}' must be set together, or neither", AuthProperties::kSigV4AccessKeyId, AuthProperties::kSigV4SecretAccessKey); @@ -263,14 +259,10 @@ SigV4AuthManager::MakeCredentialsProvider( std::string SigV4AuthManager::ResolveSigningRegion( const std::unordered_map& properties) { - auto it = properties.find(AuthProperties::kSigV4SigningRegion); - if (it != properties.end() && !it->second.empty()) { + if (auto it = properties.find(AuthProperties::kSigV4SigningRegion); + it != properties.end() && !it->second.empty()) { return it->second; } - auto legacy_it = properties.find(AuthProperties::kSigV4Region); - if (legacy_it != properties.end() && !legacy_it->second.empty()) { - return legacy_it->second; - } if (const char* env = std::getenv("AWS_REGION")) { return std::string(env); } @@ -282,14 +274,10 @@ std::string SigV4AuthManager::ResolveSigningRegion( std::string SigV4AuthManager::ResolveSigningName( const std::unordered_map& properties) { - auto it = properties.find(AuthProperties::kSigV4SigningName); - if (it != properties.end() && !it->second.empty()) { + if (auto it = properties.find(AuthProperties::kSigV4SigningName); + it != properties.end() && !it->second.empty()) { return it->second; } - auto legacy_it = properties.find(AuthProperties::kSigV4Service); - if (legacy_it != properties.end() && !legacy_it->second.empty()) { - return legacy_it->second; - } return AuthProperties::kSigV4SigningNameDefault; } diff --git a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.h b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.h index 8b546e4ee..48cc0eb2e 100644 --- a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.h +++ b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.h @@ -65,8 +65,7 @@ class ICEBERG_REST_EXPORT SigV4AuthSession : public AuthSession { ~SigV4AuthSession() override; - Status Authenticate(std::unordered_map& headers, - const HTTPRequestContext& request_context) override; + Result Authenticate(const HTTPRequest& request) override; Status Close() override; diff --git a/src/iceberg/catalog/rest/http_client.cc b/src/iceberg/catalog/rest/http_client.cc index f7d7c80b0..8dec6f239 100644 --- a/src/iceberg/catalog/rest/http_client.cc +++ b/src/iceberg/catalog/rest/http_client.cc @@ -70,17 +70,22 @@ namespace { /// \brief Default error type for unparseable REST responses. constexpr std::string_view kRestExceptionType = "RESTException"; -/// \brief Prepare headers for an HTTP request. -Result BuildHeaders( - const std::unordered_map& request_headers, +/// \brief Merge default headers with per-request headers (per-request wins). +std::unordered_map MergeHeaders( const std::unordered_map& default_headers, - auth::AuthSession& session, const auth::HTTPRequestContext& request_context) { - std::unordered_map headers(default_headers); + const std::unordered_map& request_headers) { + std::unordered_map merged(default_headers); for (const auto& [key, val] : request_headers) { - headers.insert_or_assign(key, val); + merged.insert_or_assign(key, val); } - ICEBERG_RETURN_UNEXPECTED(session.Authenticate(headers, request_context)); - return cpr::Header(headers.begin(), headers.end()); + return merged; +} + +/// \brief Authenticate the request and return the final cpr::Header. +Result AuthenticateRequest(const auth::HTTPRequest& request, + auth::AuthSession& session) { + ICEBERG_ASSIGN_OR_RAISE(auto authenticated, session.Authenticate(request)); + return cpr::Header(authenticated.headers.begin(), authenticated.headers.end()); } /// \brief Converts a map of string key-value pairs to cpr::Parameters. @@ -171,8 +176,11 @@ Result HttpClient::Get( const ErrorHandler& error_handler, auth::AuthSession& session) { ICEBERG_ASSIGN_OR_RAISE( auto all_headers, - BuildHeaders(headers, default_headers_, session, - {HttpMethod::kGet, AppendQueryString(path, params), ""})); + AuthenticateRequest({.method = HttpMethod::kGet, + .url = AppendQueryString(path, params), + .headers = MergeHeaders(default_headers_, headers), + .body = ""}, + session)); cpr::Response response = cpr::Get(cpr::Url{path}, GetParameters(params), all_headers, *connection_pool_); @@ -188,7 +196,11 @@ Result HttpClient::Post( const ErrorHandler& error_handler, auth::AuthSession& session) { ICEBERG_ASSIGN_OR_RAISE( auto all_headers, - BuildHeaders(headers, default_headers_, session, {HttpMethod::kPost, path, body})); + AuthenticateRequest({.method = HttpMethod::kPost, + .url = path, + .headers = MergeHeaders(default_headers_, headers), + .body = body}, + session)); cpr::Response response = cpr::Post(cpr::Url{path}, cpr::Body{body}, all_headers, *connection_pool_); @@ -214,9 +226,13 @@ Result HttpClient::PostForm( // actual payload sent over the wire. cpr::Payload payload(pair_list.begin(), pair_list.end()); std::string encoded_body = payload.GetContent(); - ICEBERG_ASSIGN_OR_RAISE(auto all_headers, - BuildHeaders(form_headers, default_headers_, session, - {HttpMethod::kPost, path, encoded_body})); + ICEBERG_ASSIGN_OR_RAISE( + auto all_headers, + AuthenticateRequest({.method = HttpMethod::kPost, + .url = path, + .headers = MergeHeaders(default_headers_, form_headers), + .body = encoded_body}, + session)); cpr::Response response = cpr::Post(cpr::Url{path}, std::move(payload), all_headers, *connection_pool_); @@ -231,7 +247,11 @@ Result HttpClient::Head( const ErrorHandler& error_handler, auth::AuthSession& session) { ICEBERG_ASSIGN_OR_RAISE( auto all_headers, - BuildHeaders(headers, default_headers_, session, {HttpMethod::kHead, path, ""})); + AuthenticateRequest({.method = HttpMethod::kHead, + .url = path, + .headers = MergeHeaders(default_headers_, headers), + .body = ""}, + session)); cpr::Response response = cpr::Head(cpr::Url{path}, all_headers, *connection_pool_); ICEBERG_RETURN_UNEXPECTED(HandleFailureResponse(response, error_handler)); @@ -246,8 +266,11 @@ Result HttpClient::Delete( const ErrorHandler& error_handler, auth::AuthSession& session) { ICEBERG_ASSIGN_OR_RAISE( auto all_headers, - BuildHeaders(headers, default_headers_, session, - {HttpMethod::kDelete, AppendQueryString(path, params), ""})); + AuthenticateRequest({.method = HttpMethod::kDelete, + .url = AppendQueryString(path, params), + .headers = MergeHeaders(default_headers_, headers), + .body = ""}, + session)); cpr::Response response = cpr::Delete(cpr::Url{path}, GetParameters(params), all_headers, *connection_pool_); diff --git a/src/iceberg/test/auth_manager_test.cc b/src/iceberg/test/auth_manager_test.cc index 40c0f8607..85d15dcaa 100644 --- a/src/iceberg/test/auth_manager_test.cc +++ b/src/iceberg/test/auth_manager_test.cc @@ -78,9 +78,9 @@ TEST_F(AuthManagerTest, LoadNoopAuthManagerExplicit) { auto session_result = manager_result.value()->CatalogSession(client_, properties); ASSERT_THAT(session_result, IsOk()); - std::unordered_map headers; - EXPECT_THAT(session_result.value()->Authenticate(headers), IsOk()); - EXPECT_TRUE(headers.empty()); + auto auth_result = session_result.value()->Authenticate({}); + ASSERT_THAT(auth_result, IsOk()); + EXPECT_TRUE(auth_result.value().headers.empty()); } // Verifies that NoopAuthManager is inferred when no auth properties are set @@ -122,10 +122,10 @@ TEST_F(AuthManagerTest, LoadBasicAuthManager) { auto session_result = manager_result.value()->CatalogSession(client_, properties); ASSERT_THAT(session_result, IsOk()); - std::unordered_map headers; - EXPECT_THAT(session_result.value()->Authenticate(headers), IsOk()); + auto auth_result = session_result.value()->Authenticate({}); + ASSERT_THAT(auth_result, IsOk()); // base64("admin:secret") == "YWRtaW46c2VjcmV0" - EXPECT_EQ(headers["Authorization"], "Basic YWRtaW46c2VjcmV0"); + EXPECT_EQ(auth_result.value().headers["Authorization"], "Basic YWRtaW46c2VjcmV0"); } // Verifies BasicAuthManager is case-insensitive for auth type @@ -141,10 +141,10 @@ TEST_F(AuthManagerTest, BasicAuthTypeCaseInsensitive) { auto session_result = manager_result.value()->CatalogSession(client_, properties); ASSERT_THAT(session_result, IsOk()) << "Failed for auth type: " << auth_type; - std::unordered_map headers; - EXPECT_THAT(session_result.value()->Authenticate(headers), IsOk()); + auto auth_result = session_result.value()->Authenticate({}); + ASSERT_THAT(auth_result, IsOk()) << "Failed for auth type: " << auth_type; // base64("user:pass") == "dXNlcjpwYXNz" - EXPECT_EQ(headers["Authorization"], "Basic dXNlcjpwYXNz"); + EXPECT_EQ(auth_result.value().headers["Authorization"], "Basic dXNlcjpwYXNz"); } } @@ -187,10 +187,11 @@ TEST_F(AuthManagerTest, BasicAuthSpecialCharacters) { auto session_result = manager_result.value()->CatalogSession(client_, properties); ASSERT_THAT(session_result, IsOk()); - std::unordered_map headers; - EXPECT_THAT(session_result.value()->Authenticate(headers), IsOk()); + auto auth_result = session_result.value()->Authenticate({}); + ASSERT_THAT(auth_result, IsOk()); // base64("user@domain.com:p@ss:w0rd!") == "dXNlckBkb21haW4uY29tOnBAc3M6dzByZCE=" - EXPECT_EQ(headers["Authorization"], "Basic dXNlckBkb21haW4uY29tOnBAc3M6dzByZCE="); + EXPECT_EQ(auth_result.value().headers["Authorization"], + "Basic dXNlckBkb21haW4uY29tOnBAc3M6dzByZCE="); } // Verifies custom auth manager registration @@ -219,9 +220,9 @@ TEST_F(AuthManagerTest, RegisterCustomAuthManager) { auto session_result = manager_result.value()->CatalogSession(client_, properties); ASSERT_THAT(session_result, IsOk()); - std::unordered_map headers; - EXPECT_THAT(session_result.value()->Authenticate(headers), IsOk()); - EXPECT_EQ(headers["X-Custom-Auth"], "custom-value"); + auto auth_result = session_result.value()->Authenticate({}); + ASSERT_THAT(auth_result, IsOk()); + EXPECT_EQ(auth_result.value().headers["X-Custom-Auth"], "custom-value"); } // Verifies OAuth2 with static token @@ -237,9 +238,9 @@ TEST_F(AuthManagerTest, OAuth2StaticToken) { auto session_result = manager_result.value()->CatalogSession(client_, properties); ASSERT_THAT(session_result, IsOk()); - std::unordered_map headers; - EXPECT_THAT(session_result.value()->Authenticate(headers), IsOk()); - EXPECT_EQ(headers["Authorization"], "Bearer my-static-token"); + auto auth_result = session_result.value()->Authenticate({}); + ASSERT_THAT(auth_result, IsOk()); + EXPECT_EQ(auth_result.value().headers["Authorization"], "Bearer my-static-token"); } // Verifies OAuth2 type is inferred from token property @@ -254,9 +255,9 @@ TEST_F(AuthManagerTest, OAuth2InferredFromToken) { auto session_result = manager_result.value()->CatalogSession(client_, properties); ASSERT_THAT(session_result, IsOk()); - std::unordered_map headers; - EXPECT_THAT(session_result.value()->Authenticate(headers), IsOk()); - EXPECT_EQ(headers["Authorization"], "Bearer inferred-token"); + auto auth_result = session_result.value()->Authenticate({}); + ASSERT_THAT(auth_result, IsOk()); + EXPECT_EQ(auth_result.value().headers["Authorization"], "Bearer inferred-token"); } // Verifies OAuth2 returns unauthenticated session when neither token nor credential is @@ -273,9 +274,10 @@ TEST_F(AuthManagerTest, OAuth2MissingCredentials) { ASSERT_THAT(session_result, IsOk()); // Session should have no auth headers - std::unordered_map headers; - ASSERT_TRUE(session_result.value()->Authenticate(headers).has_value()); - EXPECT_EQ(headers.find("Authorization"), headers.end()); + auto auth_result = session_result.value()->Authenticate({}); + ASSERT_TRUE(auth_result.has_value()); + EXPECT_EQ(auth_result.value().headers.find("Authorization"), + auth_result.value().headers.end()); } // Verifies that when both token and credential are provided, token takes priority @@ -294,9 +296,9 @@ TEST_F(AuthManagerTest, OAuth2TokenTakesPriorityOverCredential) { auto session_result = manager_result.value()->CatalogSession(client_, properties); ASSERT_THAT(session_result, IsOk()); - std::unordered_map headers; - ASSERT_THAT(session_result.value()->Authenticate(headers), IsOk()); - EXPECT_EQ(headers["Authorization"], "Bearer my-static-token"); + auto auth_result = session_result.value()->Authenticate({}); + ASSERT_THAT(auth_result, IsOk()); + EXPECT_EQ(auth_result.value().headers["Authorization"], "Bearer my-static-token"); } // Verifies OAuthTokenResponse JSON parsing diff --git a/src/iceberg/test/sigv4_auth_test.cc b/src/iceberg/test/sigv4_auth_test.cc index 159339d03..caa9b5603 100644 --- a/src/iceberg/test/sigv4_auth_test.cc +++ b/src/iceberg/test/sigv4_auth_test.cc @@ -82,12 +82,13 @@ TEST_F(SigV4AuthTest, AuthenticateAddsAuthorizationHeader) { auto session_result = manager_result.value()->CatalogSession(client_, properties); ASSERT_THAT(session_result, IsOk()); - std::unordered_map headers; - HTTPRequestContext ctx{HttpMethod::kGet, "https://example.com/v1/config", ""}; - ASSERT_THAT(session_result.value()->Authenticate(headers, ctx), IsOk()); + HTTPRequest request{.method = HttpMethod::kGet, .url = "https://example.com/v1/config"}; + auto auth_result = session_result.value()->Authenticate(request); + ASSERT_THAT(auth_result, IsOk()); + const auto& headers = auth_result.value().headers; EXPECT_NE(headers.find("authorization"), headers.end()); - EXPECT_TRUE(headers["authorization"].starts_with("AWS4-HMAC-SHA256")); + EXPECT_TRUE(headers.at("authorization").starts_with("AWS4-HMAC-SHA256")); EXPECT_NE(headers.find("x-amz-date"), headers.end()); } @@ -99,14 +100,16 @@ TEST_F(SigV4AuthTest, AuthenticateWithPostBody) { auto session_result = manager_result.value()->CatalogSession(client_, properties); ASSERT_THAT(session_result, IsOk()); - std::unordered_map headers; - headers["Content-Type"] = "application/json"; - HTTPRequestContext ctx{HttpMethod::kPost, "https://example.com/v1/namespaces", - R"({"namespace":["ns1"]})"}; - ASSERT_THAT(session_result.value()->Authenticate(headers, ctx), IsOk()); + HTTPRequest request{.method = HttpMethod::kPost, + .url = "https://example.com/v1/namespaces", + .headers = {{"Content-Type", "application/json"}}, + .body = R"({"namespace":["ns1"]})"}; + auto auth_result = session_result.value()->Authenticate(request); + ASSERT_THAT(auth_result, IsOk()); + const auto& headers = auth_result.value().headers; EXPECT_NE(headers.find("authorization"), headers.end()); - EXPECT_TRUE(headers["authorization"].starts_with("AWS4-HMAC-SHA256")); + EXPECT_TRUE(headers.at("authorization").starts_with("AWS4-HMAC-SHA256")); } TEST_F(SigV4AuthTest, DelegateAuthorizationHeaderRelocated) { @@ -120,14 +123,15 @@ TEST_F(SigV4AuthTest, DelegateAuthorizationHeaderRelocated) { auto session_result = manager_result.value()->CatalogSession(client_, properties); ASSERT_THAT(session_result, IsOk()); - std::unordered_map headers; - HTTPRequestContext ctx{HttpMethod::kGet, "https://example.com/v1/config", ""}; - ASSERT_THAT(session_result.value()->Authenticate(headers, ctx), IsOk()); + HTTPRequest request{.method = HttpMethod::kGet, .url = "https://example.com/v1/config"}; + auto auth_result = session_result.value()->Authenticate(request); + ASSERT_THAT(auth_result, IsOk()); + const auto& headers = auth_result.value().headers; EXPECT_NE(headers.find("authorization"), headers.end()); - EXPECT_TRUE(headers["authorization"].starts_with("AWS4-HMAC-SHA256")); + EXPECT_TRUE(headers.at("authorization").starts_with("AWS4-HMAC-SHA256")); EXPECT_NE(headers.find("original-authorization"), headers.end()); - EXPECT_EQ(headers["original-authorization"], "Bearer my-oauth-token"); + EXPECT_EQ(headers.at("original-authorization"), "Bearer my-oauth-token"); } TEST_F(SigV4AuthTest, AuthenticateWithSessionToken) { @@ -140,13 +144,14 @@ TEST_F(SigV4AuthTest, AuthenticateWithSessionToken) { auto session_result = manager_result.value()->CatalogSession(client_, properties); ASSERT_THAT(session_result, IsOk()); - std::unordered_map headers; - HTTPRequestContext ctx{HttpMethod::kGet, "https://example.com/v1/config", ""}; - ASSERT_THAT(session_result.value()->Authenticate(headers, ctx), IsOk()); + HTTPRequest request{.method = HttpMethod::kGet, .url = "https://example.com/v1/config"}; + auto auth_result = session_result.value()->Authenticate(request); + ASSERT_THAT(auth_result, IsOk()); + const auto& headers = auth_result.value().headers; EXPECT_NE(headers.find("authorization"), headers.end()); EXPECT_NE(headers.find("x-amz-security-token"), headers.end()); - EXPECT_EQ(headers["x-amz-security-token"], "FwoGZXIvYXdzEBYaDHqa0"); + EXPECT_EQ(headers.at("x-amz-security-token"), "FwoGZXIvYXdzEBYaDHqa0"); } TEST_F(SigV4AuthTest, CustomSigningNameAndRegion) { @@ -160,9 +165,10 @@ TEST_F(SigV4AuthTest, CustomSigningNameAndRegion) { auto session_result = manager_result.value()->CatalogSession(client_, properties); ASSERT_THAT(session_result, IsOk()); - std::unordered_map headers; - HTTPRequestContext ctx{HttpMethod::kGet, "https://example.com/v1/config", ""}; - ASSERT_THAT(session_result.value()->Authenticate(headers, ctx), IsOk()); + HTTPRequest request{.method = HttpMethod::kGet, .url = "https://example.com/v1/config"}; + auto auth_result = session_result.value()->Authenticate(request); + ASSERT_THAT(auth_result, IsOk()); + const auto& headers = auth_result.value().headers; auto auth_it = headers.find("authorization"); ASSERT_NE(auth_it, headers.end()); @@ -187,9 +193,10 @@ TEST_F(SigV4AuthTest, DelegateDefaultsToOAuth2NoAuth) { auto session_result = manager_result.value()->CatalogSession(client_, properties); ASSERT_THAT(session_result, IsOk()); - std::unordered_map headers; - HTTPRequestContext ctx{HttpMethod::kGet, "https://example.com/v1/config", ""}; - ASSERT_THAT(session_result.value()->Authenticate(headers, ctx), IsOk()); + HTTPRequest request{.method = HttpMethod::kGet, .url = "https://example.com/v1/config"}; + auto auth_result = session_result.value()->Authenticate(request); + ASSERT_THAT(auth_result, IsOk()); + const auto& headers = auth_result.value().headers; EXPECT_EQ(headers.find("original-authorization"), headers.end()); } @@ -208,11 +215,12 @@ TEST_F(SigV4AuthTest, TableSessionInheritsProperties) { catalog_session.value()); ASSERT_THAT(table_session, IsOk()); - std::unordered_map headers; - HTTPRequestContext ctx{HttpMethod::kGet, "https://example.com/v1/ns1/tables/table1", - ""}; - ASSERT_THAT(table_session.value()->Authenticate(headers, ctx), IsOk()); - EXPECT_NE(headers.find("authorization"), headers.end()); + HTTPRequest request{.method = HttpMethod::kGet, + .url = "https://example.com/v1/ns1/tables/table1"}; + auto auth_result = table_session.value()->Authenticate(request); + ASSERT_THAT(auth_result, IsOk()); + EXPECT_NE(auth_result.value().headers.find("authorization"), + auth_result.value().headers.end()); } // ---------- Tests ported from Java TestRESTSigV4AuthSession ---------- @@ -226,13 +234,15 @@ TEST_F(SigV4AuthTest, AuthenticateWithoutBodyDetailedHeaders) { auto session_result = manager_result.value()->CatalogSession(client_, properties); ASSERT_THAT(session_result, IsOk()); - std::unordered_map headers; - headers["Content-Type"] = "application/json"; - HTTPRequestContext ctx{HttpMethod::kGet, "http://localhost:8080/path", ""}; - ASSERT_THAT(session_result.value()->Authenticate(headers, ctx), IsOk()); + HTTPRequest request{.method = HttpMethod::kGet, + .url = "http://localhost:8080/path", + .headers = {{"Content-Type", "application/json"}}}; + auto auth_result = session_result.value()->Authenticate(request); + ASSERT_THAT(auth_result, IsOk()); + const auto& headers = auth_result.value().headers; // Original header preserved - EXPECT_EQ(headers["content-type"], "application/json"); + EXPECT_EQ(headers.at("content-type"), "application/json"); // Host header generated by the signer EXPECT_NE(headers.find("host"), headers.end()); @@ -248,7 +258,7 @@ TEST_F(SigV4AuthTest, AuthenticateWithoutBodyDetailedHeaders) { EXPECT_TRUE(auth_it->second.find("x-amz-date") != std::string::npos); // Empty body SHA256 hash - EXPECT_EQ(headers["x-amz-content-sha256"], SigV4AuthSession::kEmptyBodySha256); + EXPECT_EQ(headers.at("x-amz-content-sha256"), SigV4AuthSession::kEmptyBodySha256); // X-Amz-Date present EXPECT_NE(headers.find("x-amz-date"), headers.end()); @@ -263,11 +273,13 @@ TEST_F(SigV4AuthTest, AuthenticateWithBodyDetailedHeaders) { auto session_result = manager_result.value()->CatalogSession(client_, properties); ASSERT_THAT(session_result, IsOk()); - std::unordered_map headers; - headers["Content-Type"] = "application/json"; - std::string body = R"({"namespace":["ns1"]})"; - HTTPRequestContext ctx{HttpMethod::kPost, "http://localhost:8080/path", body}; - ASSERT_THAT(session_result.value()->Authenticate(headers, ctx), IsOk()); + HTTPRequest request{.method = HttpMethod::kPost, + .url = "http://localhost:8080/path", + .headers = {{"Content-Type", "application/json"}}, + .body = R"({"namespace":["ns1"]})"}; + auto auth_result = session_result.value()->Authenticate(request); + ASSERT_THAT(auth_result, IsOk()); + const auto& headers = auth_result.value().headers; // SigV4 Authorization header auto auth_it = headers.find("authorization"); @@ -295,10 +307,12 @@ TEST_F(SigV4AuthTest, ConflictingAuthorizationHeaderIncludedInSignedHeaders) { auto session_result = manager_result.value()->CatalogSession(client_, properties); ASSERT_THAT(session_result, IsOk()); - std::unordered_map headers; - headers["Content-Type"] = "application/json"; - HTTPRequestContext ctx{HttpMethod::kGet, "http://localhost:8080/path", ""}; - ASSERT_THAT(session_result.value()->Authenticate(headers, ctx), IsOk()); + HTTPRequest request{.method = HttpMethod::kGet, + .url = "http://localhost:8080/path", + .headers = {{"Content-Type", "application/json"}}}; + auto auth_result = session_result.value()->Authenticate(request); + ASSERT_THAT(auth_result, IsOk()); + const auto& headers = auth_result.value().headers; // SigV4 Authorization header auto auth_it = headers.find("authorization"); @@ -329,12 +343,13 @@ TEST_F(SigV4AuthTest, ConflictingSigV4HeadersRelocated) { auto session = std::make_shared(delegate, "us-east-1", "execute-api", credentials); - std::unordered_map headers; - HTTPRequestContext ctx{HttpMethod::kGet, "http://localhost:8080/path", ""}; - ASSERT_THAT(session->Authenticate(headers, ctx), IsOk()); + HTTPRequest request{.method = HttpMethod::kGet, .url = "http://localhost:8080/path"}; + auto auth_result = session->Authenticate(request); + ASSERT_THAT(auth_result, IsOk()); + const auto& headers = auth_result.value().headers; // The real x-amz-content-sha256 should be the empty body hash (signer overwrites fake) - EXPECT_EQ(headers["x-amz-content-sha256"], SigV4AuthSession::kEmptyBodySha256); + EXPECT_EQ(headers.at("x-amz-content-sha256"), SigV4AuthSession::kEmptyBodySha256); // The fake values should be relocated since the signer produced different values auto orig_sha_it = headers.find("Original-x-amz-content-sha256"); @@ -380,13 +395,12 @@ TEST_F(SigV4AuthTest, CreateCustomDelegateNone) { ASSERT_THAT(session_result, IsOk()); // Authenticate should work with noop delegate - std::unordered_map headers; - HTTPRequestContext ctx{HttpMethod::kGet, "https://example.com/v1/config", ""}; - ASSERT_THAT(session_result.value()->Authenticate(headers, ctx), IsOk()); + HTTPRequest request{.method = HttpMethod::kGet, .url = "https://example.com/v1/config"}; + auto auth_result = session_result.value()->Authenticate(request); + ASSERT_THAT(auth_result, IsOk()); + const auto& headers = auth_result.value().headers; EXPECT_NE(headers.find("authorization"), headers.end()); - - EXPECT_EQ(headers.find("original-authorization"), headers.end()); EXPECT_EQ(headers.find("original-authorization"), headers.end()); } @@ -428,9 +442,10 @@ TEST_F(SigV4AuthTest, ContextualSessionOverridesProperties) { manager_result.value()->ContextualSession(context, catalog_session.value()); ASSERT_THAT(ctx_session, IsOk()); - std::unordered_map headers; - HTTPRequestContext req_ctx{HttpMethod::kGet, "https://example.com/v1/config", ""}; - ASSERT_THAT(ctx_session.value()->Authenticate(headers, req_ctx), IsOk()); + HTTPRequest request{.method = HttpMethod::kGet, .url = "https://example.com/v1/config"}; + auto auth_result = ctx_session.value()->Authenticate(request); + ASSERT_THAT(auth_result, IsOk()); + const auto& headers = auth_result.value().headers; auto auth_it = headers.find("authorization"); ASSERT_NE(auth_it, headers.end()); @@ -462,10 +477,11 @@ TEST_F(SigV4AuthTest, TableSessionOverridesProperties) { catalog_session.value()); ASSERT_THAT(table_session, IsOk()); - std::unordered_map headers; - HTTPRequestContext req_ctx{HttpMethod::kGet, "https://example.com/v1/db1/tables/table1", - ""}; - ASSERT_THAT(table_session.value()->Authenticate(headers, req_ctx), IsOk()); + HTTPRequest request{.method = HttpMethod::kGet, + .url = "https://example.com/v1/db1/tables/table1"}; + auto auth_result = table_session.value()->Authenticate(request); + ASSERT_THAT(auth_result, IsOk()); + const auto& headers = auth_result.value().headers; auto auth_it = headers.find("authorization"); ASSERT_NE(auth_it, headers.end()); From afc117eee73f7f0ecc1a414a00fdbcded8e05d1e Mon Sep 17 00:00:00 2001 From: Li Jiajia Date: Tue, 14 Apr 2026 23:02:30 +0800 Subject: [PATCH 07/23] move MakeSigV4AuthManager to sigv4_auth_manager.cc --- .../catalog/rest/auth/auth_managers.cc | 29 ------------------- .../catalog/rest/auth/sigv4_auth_manager.cc | 24 +++++++++++++++ 2 files changed, 24 insertions(+), 29 deletions(-) diff --git a/src/iceberg/catalog/rest/auth/auth_managers.cc b/src/iceberg/catalog/rest/auth/auth_managers.cc index 67d6f9634..0a1d12788 100644 --- a/src/iceberg/catalog/rest/auth/auth_managers.cc +++ b/src/iceberg/catalog/rest/auth/auth_managers.cc @@ -22,9 +22,6 @@ #include #include "iceberg/catalog/rest/auth/auth_manager_internal.h" -#ifdef ICEBERG_BUILD_SIGV4 -# include "iceberg/catalog/rest/auth/sigv4_auth_manager.h" -#endif #include "iceberg/catalog/rest/auth/auth_properties.h" #include "iceberg/util/string_util.h" @@ -105,30 +102,4 @@ Result> AuthManagers::Load( return it->second(name, properties); } -#ifdef ICEBERG_BUILD_SIGV4 -Result> MakeSigV4AuthManager( - std::string_view name, - const std::unordered_map& properties) { - // Determine the delegate auth type. Default to OAuth2 if not specified. - std::string delegate_type = AuthProperties::kAuthTypeOAuth2; - auto it = properties.find(AuthProperties::kSigV4DelegateAuthType); - if (it != properties.end() && !it->second.empty()) { - delegate_type = StringUtils::ToLower(it->second); - } - - // Prevent circular delegation (sigv4 -> sigv4 -> ...). - ICEBERG_PRECHECK(delegate_type != AuthProperties::kAuthTypeSigV4, - "Cannot delegate a SigV4 auth manager to another SigV4 auth " - "manager (delegate_type='{}')", - delegate_type); - - // Load the delegate auth manager. - auto delegate_props = properties; - delegate_props[AuthProperties::kAuthType] = delegate_type; - - ICEBERG_ASSIGN_OR_RAISE(auto delegate, AuthManagers::Load(name, delegate_props)); - return std::make_unique(std::move(delegate)); -} -#endif - } // namespace iceberg::rest::auth diff --git a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc index bca6c8f55..3fd918ceb 100644 --- a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc +++ b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc @@ -29,6 +29,8 @@ #include #include +#include "iceberg/catalog/rest/auth/auth_manager_internal.h" +#include "iceberg/catalog/rest/auth/auth_managers.h" #include "iceberg/catalog/rest/auth/auth_properties.h" #include "iceberg/catalog/rest/endpoint.h" #include "iceberg/util/checked_cast.h" @@ -292,4 +294,26 @@ Result> SigV4AuthManager::WrapSession( std::move(credentials)); } +Result> MakeSigV4AuthManager( + std::string_view name, + const std::unordered_map& properties) { + // Default to OAuth2 when delegate type is not specified. + std::string delegate_type = AuthProperties::kAuthTypeOAuth2; + if (auto it = properties.find(AuthProperties::kSigV4DelegateAuthType); + it != properties.end() && !it->second.empty()) { + delegate_type = StringUtils::ToLower(it->second); + } + + // Prevent circular delegation (sigv4 -> sigv4 -> ...). + ICEBERG_PRECHECK(delegate_type != AuthProperties::kAuthTypeSigV4, + "Cannot delegate a SigV4 auth manager to another SigV4 auth " + "manager (delegate_type='{}')", + delegate_type); + + auto delegate_props = properties; + delegate_props[AuthProperties::kAuthType] = delegate_type; + ICEBERG_ASSIGN_OR_RAISE(auto delegate, AuthManagers::Load(name, delegate_props)); + return std::make_unique(std::move(delegate)); +} + } // namespace iceberg::rest::auth From bb1ace70ed7ce31e1b3c61a8b951235e6f160f2f Mon Sep 17 00:00:00 2001 From: Li Jiajia Date: Tue, 14 Apr 2026 23:42:50 +0800 Subject: [PATCH 08/23] Meson: wire SigV4 behind a feature option --- meson.options | 7 ++ src/iceberg/catalog/rest/auth/meson.build | 18 ++-- .../catalog/rest/auth/sigv4_auth_manager.cc | 39 ++++---- .../catalog/rest/auth/sigv4_auth_manager.h | 24 +++-- src/iceberg/catalog/rest/http_client.cc | 96 ++++++++----------- src/iceberg/catalog/rest/meson.build | 16 +++- src/iceberg/test/meson.build | 8 ++ src/iceberg/test/sigv4_auth_test.cc | 41 +++++++- 8 files changed, 153 insertions(+), 96 deletions(-) diff --git a/meson.options b/meson.options index 9152af34d..c53574889 100644 --- a/meson.options +++ b/meson.options @@ -44,4 +44,11 @@ option( value: 'disabled', ) +option( + 'sigv4', + type: 'feature', + description: 'Build AWS SigV4 authentication support for rest catalog', + value: 'disabled', +) + option('tests', type: 'feature', description: 'Build tests', value: 'enabled') diff --git a/src/iceberg/catalog/rest/auth/meson.build b/src/iceberg/catalog/rest/auth/meson.build index 4de6e821e..352ddfaec 100644 --- a/src/iceberg/catalog/rest/auth/meson.build +++ b/src/iceberg/catalog/rest/auth/meson.build @@ -15,13 +15,17 @@ # specific language governing permissions and limitations # under the License. +iceberg_rest_auth_headers = [ + 'auth_manager.h', + 'auth_managers.h', + 'auth_properties.h', + 'auth_session.h', + 'oauth2_util.h', +] +if aws_sdk_core_dep.found() + iceberg_rest_auth_headers += ['sigv4_auth_manager.h'] +endif install_headers( - [ - 'auth_manager.h', - 'auth_managers.h', - 'auth_properties.h', - 'auth_session.h', - 'oauth2_util.h', - ], + iceberg_rest_auth_headers, subdir: 'iceberg/catalog/rest/auth', ) diff --git a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc index 3fd918ceb..bf9fc5da4 100644 --- a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc +++ b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc @@ -20,6 +20,7 @@ #include "iceberg/catalog/rest/auth/sigv4_auth_manager.h" #include +#include #include #include @@ -107,13 +108,15 @@ class RestSigV4Signer : public Aws::Client::AWSAuthV4Signer { SigV4AuthSession::SigV4AuthSession( std::shared_ptr delegate, std::string signing_region, std::string signing_name, - std::shared_ptr credentials_provider) + std::shared_ptr credentials_provider, + std::unordered_map effective_properties) : delegate_(std::move(delegate)), signing_region_(std::move(signing_region)), signing_name_(std::move(signing_name)), credentials_provider_(std::move(credentials_provider)), signer_(std::make_unique( - credentials_provider_, signing_name_.c_str(), signing_region_.c_str())) {} + credentials_provider_, signing_name_.c_str(), signing_region_.c_str())), + effective_properties_(std::move(effective_properties)) {} SigV4AuthSession::~SigV4AuthSession() = default; @@ -121,7 +124,6 @@ Result SigV4AuthSession::Authenticate(const HTTPRequest& request) { ICEBERG_ASSIGN_OR_RAISE(auto delegate_request, delegate_->Authenticate(request)); const auto& original_headers = delegate_request.headers; - // Relocate any delegate-set Authorization so SigV4 takes precedence. std::unordered_map signing_headers; for (const auto& [name, value] : original_headers) { if (StringUtils::EqualsIgnoreCase(name, "Authorization")) { @@ -138,8 +140,8 @@ Result SigV4AuthSession::Authenticate(const HTTPRequest& request) { aws_request->SetHeaderValue(Aws::String(name.c_str()), Aws::String(value.c_str())); } - // Empty body uses hex EMPTY_BODY_SHA256 (Java workaround for the signer - // producing an invalid checksum on empty bodies); non-empty uses Base64. + // Empty body: hex EMPTY_BODY_SHA256 (Java parity workaround for the signer + // computing an invalid checksum on empty bodies). Non-empty: Base64. if (delegate_request.body.empty()) { aws_request->SetHeaderValue("x-amz-content-sha256", Aws::String(kEmptyBodySha256)); } else { @@ -152,12 +154,14 @@ Result SigV4AuthSession::Authenticate(const HTTPRequest& request) { Aws::Utils::HashingUtils::Base64Encode(sha256)); } - if (!signer_->SignRequest(*aws_request)) { - return std::unexpected( - Error{ErrorKind::kAuthenticationFailed, "SigV4 signing failed"}); + { + std::lock_guard lock(signing_mutex_); + if (!signer_->SignRequest(*aws_request)) { + return std::unexpected( + Error{ErrorKind::kAuthenticationFailed, "SigV4 signing failed"}); + } } - // Fill headers with the signed set, relocating any conflicting originals. HTTPRequest signed_request{.method = delegate_request.method, .url = std::move(delegate_request.url), .headers = {}, @@ -200,7 +204,6 @@ Result> SigV4AuthManager::CatalogSession( HttpClient& shared_client, const std::unordered_map& properties) { AwsSdkGuard::EnsureInitialized(); - catalog_properties_ = properties; ICEBERG_ASSIGN_OR_RAISE(auto delegate_session, delegate_->CatalogSession(shared_client, properties)); return WrapSession(std::move(delegate_session), properties); @@ -214,8 +217,8 @@ Result> SigV4AuthManager::ContextualSession( ICEBERG_ASSIGN_OR_RAISE(auto delegate_session, delegate_->ContextualSession( context, sigv4_parent->delegate())); - auto merged = MergeProperties(catalog_properties_, context); - return WrapSession(std::move(delegate_session), merged); + auto merged = MergeProperties(sigv4_parent->effective_properties(), context); + return WrapSession(std::move(delegate_session), std::move(merged)); } Result> SigV4AuthManager::TableSession( @@ -228,8 +231,8 @@ Result> SigV4AuthManager::TableSession( auto delegate_session, delegate_->TableSession(table, properties, sigv4_parent->delegate())); - auto merged = MergeProperties(catalog_properties_, properties); - return WrapSession(std::move(delegate_session), merged); + auto merged = MergeProperties(sigv4_parent->effective_properties(), properties); + return WrapSession(std::move(delegate_session), std::move(merged)); } Status SigV4AuthManager::Close() { return delegate_->Close(); } @@ -285,13 +288,13 @@ std::string SigV4AuthManager::ResolveSigningName( Result> SigV4AuthManager::WrapSession( std::shared_ptr delegate_session, - const std::unordered_map& properties) { + std::unordered_map properties) { auto region = ResolveSigningRegion(properties); auto service = ResolveSigningName(properties); ICEBERG_ASSIGN_OR_RAISE(auto credentials, MakeCredentialsProvider(properties)); - return std::make_shared(std::move(delegate_session), - std::move(region), std::move(service), - std::move(credentials)); + return std::make_shared( + std::move(delegate_session), std::move(region), std::move(service), + std::move(credentials), std::move(properties)); } Result> MakeSigV4AuthManager( diff --git a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.h b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.h index 48cc0eb2e..88dcc789d 100644 --- a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.h +++ b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.h @@ -20,6 +20,7 @@ #pragma once #include +#include #include #include @@ -47,8 +48,8 @@ namespace iceberg::rest::auth { /// /// See https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_sigv.html /// -/// Thread safety: Authenticate() is NOT thread-safe. Each session should be used -/// from a single thread, or callers must synchronize externally. +/// Thread safety: Authenticate() is thread-safe; concurrent signing calls are +/// serialized by an internal mutex. class ICEBERG_REST_EXPORT SigV4AuthSession : public AuthSession { public: /// SHA-256 hash of empty string, used for requests with no body. @@ -61,7 +62,8 @@ class ICEBERG_REST_EXPORT SigV4AuthSession : public AuthSession { SigV4AuthSession( std::shared_ptr delegate, std::string signing_region, std::string signing_name, - std::shared_ptr credentials_provider); + std::shared_ptr credentials_provider, + std::unordered_map effective_properties); ~SigV4AuthSession() override; @@ -71,20 +73,27 @@ class ICEBERG_REST_EXPORT SigV4AuthSession : public AuthSession { const std::shared_ptr& delegate() const { return delegate_; } + /// Merged properties this session was built from. Child sessions inherit + /// from this (not the catalog's) so contextual overrides propagate into + /// table sessions. + const std::unordered_map& effective_properties() const { + return effective_properties_; + } + private: std::shared_ptr delegate_; std::string signing_region_; std::string signing_name_; std::shared_ptr credentials_provider_; std::unique_ptr signer_; + std::unordered_map effective_properties_; + // AWSAuthV4Signer::SignRequest mutates shared signer state. + mutable std::mutex signing_mutex_; }; /// \brief An AuthManager that produces SigV4AuthSession instances. /// /// Wraps a delegate AuthManager to handle double authentication (e.g., OAuth2 + SigV4). -/// -/// Thread safety: CatalogSession() must be called before ContextualSession() or -/// TableSession(). Concurrent calls are NOT safe — callers must synchronize externally. class ICEBERG_REST_EXPORT SigV4AuthManager : public AuthManager { public: explicit SigV4AuthManager(std::unique_ptr delegate); @@ -118,10 +127,9 @@ class ICEBERG_REST_EXPORT SigV4AuthManager : public AuthManager { const std::unordered_map& properties); Result> WrapSession( std::shared_ptr delegate_session, - const std::unordered_map& properties); + std::unordered_map properties); std::unique_ptr delegate_; - std::unordered_map catalog_properties_; }; } // namespace iceberg::rest::auth diff --git a/src/iceberg/catalog/rest/http_client.cc b/src/iceberg/catalog/rest/http_client.cc index 8dec6f239..b37995833 100644 --- a/src/iceberg/catalog/rest/http_client.cc +++ b/src/iceberg/catalog/rest/http_client.cc @@ -81,21 +81,8 @@ std::unordered_map MergeHeaders( return merged; } -/// \brief Authenticate the request and return the final cpr::Header. -Result AuthenticateRequest(const auth::HTTPRequest& request, - auth::AuthSession& session) { - ICEBERG_ASSIGN_OR_RAISE(auto authenticated, session.Authenticate(request)); - return cpr::Header(authenticated.headers.begin(), authenticated.headers.end()); -} - -/// \brief Converts a map of string key-value pairs to cpr::Parameters. -cpr::Parameters GetParameters( - const std::unordered_map& params) { - cpr::Parameters cpr_params; - for (const auto& [key, val] : params) { - cpr_params.Add({key, val}); - } - return cpr_params; +cpr::Header ToCprHeader(const auth::HTTPRequest& request) { + return cpr::Header(request.headers.begin(), request.headers.end()); } /// \brief Append URL-encoded query parameters to a URL, sorted by key. @@ -175,14 +162,13 @@ Result HttpClient::Get( const std::unordered_map& headers, const ErrorHandler& error_handler, auth::AuthSession& session) { ICEBERG_ASSIGN_OR_RAISE( - auto all_headers, - AuthenticateRequest({.method = HttpMethod::kGet, - .url = AppendQueryString(path, params), - .headers = MergeHeaders(default_headers_, headers), - .body = ""}, - session)); - cpr::Response response = - cpr::Get(cpr::Url{path}, GetParameters(params), all_headers, *connection_pool_); + auto authenticated, + session.Authenticate({.method = HttpMethod::kGet, + .url = AppendQueryString(path, params), + .headers = MergeHeaders(default_headers_, headers), + .body = ""})); + cpr::Response response = cpr::Get(cpr::Url{authenticated.url}, + ToCprHeader(authenticated), *connection_pool_); ICEBERG_RETURN_UNEXPECTED(HandleFailureResponse(response, error_handler)); HttpResponse http_response; @@ -195,14 +181,14 @@ Result HttpClient::Post( const std::unordered_map& headers, const ErrorHandler& error_handler, auth::AuthSession& session) { ICEBERG_ASSIGN_OR_RAISE( - auto all_headers, - AuthenticateRequest({.method = HttpMethod::kPost, - .url = path, - .headers = MergeHeaders(default_headers_, headers), - .body = body}, - session)); + auto authenticated, + session.Authenticate({.method = HttpMethod::kPost, + .url = path, + .headers = MergeHeaders(default_headers_, headers), + .body = body})); cpr::Response response = - cpr::Post(cpr::Url{path}, cpr::Body{body}, all_headers, *connection_pool_); + cpr::Post(cpr::Url{authenticated.url}, cpr::Body{authenticated.body}, + ToCprHeader(authenticated), *connection_pool_); ICEBERG_RETURN_UNEXPECTED(HandleFailureResponse(response, error_handler)); HttpResponse http_response; @@ -222,19 +208,18 @@ Result HttpClient::PostForm( for (const auto& [key, val] : form_data) { pair_list.emplace_back(key, val); } - // Use cpr's own encoding as the signing body to ensure consistency with the - // actual payload sent over the wire. - cpr::Payload payload(pair_list.begin(), pair_list.end()); - std::string encoded_body = payload.GetContent(); + // Sign the exact bytes cpr will put on the wire. + std::string encoded_body = + cpr::Payload(pair_list.begin(), pair_list.end()).GetContent(); ICEBERG_ASSIGN_OR_RAISE( - auto all_headers, - AuthenticateRequest({.method = HttpMethod::kPost, - .url = path, - .headers = MergeHeaders(default_headers_, form_headers), - .body = encoded_body}, - session)); + auto authenticated, + session.Authenticate({.method = HttpMethod::kPost, + .url = path, + .headers = MergeHeaders(default_headers_, form_headers), + .body = std::move(encoded_body)})); cpr::Response response = - cpr::Post(cpr::Url{path}, std::move(payload), all_headers, *connection_pool_); + cpr::Post(cpr::Url{authenticated.url}, cpr::Body{authenticated.body}, + ToCprHeader(authenticated), *connection_pool_); ICEBERG_RETURN_UNEXPECTED(HandleFailureResponse(response, error_handler)); HttpResponse http_response; @@ -246,13 +231,13 @@ Result HttpClient::Head( const std::string& path, const std::unordered_map& headers, const ErrorHandler& error_handler, auth::AuthSession& session) { ICEBERG_ASSIGN_OR_RAISE( - auto all_headers, - AuthenticateRequest({.method = HttpMethod::kHead, - .url = path, - .headers = MergeHeaders(default_headers_, headers), - .body = ""}, - session)); - cpr::Response response = cpr::Head(cpr::Url{path}, all_headers, *connection_pool_); + auto authenticated, + session.Authenticate({.method = HttpMethod::kHead, + .url = path, + .headers = MergeHeaders(default_headers_, headers), + .body = ""})); + cpr::Response response = cpr::Head(cpr::Url{authenticated.url}, + ToCprHeader(authenticated), *connection_pool_); ICEBERG_RETURN_UNEXPECTED(HandleFailureResponse(response, error_handler)); HttpResponse http_response; @@ -265,14 +250,13 @@ Result HttpClient::Delete( const std::unordered_map& headers, const ErrorHandler& error_handler, auth::AuthSession& session) { ICEBERG_ASSIGN_OR_RAISE( - auto all_headers, - AuthenticateRequest({.method = HttpMethod::kDelete, - .url = AppendQueryString(path, params), - .headers = MergeHeaders(default_headers_, headers), - .body = ""}, - session)); - cpr::Response response = - cpr::Delete(cpr::Url{path}, GetParameters(params), all_headers, *connection_pool_); + auto authenticated, + session.Authenticate({.method = HttpMethod::kDelete, + .url = AppendQueryString(path, params), + .headers = MergeHeaders(default_headers_, headers), + .body = ""})); + cpr::Response response = cpr::Delete(cpr::Url{authenticated.url}, + ToCprHeader(authenticated), *connection_pool_); ICEBERG_RETURN_UNEXPECTED(HandleFailureResponse(response, error_handler)); HttpResponse http_response; diff --git a/src/iceberg/catalog/rest/meson.build b/src/iceberg/catalog/rest/meson.build index f3eae6d45..bd0109927 100644 --- a/src/iceberg/catalog/rest/meson.build +++ b/src/iceberg/catalog/rest/meson.build @@ -42,16 +42,26 @@ cpr_needs_static = ( cpr_dep = dependency('cpr', static: cpr_needs_static) iceberg_rest_build_deps = [iceberg_dep, cpr_dep] +iceberg_rest_compile_defs = [] + +sigv4_opt = get_option('sigv4') +aws_sdk_core_dep = dependency('aws-cpp-sdk-core', required: sigv4_opt) +if aws_sdk_core_dep.found() + iceberg_rest_sources += files('auth/sigv4_auth_manager.cc') + iceberg_rest_build_deps += aws_sdk_core_dep + iceberg_rest_compile_defs += '-DICEBERG_BUILD_SIGV4' +endif + iceberg_rest_lib = library( 'iceberg_rest', sources: iceberg_rest_sources, dependencies: iceberg_rest_build_deps, gnu_symbol_visibility: 'hidden', - cpp_shared_args: ['-DICEBERG_REST_EXPORTING'], - cpp_static_args: ['-DICEBERG_REST_STATIC'], + cpp_shared_args: ['-DICEBERG_REST_EXPORTING'] + iceberg_rest_compile_defs, + cpp_static_args: ['-DICEBERG_REST_STATIC'] + iceberg_rest_compile_defs, ) -iceberg_rest_compile_args = [] +iceberg_rest_compile_args = iceberg_rest_compile_defs if get_option('default_library') == 'static' iceberg_rest_compile_args += ['-DICEBERG_REST_STATIC'] endif diff --git a/src/iceberg/test/meson.build b/src/iceberg/test/meson.build index 8d2805900..03d9e1f6c 100644 --- a/src/iceberg/test/meson.build +++ b/src/iceberg/test/meson.build @@ -135,6 +135,14 @@ if get_option('rest').enabled() 'dependencies': [iceberg_rest_dep], }, } + if aws_sdk_core_dep.found() + iceberg_tests += { + 'sigv4_auth_test': { + 'sources': files('sigv4_auth_test.cc'), + 'dependencies': [iceberg_rest_dep, aws_sdk_core_dep], + }, + } + endif if get_option('rest_integration_test').enabled() if host_machine.system() == 'windows' warning('Cannot build rest integration test on Windows, skipping.') diff --git a/src/iceberg/test/sigv4_auth_test.cc b/src/iceberg/test/sigv4_auth_test.cc index caa9b5603..fafadb7a4 100644 --- a/src/iceberg/test/sigv4_auth_test.cc +++ b/src/iceberg/test/sigv4_auth_test.cc @@ -340,8 +340,9 @@ TEST_F(SigV4AuthTest, ConflictingSigV4HeadersRelocated) { auto credentials = std::make_shared(Aws::Auth::AWSCredentials( "AKIAIOSFODNN7EXAMPLE", "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY")); - auto session = std::make_shared(delegate, "us-east-1", "execute-api", - credentials); + auto session = std::make_shared( + delegate, "us-east-1", "execute-api", credentials, + std::unordered_map{}); HTTPRequest request{.method = HttpMethod::kGet, .url = "http://localhost:8080/path"}; auto auth_result = session->Authenticate(request); @@ -369,8 +370,9 @@ TEST_F(SigV4AuthTest, SessionCloseDelegatesToInner) { auto delegate = AuthSession::MakeDefault({}); auto credentials = std::make_shared( Aws::Auth::AWSCredentials("id", "secret")); - auto session = std::make_shared(delegate, "us-east-1", "execute-api", - credentials); + auto session = std::make_shared( + delegate, "us-east-1", "execute-api", credentials, + std::unordered_map{}); // Close should succeed without error EXPECT_THAT(session->Close(), IsOk()); @@ -490,6 +492,37 @@ TEST_F(SigV4AuthTest, TableSessionOverridesProperties) { << "Expected ap-southeast-1 in Authorization, got: " << auth_it->second; } +TEST_F(SigV4AuthTest, TableSessionInheritsContextualOverrides) { + auto properties = MakeSigV4Properties(); + properties[AuthProperties::kSigV4SigningRegion] = "us-west-2"; + + auto manager_result = AuthManagers::Load("test-catalog", properties); + ASSERT_THAT(manager_result, IsOk()); + + auto catalog_session = manager_result.value()->CatalogSession(client_, properties); + ASSERT_THAT(catalog_session, IsOk()); + + auto ctx_session = manager_result.value()->ContextualSession( + {{AuthProperties::kSigV4SigningRegion, "eu-west-1"}}, catalog_session.value()); + ASSERT_THAT(ctx_session, IsOk()); + + iceberg::TableIdentifier table_id{iceberg::Namespace{{"db1"}}, "table1"}; + auto table_session = manager_result.value()->TableSession(table_id, /*properties=*/{}, + ctx_session.value()); + ASSERT_THAT(table_session, IsOk()); + + HTTPRequest request{.method = HttpMethod::kGet, + .url = "https://example.com/v1/db1/tables/table1"}; + auto auth_result = table_session.value()->Authenticate(request); + ASSERT_THAT(auth_result, IsOk()); + + auto auth_it = auth_result.value().headers.find("authorization"); + ASSERT_NE(auth_it, auth_result.value().headers.end()); + EXPECT_TRUE(auth_it->second.find("eu-west-1") != std::string::npos) + << "Table session should inherit eu-west-1 from contextual parent, got: " + << auth_it->second; +} + // Java: close (TestRESTSigV4AuthManager) TEST_F(SigV4AuthTest, ManagerCloseDelegatesToInner) { auto properties = MakeSigV4Properties(); From dcc704812c6df462cace22b09aec2281a94e1a4b Mon Sep 17 00:00:00 2001 From: Li Jiajia Date: Wed, 15 Apr 2026 12:23:09 +0800 Subject: [PATCH 09/23] drop unnecessary signing mutex --- .../catalog/rest/auth/sigv4_auth_manager.cc | 14 +++++--------- src/iceberg/catalog/rest/auth/sigv4_auth_manager.h | 7 ++----- src/iceberg/catalog/rest/http_client.cc | 2 +- src/iceberg/test/sigv4_auth_test.cc | 6 +++--- 4 files changed, 11 insertions(+), 18 deletions(-) diff --git a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc index bf9fc5da4..e334f08f2 100644 --- a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc +++ b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc @@ -20,7 +20,6 @@ #include "iceberg/catalog/rest/auth/sigv4_auth_manager.h" #include -#include #include #include @@ -154,12 +153,9 @@ Result SigV4AuthSession::Authenticate(const HTTPRequest& request) { Aws::Utils::HashingUtils::Base64Encode(sha256)); } - { - std::lock_guard lock(signing_mutex_); - if (!signer_->SignRequest(*aws_request)) { - return std::unexpected( - Error{ErrorKind::kAuthenticationFailed, "SigV4 signing failed"}); - } + if (!signer_->SignRequest(*aws_request)) { + return std::unexpected(Error{.kind = ErrorKind::kAuthenticationFailed, + .message = "SigV4 signing failed"}); } HTTPRequest signed_request{.method = delegate_request.method, @@ -269,10 +265,10 @@ std::string SigV4AuthManager::ResolveSigningRegion( return it->second; } if (const char* env = std::getenv("AWS_REGION")) { - return std::string(env); + return {env}; } if (const char* env = std::getenv("AWS_DEFAULT_REGION")) { - return std::string(env); + return {env}; } return "us-east-1"; } diff --git a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.h b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.h index 88dcc789d..4e173a284 100644 --- a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.h +++ b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.h @@ -20,7 +20,6 @@ #pragma once #include -#include #include #include @@ -48,8 +47,8 @@ namespace iceberg::rest::auth { /// /// See https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_sigv.html /// -/// Thread safety: Authenticate() is thread-safe; concurrent signing calls are -/// serialized by an internal mutex. +/// Thread safety: Authenticate() is thread-safe as long as the delegate +/// session is. class ICEBERG_REST_EXPORT SigV4AuthSession : public AuthSession { public: /// SHA-256 hash of empty string, used for requests with no body. @@ -87,8 +86,6 @@ class ICEBERG_REST_EXPORT SigV4AuthSession : public AuthSession { std::shared_ptr credentials_provider_; std::unique_ptr signer_; std::unordered_map effective_properties_; - // AWSAuthV4Signer::SignRequest mutates shared signer state. - mutable std::mutex signing_mutex_; }; /// \brief An AuthManager that produces SigV4AuthSession instances. diff --git a/src/iceberg/catalog/rest/http_client.cc b/src/iceberg/catalog/rest/http_client.cc index b37995833..cc5e132eb 100644 --- a/src/iceberg/catalog/rest/http_client.cc +++ b/src/iceberg/catalog/rest/http_client.cc @@ -82,7 +82,7 @@ std::unordered_map MergeHeaders( } cpr::Header ToCprHeader(const auth::HTTPRequest& request) { - return cpr::Header(request.headers.begin(), request.headers.end()); + return {request.headers.begin(), request.headers.end()}; } /// \brief Append URL-encoded query parameters to a URL, sorted by key. diff --git a/src/iceberg/test/sigv4_auth_test.cc b/src/iceberg/test/sigv4_auth_test.cc index fafadb7a4..fd52e8dad 100644 --- a/src/iceberg/test/sigv4_auth_test.cc +++ b/src/iceberg/test/sigv4_auth_test.cc @@ -209,7 +209,7 @@ TEST_F(SigV4AuthTest, TableSessionInheritsProperties) { auto catalog_session = manager_result.value()->CatalogSession(client_, properties); ASSERT_THAT(catalog_session, IsOk()); - iceberg::TableIdentifier table_id{iceberg::Namespace{{"ns1"}}, "table1"}; + iceberg::TableIdentifier table_id{.ns = iceberg::Namespace{{"ns1"}}, .name = "table1"}; std::unordered_map table_props; auto table_session = manager_result.value()->TableSession(table_id, table_props, catalog_session.value()); @@ -474,7 +474,7 @@ TEST_F(SigV4AuthTest, TableSessionOverridesProperties) { {AuthProperties::kSigV4SigningRegion, "ap-southeast-1"}, }; - iceberg::TableIdentifier table_id{iceberg::Namespace{{"db1"}}, "table1"}; + iceberg::TableIdentifier table_id{.ns = iceberg::Namespace{{"db1"}}, .name = "table1"}; auto table_session = manager_result.value()->TableSession(table_id, table_props, catalog_session.value()); ASSERT_THAT(table_session, IsOk()); @@ -506,7 +506,7 @@ TEST_F(SigV4AuthTest, TableSessionInheritsContextualOverrides) { {{AuthProperties::kSigV4SigningRegion, "eu-west-1"}}, catalog_session.value()); ASSERT_THAT(ctx_session, IsOk()); - iceberg::TableIdentifier table_id{iceberg::Namespace{{"db1"}}, "table1"}; + iceberg::TableIdentifier table_id{.ns = iceberg::Namespace{{"db1"}}, .name = "table1"}; auto table_session = manager_result.value()->TableSession(table_id, /*properties=*/{}, ctx_session.value()); ASSERT_THAT(table_session, IsOk()); From a0a974f3dc975dbaac859417d2b5a7f992c4282b Mon Sep 17 00:00:00 2001 From: Li Jiajia Date: Wed, 15 Apr 2026 15:25:40 +0800 Subject: [PATCH 10/23] address review feedback --- .../catalog/rest/auth/sigv4_auth_manager.cc | 24 ++++++++++--------- src/iceberg/catalog/rest/http_client.cc | 14 ++++++----- 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc index e334f08f2..2e7992772 100644 --- a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc +++ b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc @@ -19,13 +19,13 @@ #include "iceberg/catalog/rest/auth/sigv4_auth_manager.h" -#include #include #include #include #include #include +#include #include #include @@ -33,7 +33,6 @@ #include "iceberg/catalog/rest/auth/auth_managers.h" #include "iceberg/catalog/rest/auth/auth_properties.h" #include "iceberg/catalog/rest/endpoint.h" -#include "iceberg/util/checked_cast.h" #include "iceberg/util/macros.h" #include "iceberg/util/string_util.h" @@ -208,7 +207,9 @@ Result> SigV4AuthManager::CatalogSession( Result> SigV4AuthManager::ContextualSession( const std::unordered_map& context, std::shared_ptr parent) { - auto sigv4_parent = internal::checked_pointer_cast(std::move(parent)); + auto sigv4_parent = std::dynamic_pointer_cast(std::move(parent)); + ICEBERG_PRECHECK(sigv4_parent != nullptr, + "SigV4AuthManager parent must be a SigV4AuthSession"); ICEBERG_ASSIGN_OR_RAISE(auto delegate_session, delegate_->ContextualSession( context, sigv4_parent->delegate())); @@ -221,7 +222,9 @@ Result> SigV4AuthManager::TableSession( const TableIdentifier& table, const std::unordered_map& properties, std::shared_ptr parent) { - auto sigv4_parent = internal::checked_pointer_cast(std::move(parent)); + auto sigv4_parent = std::dynamic_pointer_cast(std::move(parent)); + ICEBERG_PRECHECK(sigv4_parent != nullptr, + "SigV4AuthManager parent must be a SigV4AuthSession"); ICEBERG_ASSIGN_OR_RAISE( auto delegate_session, @@ -233,6 +236,8 @@ Result> SigV4AuthManager::TableSession( Status SigV4AuthManager::Close() { return delegate_->Close(); } +// TODO(sigv4): support loading a custom AWSCredentialsProvider via a class +// name property, matching Java's AwsProperties.restCredentialsProvider(). Result> SigV4AuthManager::MakeCredentialsProvider( const std::unordered_map& properties) { @@ -264,13 +269,10 @@ std::string SigV4AuthManager::ResolveSigningRegion( it != properties.end() && !it->second.empty()) { return it->second; } - if (const char* env = std::getenv("AWS_REGION")) { - return {env}; - } - if (const char* env = std::getenv("AWS_DEFAULT_REGION")) { - return {env}; - } - return "us-east-1"; + // Delegates the full resolution chain (AWS_DEFAULT_REGION / AWS_REGION env, + // ~/.aws/config profile, EC2/ECS IMDS, fallback us-east-1) to the AWS SDK. + // Set AWS_EC2_METADATA_DISABLED=true to skip IMDS on non-EC2 hosts. + return {Aws::Client::ClientConfiguration().region.c_str()}; } std::string SigV4AuthManager::ResolveSigningName( diff --git a/src/iceberg/catalog/rest/http_client.cc b/src/iceberg/catalog/rest/http_client.cc index cc5e132eb..2872f9069 100644 --- a/src/iceberg/catalog/rest/http_client.cc +++ b/src/iceberg/catalog/rest/http_client.cc @@ -86,7 +86,7 @@ cpr::Header ToCprHeader(const auth::HTTPRequest& request) { } /// \brief Append URL-encoded query parameters to a URL, sorted by key. -std::string AppendQueryString( +Result AppendQueryString( const std::string& base_url, const std::unordered_map& params) { if (params.empty()) return base_url; @@ -95,9 +95,9 @@ std::string AppendQueryString( bool first = true; for (const auto& [k, v] : sorted) { if (!first) url += "&"; - auto ek = EncodeString(k); - auto ev = EncodeString(v); - url += (ek ? *ek : k) + "=" + (ev ? *ev : v); + ICEBERG_ASSIGN_OR_RAISE(auto ek, EncodeString(k)); + ICEBERG_ASSIGN_OR_RAISE(auto ev, EncodeString(v)); + url += ek + "=" + ev; first = false; } return url; @@ -161,10 +161,11 @@ Result HttpClient::Get( const std::string& path, const std::unordered_map& params, const std::unordered_map& headers, const ErrorHandler& error_handler, auth::AuthSession& session) { + ICEBERG_ASSIGN_OR_RAISE(auto url, AppendQueryString(path, params)); ICEBERG_ASSIGN_OR_RAISE( auto authenticated, session.Authenticate({.method = HttpMethod::kGet, - .url = AppendQueryString(path, params), + .url = std::move(url), .headers = MergeHeaders(default_headers_, headers), .body = ""})); cpr::Response response = cpr::Get(cpr::Url{authenticated.url}, @@ -249,10 +250,11 @@ Result HttpClient::Delete( const std::string& path, const std::unordered_map& params, const std::unordered_map& headers, const ErrorHandler& error_handler, auth::AuthSession& session) { + ICEBERG_ASSIGN_OR_RAISE(auto url, AppendQueryString(path, params)); ICEBERG_ASSIGN_OR_RAISE( auto authenticated, session.Authenticate({.method = HttpMethod::kDelete, - .url = AppendQueryString(path, params), + .url = std::move(url), .headers = MergeHeaders(default_headers_, headers), .body = ""})); cpr::Response response = cpr::Delete(cpr::Url{authenticated.url}, From 5582326e45339abb60e1f59b06c06304133a81dd Mon Sep 17 00:00:00 2001 From: Li Jiajia Date: Sat, 25 Apr 2026 11:41:37 +0800 Subject: [PATCH 11/23] ci: drop redundant ninja-build install from cpp-linter --- .github/workflows/cpp-linter.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cpp-linter.yml b/.github/workflows/cpp-linter.yml index fe90d92c4..ab49d2d2e 100644 --- a/.github/workflows/cpp-linter.yml +++ b/.github/workflows/cpp-linter.yml @@ -43,7 +43,7 @@ jobs: shell: bash run: | sudo apt-get update - sudo apt-get install -y libcurl4-openssl-dev libsqlite3-dev libpq-dev default-libmysqlclient-dev ninja-build + sudo apt-get install -y libcurl4-openssl-dev libsqlite3-dev libpq-dev default-libmysqlclient-dev - name: Cache vcpkg packages uses: actions/cache@v4 id: vcpkg-cache From 44370bafdf3ba381a4beb6a8cea1d57a4f697cac Mon Sep 17 00:00:00 2001 From: Li Jiajia Date: Tue, 12 May 2026 17:32:11 +0800 Subject: [PATCH 12/23] address review feedback --- .github/workflows/cpp-linter.yml | 14 +-- .github/workflows/sigv4_test.yml | 71 +++++++++++++++ CMakeLists.txt | 2 +- ci/scripts/build_iceberg.sh | 12 ++- .../IcebergThirdpartyToolchain.cmake | 4 +- src/iceberg/catalog/rest/CMakeLists.txt | 10 +- .../catalog/rest/auth/auth_manager_internal.h | 5 +- .../catalog/rest/auth/auth_managers.cc | 4 +- src/iceberg/catalog/rest/auth/auth_session.cc | 4 +- src/iceberg/catalog/rest/auth/auth_session.h | 15 +-- src/iceberg/catalog/rest/auth/meson.build | 18 ++-- .../catalog/rest/auth/sigv4_auth_manager.cc | 85 ++++++++++------- ...anager.h => sigv4_auth_manager_internal.h} | 16 +--- src/iceberg/catalog/rest/endpoint.h | 7 +- src/iceberg/catalog/rest/http_client.cc | 3 +- src/iceberg/catalog/rest/http_request.h | 46 ++++++++++ src/iceberg/catalog/rest/meson.build | 5 +- src/iceberg/iceberg-config.cmake.in | 8 +- src/iceberg/test/CMakeLists.txt | 2 +- src/iceberg/test/sigv4_auth_test.cc | 91 +++++++++---------- 20 files changed, 262 insertions(+), 160 deletions(-) create mode 100644 .github/workflows/sigv4_test.yml rename src/iceberg/catalog/rest/auth/{sigv4_auth_manager.h => sigv4_auth_manager_internal.h} (88%) create mode 100644 src/iceberg/catalog/rest/http_request.h diff --git a/.github/workflows/cpp-linter.yml b/.github/workflows/cpp-linter.yml index ab49d2d2e..4225d662e 100644 --- a/.github/workflows/cpp-linter.yml +++ b/.github/workflows/cpp-linter.yml @@ -44,16 +44,6 @@ jobs: run: | sudo apt-get update sudo apt-get install -y libcurl4-openssl-dev libsqlite3-dev libpq-dev default-libmysqlclient-dev - - name: Cache vcpkg packages - uses: actions/cache@v4 - id: vcpkg-cache - with: - path: /usr/local/share/vcpkg/installed - key: vcpkg-x64-linux-aws-sdk-cpp-core-${{ hashFiles('.github/workflows/cpp-linter.yml') }} - - name: Install AWS SDK via vcpkg - if: steps.vcpkg-cache.outputs.cache-hit != 'true' - shell: bash - run: vcpkg install aws-sdk-cpp[core]:x64-linux - name: Run build env: CC: gcc-14 @@ -64,9 +54,7 @@ jobs: -DICEBERG_BUILD_SQL_CATALOG=ON \ -DICEBERG_SQL_SQLITE=ON \ -DICEBERG_SQL_POSTGRESQL=ON \ - -DICEBERG_SQL_MYSQL=ON \ - -DICEBERG_BUILD_SIGV4=ON \ - -DCMAKE_TOOLCHAIN_FILE=/usr/local/share/vcpkg/scripts/buildsystems/vcpkg.cmake + -DICEBERG_SQL_MYSQL=ON cmake --build . - uses: cpp-linter/cpp-linter-action@0f6d1b8d7e38b584cbee606eb23d850c217d54f8 # v2.15.1 id: linter diff --git a/.github/workflows/sigv4_test.yml b/.github/workflows/sigv4_test.yml new file mode 100644 index 000000000..f9ee3b9c9 --- /dev/null +++ b/.github/workflows/sigv4_test.yml @@ -0,0 +1,71 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# SigV4 build + unit tests (Linux only; aws-cpp-sdk-core via vcpkg). +name: SigV4 Tests + +on: + push: + branches: + - '**' + - '!dependabot/**' + tags: + - '**' + pull_request: + +concurrency: + group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} + cancel-in-progress: true + +permissions: + contents: read + +env: + ICEBERG_HOME: /tmp/iceberg + +jobs: + sigv4: + name: SigV4 (AMD64 Ubuntu 24.04) + runs-on: ubuntu-24.04 + timeout-minutes: 35 + env: + CC: gcc-14 + CXX: g++-14 + AWS_EC2_METADATA_DISABLED: "TRUE" + steps: + - name: Checkout iceberg-cpp + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + - name: Install dependencies + shell: bash + run: sudo apt-get update && sudo apt-get install -y libcurl4-openssl-dev + - name: Cache vcpkg packages + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + id: vcpkg-cache + with: + path: /usr/local/share/vcpkg/installed + key: vcpkg-x64-linux-aws-sdk-cpp-core-${{ hashFiles('.github/workflows/sigv4_test.yml') }} + - name: Install AWS SDK via vcpkg + if: steps.vcpkg-cache.outputs.cache-hit != 'true' + shell: bash + run: vcpkg install aws-sdk-cpp[core]:x64-linux + - name: Build and test Iceberg with SigV4 + shell: bash + env: + CMAKE_TOOLCHAIN_FILE: /usr/local/share/vcpkg/scripts/buildsystems/vcpkg.cmake + run: ci/scripts/build_iceberg.sh "$(pwd)" OFF OFF OFF ON diff --git a/CMakeLists.txt b/CMakeLists.txt index bdd019fc6..0d80a125e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -53,7 +53,7 @@ option(ICEBERG_SQL_SQLITE "Build the SQLite connector for the SQL catalog" OFF) option(ICEBERG_SQL_POSTGRESQL "Build the PostgreSQL connector for the SQL catalog" OFF) option(ICEBERG_SQL_MYSQL "Build the MySQL connector for the SQL catalog" OFF) option(ICEBERG_S3 "Build with S3 support" OFF) -option(ICEBERG_BUILD_SIGV4 "Build SigV4 authentication support (requires AWS SDK)" OFF) +option(ICEBERG_SIGV4 "Build SigV4 authentication support (requires AWS SDK)" OFF) option(ICEBERG_ENABLE_ASAN "Enable Address Sanitizer" OFF) option(ICEBERG_ENABLE_UBSAN "Enable Undefined Behavior Sanitizer" OFF) diff --git a/ci/scripts/build_iceberg.sh b/ci/scripts/build_iceberg.sh index 406ef56a7..9e2756ef5 100755 --- a/ci/scripts/build_iceberg.sh +++ b/ci/scripts/build_iceberg.sh @@ -17,7 +17,7 @@ # specific language governing permissions and limitations # under the License. # -# Usage: build_iceberg.sh [rest_integration_tests=OFF] [sccache=OFF] [s3=OFF] +# Usage: build_iceberg.sh [rest_integration_tests=OFF] [sccache=OFF] [s3=OFF] [sigv4=OFF] set -eux @@ -26,6 +26,7 @@ build_dir=${1}/build build_rest_integration_test=${2:-OFF} build_enable_sccache=${3:-OFF} build_enable_s3=${4:-OFF} +build_enable_sigv4=${5:-OFF} run_tests=${ICEBERG_RUN_TESTS:-ON} mkdir ${build_dir} @@ -49,11 +50,20 @@ else CMAKE_ARGS+=("-DICEBERG_S3=OFF") fi +if [[ "${build_enable_sigv4}" == "ON" ]]; then + CMAKE_ARGS+=("-DICEBERG_SIGV4=ON") +else + CMAKE_ARGS+=("-DICEBERG_SIGV4=OFF") +fi + if is_windows; then CMAKE_ARGS+=("-DCMAKE_TOOLCHAIN_FILE=C:/vcpkg/scripts/buildsystems/vcpkg.cmake") CMAKE_ARGS+=("-DCMAKE_BUILD_TYPE=Release") else CMAKE_ARGS+=("-DCMAKE_BUILD_TYPE=Debug") + if [[ -n "${CMAKE_TOOLCHAIN_FILE:-}" ]]; then + CMAKE_ARGS+=("-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}") + fi fi if [[ "${build_enable_sccache}" == "ON" ]]; then diff --git a/cmake_modules/IcebergThirdpartyToolchain.cmake b/cmake_modules/IcebergThirdpartyToolchain.cmake index fd1235faf..56f800786 100644 --- a/cmake_modules/IcebergThirdpartyToolchain.cmake +++ b/cmake_modules/IcebergThirdpartyToolchain.cmake @@ -645,9 +645,9 @@ function(resolve_aws_sdk_dependency) PARENT_SCOPE) endfunction() -if(ICEBERG_BUILD_SIGV4) +if(ICEBERG_SIGV4) if(NOT ICEBERG_BUILD_REST) - message(FATAL_ERROR "ICEBERG_BUILD_SIGV4 requires ICEBERG_BUILD_REST to be ON") + message(FATAL_ERROR "ICEBERG_SIGV4 requires ICEBERG_BUILD_REST to be ON") endif() resolve_aws_sdk_dependency() endif() diff --git a/src/iceberg/catalog/rest/CMakeLists.txt b/src/iceberg/catalog/rest/CMakeLists.txt index 8230b7a40..b396bf6f2 100644 --- a/src/iceberg/catalog/rest/CMakeLists.txt +++ b/src/iceberg/catalog/rest/CMakeLists.txt @@ -35,9 +35,7 @@ set(ICEBERG_REST_SOURCES rest_util.cc types.cc) -if(ICEBERG_BUILD_SIGV4) - list(APPEND ICEBERG_REST_SOURCES auth/sigv4_auth_manager.cc) -endif() +list(APPEND ICEBERG_REST_SOURCES auth/sigv4_auth_manager.cc) set(ICEBERG_REST_STATIC_BUILD_INTERFACE_LIBS) set(ICEBERG_REST_SHARED_BUILD_INTERFACE_LIBS) @@ -57,7 +55,7 @@ list(APPEND "$,iceberg::iceberg_shared,iceberg::iceberg_static>" "$,iceberg::cpr,cpr::cpr>") -if(ICEBERG_BUILD_SIGV4) +if(ICEBERG_SIGV4) list(APPEND ICEBERG_REST_STATIC_BUILD_INTERFACE_LIBS aws-cpp-sdk-core) list(APPEND ICEBERG_REST_SHARED_BUILD_INTERFACE_LIBS aws-cpp-sdk-core) list(APPEND ICEBERG_REST_STATIC_INSTALL_INTERFACE_LIBS aws-cpp-sdk-core) @@ -76,10 +74,10 @@ add_iceberg_lib(iceberg_rest SHARED_INSTALL_INTERFACE_LIBS ${ICEBERG_REST_SHARED_INSTALL_INTERFACE_LIBS}) -if(ICEBERG_BUILD_SIGV4) +if(ICEBERG_SIGV4) foreach(LIB iceberg_rest_static iceberg_rest_shared) if(TARGET ${LIB}) - target_compile_definitions(${LIB} PUBLIC ICEBERG_BUILD_SIGV4) + target_compile_definitions(${LIB} PUBLIC ICEBERG_SIGV4) endif() endforeach() endif() diff --git a/src/iceberg/catalog/rest/auth/auth_manager_internal.h b/src/iceberg/catalog/rest/auth/auth_manager_internal.h index 783fb2e70..36671a39f 100644 --- a/src/iceberg/catalog/rest/auth/auth_manager_internal.h +++ b/src/iceberg/catalog/rest/auth/auth_manager_internal.h @@ -47,11 +47,10 @@ Result> MakeOAuth2Manager( std::string_view name, const std::unordered_map& properties); -#ifdef ICEBERG_BUILD_SIGV4 -/// \brief Create a SigV4 authentication manager with a delegate. +/// \brief Create a SigV4 authentication manager with a delegate. Returns +/// NotSupported when the library was built without ICEBERG_SIGV4. Result> MakeSigV4AuthManager( std::string_view name, const std::unordered_map& properties); -#endif } // namespace iceberg::rest::auth diff --git a/src/iceberg/catalog/rest/auth/auth_managers.cc b/src/iceberg/catalog/rest/auth/auth_managers.cc index 0a1d12788..8c17af285 100644 --- a/src/iceberg/catalog/rest/auth/auth_managers.cc +++ b/src/iceberg/catalog/rest/auth/auth_managers.cc @@ -66,10 +66,8 @@ AuthManagerRegistry CreateDefaultRegistry() { {AuthProperties::kAuthTypeNone, MakeNoopAuthManager}, {AuthProperties::kAuthTypeBasic, MakeBasicAuthManager}, {AuthProperties::kAuthTypeOAuth2, MakeOAuth2Manager}, + {AuthProperties::kAuthTypeSigV4, MakeSigV4AuthManager}, }; -#ifdef ICEBERG_BUILD_SIGV4 - registry[AuthProperties::kAuthTypeSigV4] = MakeSigV4AuthManager; -#endif return registry; } diff --git a/src/iceberg/catalog/rest/auth/auth_session.cc b/src/iceberg/catalog/rest/auth/auth_session.cc index 7c5b9b289..22591c805 100644 --- a/src/iceberg/catalog/rest/auth/auth_session.cc +++ b/src/iceberg/catalog/rest/auth/auth_session.cc @@ -43,8 +43,8 @@ class DefaultAuthSession : public AuthSession { explicit DefaultAuthSession(std::unordered_map headers) : headers_(std::move(headers)) {} - Result Authenticate(const HTTPRequest& request) override { - HTTPRequest authenticated = request; + Result Authenticate(const HttpRequest& request) override { + HttpRequest authenticated = request; for (const auto& [key, value] : headers_) { authenticated.headers.try_emplace(key, value); } diff --git a/src/iceberg/catalog/rest/auth/auth_session.h b/src/iceberg/catalog/rest/auth/auth_session.h index d3fa9ea2c..5d09688de 100644 --- a/src/iceberg/catalog/rest/auth/auth_session.h +++ b/src/iceberg/catalog/rest/auth/auth_session.h @@ -23,7 +23,7 @@ #include #include -#include "iceberg/catalog/rest/endpoint.h" +#include "iceberg/catalog/rest/http_request.h" #include "iceberg/catalog/rest/iceberg_rest_export.h" #include "iceberg/catalog/rest/type_fwd.h" #include "iceberg/result.h" @@ -33,17 +33,6 @@ namespace iceberg::rest::auth { -/// \brief An outgoing HTTP request passed through an AuthSession. Mirrors the -/// HTTPRequest type used by the Java reference implementation so signing -/// implementations like SigV4 can operate on method, url, headers, and body -/// as a single value. -struct ICEBERG_REST_EXPORT HTTPRequest { - HttpMethod method = HttpMethod::kGet; - std::string url; - std::unordered_map headers; - std::string body; -}; - /// \brief An authentication session that can authenticate outgoing HTTP requests. class ICEBERG_REST_EXPORT AuthSession { public: @@ -63,7 +52,7 @@ class ICEBERG_REST_EXPORT AuthSession { /// - NotAuthorized: Not authenticated (401) /// - IOError: Network or connection errors when reaching auth server /// - RestError: HTTP errors from authentication service - virtual Result Authenticate(const HTTPRequest& request) = 0; + virtual Result Authenticate(const HttpRequest& request) = 0; /// \brief Close the session and release any resources. /// diff --git a/src/iceberg/catalog/rest/auth/meson.build b/src/iceberg/catalog/rest/auth/meson.build index 352ddfaec..4de6e821e 100644 --- a/src/iceberg/catalog/rest/auth/meson.build +++ b/src/iceberg/catalog/rest/auth/meson.build @@ -15,17 +15,13 @@ # specific language governing permissions and limitations # under the License. -iceberg_rest_auth_headers = [ - 'auth_manager.h', - 'auth_managers.h', - 'auth_properties.h', - 'auth_session.h', - 'oauth2_util.h', -] -if aws_sdk_core_dep.found() - iceberg_rest_auth_headers += ['sigv4_auth_manager.h'] -endif install_headers( - iceberg_rest_auth_headers, + [ + 'auth_manager.h', + 'auth_managers.h', + 'auth_properties.h', + 'auth_session.h', + 'oauth2_util.h', + ], subdir: 'iceberg/catalog/rest/auth', ) diff --git a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc index 2e7992772..74d4d0416 100644 --- a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc +++ b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc @@ -17,32 +17,34 @@ * under the License. */ -#include "iceberg/catalog/rest/auth/sigv4_auth_manager.h" +#include "iceberg/catalog/rest/auth/auth_manager_internal.h" +#include "iceberg/catalog/rest/auth/sigv4_auth_manager_internal.h" -#include +#ifdef ICEBERG_SIGV4 -#include -#include -#include -#include -#include -#include -#include +# include -#include "iceberg/catalog/rest/auth/auth_manager_internal.h" -#include "iceberg/catalog/rest/auth/auth_managers.h" -#include "iceberg/catalog/rest/auth/auth_properties.h" -#include "iceberg/catalog/rest/endpoint.h" -#include "iceberg/util/macros.h" -#include "iceberg/util/string_util.h" +# include +# include +# include +# include +# include +# include +# include + +# include "iceberg/catalog/rest/auth/auth_managers.h" +# include "iceberg/catalog/rest/auth/auth_properties.h" +# include "iceberg/util/macros.h" +# include "iceberg/util/string_util.h" namespace iceberg::rest::auth { namespace { -/// \brief Ensures AWS SDK is initialized exactly once per process. -/// ShutdownAPI is intentionally never called (leak-by-design) to avoid -/// static destruction order issues with objects that may outlive shutdown. +/// \brief Ensures the AWS SDK is initialized exactly once per process. +/// +/// Aws::InitAPI / ShutdownAPI must bracket the process lifetime, which a +/// library cannot enforce, so we never call ShutdownAPI (leak by design). class AwsSdkGuard { public: static void EnsureInitialized() { @@ -106,19 +108,17 @@ class RestSigV4Signer : public Aws::Client::AWSAuthV4Signer { SigV4AuthSession::SigV4AuthSession( std::shared_ptr delegate, std::string signing_region, std::string signing_name, - std::shared_ptr credentials_provider, - std::unordered_map effective_properties) + std::shared_ptr credentials_provider) : delegate_(std::move(delegate)), signing_region_(std::move(signing_region)), signing_name_(std::move(signing_name)), credentials_provider_(std::move(credentials_provider)), signer_(std::make_unique( - credentials_provider_, signing_name_.c_str(), signing_region_.c_str())), - effective_properties_(std::move(effective_properties)) {} + credentials_provider_, signing_name_.c_str(), signing_region_.c_str())) {} SigV4AuthSession::~SigV4AuthSession() = default; -Result SigV4AuthSession::Authenticate(const HTTPRequest& request) { +Result SigV4AuthSession::Authenticate(const HttpRequest& request) { ICEBERG_ASSIGN_OR_RAISE(auto delegate_request, delegate_->Authenticate(request)); const auto& original_headers = delegate_request.headers; @@ -157,7 +157,7 @@ Result SigV4AuthSession::Authenticate(const HTTPRequest& request) { .message = "SigV4 signing failed"}); } - HTTPRequest signed_request{.method = delegate_request.method, + HttpRequest signed_request{.method = delegate_request.method, .url = std::move(delegate_request.url), .headers = {}, .body = std::move(delegate_request.body)}; @@ -199,11 +199,17 @@ Result> SigV4AuthManager::CatalogSession( HttpClient& shared_client, const std::unordered_map& properties) { AwsSdkGuard::EnsureInitialized(); + catalog_properties_ = properties; ICEBERG_ASSIGN_OR_RAISE(auto delegate_session, delegate_->CatalogSession(shared_client, properties)); return WrapSession(std::move(delegate_session), properties); } +// Contextual and table sessions both merge against the stored catalog +// properties, matching Java's RESTSigV4AuthManager. Contextual overrides do +// not propagate into child table sessions; the two derivations are +// independent dimensions on top of the catalog baseline. + Result> SigV4AuthManager::ContextualSession( const std::unordered_map& context, std::shared_ptr parent) { @@ -214,8 +220,8 @@ Result> SigV4AuthManager::ContextualSession( ICEBERG_ASSIGN_OR_RAISE(auto delegate_session, delegate_->ContextualSession( context, sigv4_parent->delegate())); - auto merged = MergeProperties(sigv4_parent->effective_properties(), context); - return WrapSession(std::move(delegate_session), std::move(merged)); + auto merged = MergeProperties(catalog_properties_, context); + return WrapSession(std::move(delegate_session), merged); } Result> SigV4AuthManager::TableSession( @@ -230,8 +236,8 @@ Result> SigV4AuthManager::TableSession( auto delegate_session, delegate_->TableSession(table, properties, sigv4_parent->delegate())); - auto merged = MergeProperties(sigv4_parent->effective_properties(), properties); - return WrapSession(std::move(delegate_session), std::move(merged)); + auto merged = MergeProperties(catalog_properties_, properties); + return WrapSession(std::move(delegate_session), merged); } Status SigV4AuthManager::Close() { return delegate_->Close(); } @@ -286,13 +292,13 @@ std::string SigV4AuthManager::ResolveSigningName( Result> SigV4AuthManager::WrapSession( std::shared_ptr delegate_session, - std::unordered_map properties) { + const std::unordered_map& properties) { auto region = ResolveSigningRegion(properties); auto service = ResolveSigningName(properties); ICEBERG_ASSIGN_OR_RAISE(auto credentials, MakeCredentialsProvider(properties)); - return std::make_shared( - std::move(delegate_session), std::move(region), std::move(service), - std::move(credentials), std::move(properties)); + return std::make_shared(std::move(delegate_session), + std::move(region), std::move(service), + std::move(credentials)); } Result> MakeSigV4AuthManager( @@ -318,3 +324,18 @@ Result> MakeSigV4AuthManager( } } // namespace iceberg::rest::auth + +#else // !ICEBERG_SIGV4 + +namespace iceberg::rest::auth { + +Result> MakeSigV4AuthManager( + std::string_view /*name*/, + const std::unordered_map& /*properties*/) { + return NotSupported( + "SigV4 authentication is not built; configure with -DICEBERG_SIGV4=ON"); +} + +} // namespace iceberg::rest::auth + +#endif // ICEBERG_SIGV4 diff --git a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.h b/src/iceberg/catalog/rest/auth/sigv4_auth_manager_internal.h similarity index 88% rename from src/iceberg/catalog/rest/auth/sigv4_auth_manager.h rename to src/iceberg/catalog/rest/auth/sigv4_auth_manager_internal.h index 4e173a284..8334128e7 100644 --- a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.h +++ b/src/iceberg/catalog/rest/auth/sigv4_auth_manager_internal.h @@ -61,31 +61,22 @@ class ICEBERG_REST_EXPORT SigV4AuthSession : public AuthSession { SigV4AuthSession( std::shared_ptr delegate, std::string signing_region, std::string signing_name, - std::shared_ptr credentials_provider, - std::unordered_map effective_properties); + std::shared_ptr credentials_provider); ~SigV4AuthSession() override; - Result Authenticate(const HTTPRequest& request) override; + Result Authenticate(const HttpRequest& request) override; Status Close() override; const std::shared_ptr& delegate() const { return delegate_; } - /// Merged properties this session was built from. Child sessions inherit - /// from this (not the catalog's) so contextual overrides propagate into - /// table sessions. - const std::unordered_map& effective_properties() const { - return effective_properties_; - } - private: std::shared_ptr delegate_; std::string signing_region_; std::string signing_name_; std::shared_ptr credentials_provider_; std::unique_ptr signer_; - std::unordered_map effective_properties_; }; /// \brief An AuthManager that produces SigV4AuthSession instances. @@ -124,9 +115,10 @@ class ICEBERG_REST_EXPORT SigV4AuthManager : public AuthManager { const std::unordered_map& properties); Result> WrapSession( std::shared_ptr delegate_session, - std::unordered_map properties); + const std::unordered_map& properties); std::unique_ptr delegate_; + std::unordered_map catalog_properties_; }; } // namespace iceberg::rest::auth diff --git a/src/iceberg/catalog/rest/endpoint.h b/src/iceberg/catalog/rest/endpoint.h index fdcd2108e..9f51b43d4 100644 --- a/src/iceberg/catalog/rest/endpoint.h +++ b/src/iceberg/catalog/rest/endpoint.h @@ -22,6 +22,7 @@ #include #include +#include "iceberg/catalog/rest/http_request.h" #include "iceberg/catalog/rest/iceberg_rest_export.h" #include "iceberg/result.h" @@ -30,12 +31,6 @@ namespace iceberg::rest { -/// \brief HTTP method enumeration. -enum class HttpMethod : uint8_t { kGet, kPost, kPut, kDelete, kHead }; - -/// \brief Convert HttpMethod to string representation. -constexpr std::string_view ToString(HttpMethod method); - /// \brief An Endpoint is an immutable value object identifying a specific REST API /// operation. It consists of: /// - HTTP method (GET, POST, DELETE, etc.) diff --git a/src/iceberg/catalog/rest/http_client.cc b/src/iceberg/catalog/rest/http_client.cc index 2872f9069..650b09148 100644 --- a/src/iceberg/catalog/rest/http_client.cc +++ b/src/iceberg/catalog/rest/http_client.cc @@ -81,11 +81,12 @@ std::unordered_map MergeHeaders( return merged; } -cpr::Header ToCprHeader(const auth::HTTPRequest& request) { +cpr::Header ToCprHeader(const HttpRequest& request) { return {request.headers.begin(), request.headers.end()}; } /// \brief Append URL-encoded query parameters to a URL, sorted by key. +/// \param base_url must not already contain a query string ('?' or '&'). Result AppendQueryString( const std::string& base_url, const std::unordered_map& params) { diff --git a/src/iceberg/catalog/rest/http_request.h b/src/iceberg/catalog/rest/http_request.h new file mode 100644 index 000000000..90002ae3d --- /dev/null +++ b/src/iceberg/catalog/rest/http_request.h @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include +#include +#include +#include + +#include "iceberg/catalog/rest/iceberg_rest_export.h" + +namespace iceberg::rest { + +/// \brief HTTP method enumeration. +enum class HttpMethod : uint8_t { kGet, kPost, kPut, kDelete, kHead }; + +/// \brief Convert HttpMethod to string representation. +constexpr std::string_view ToString(HttpMethod method); + +/// \brief An outgoing HTTP request. Mirrors Java's HttpRequest so signing +/// implementations like SigV4 see method, url, headers, and body together. +struct ICEBERG_REST_EXPORT HttpRequest { + HttpMethod method = HttpMethod::kGet; + std::string url; + std::unordered_map headers; + std::string body; +}; + +} // namespace iceberg::rest diff --git a/src/iceberg/catalog/rest/meson.build b/src/iceberg/catalog/rest/meson.build index bd0109927..23692a058 100644 --- a/src/iceberg/catalog/rest/meson.build +++ b/src/iceberg/catalog/rest/meson.build @@ -41,15 +41,15 @@ cpr_needs_static = ( ) cpr_dep = dependency('cpr', static: cpr_needs_static) +iceberg_rest_sources += files('auth/sigv4_auth_manager.cc') iceberg_rest_build_deps = [iceberg_dep, cpr_dep] iceberg_rest_compile_defs = [] sigv4_opt = get_option('sigv4') aws_sdk_core_dep = dependency('aws-cpp-sdk-core', required: sigv4_opt) if aws_sdk_core_dep.found() - iceberg_rest_sources += files('auth/sigv4_auth_manager.cc') iceberg_rest_build_deps += aws_sdk_core_dep - iceberg_rest_compile_defs += '-DICEBERG_BUILD_SIGV4' + iceberg_rest_compile_defs += '-DICEBERG_SIGV4' endif iceberg_rest_lib = library( @@ -80,6 +80,7 @@ install_headers( 'endpoint.h', 'error_handlers.h', 'http_client.h', + 'http_request.h', 'iceberg_rest_export.h', 'resource_paths.h', 'rest_catalog.h', diff --git a/src/iceberg/iceberg-config.cmake.in b/src/iceberg/iceberg-config.cmake.in index 0339ee1a9..ae99d6640 100644 --- a/src/iceberg/iceberg-config.cmake.in +++ b/src/iceberg/iceberg-config.cmake.in @@ -73,7 +73,13 @@ macro(iceberg_find_components components) endforeach() endmacro() -# Find system dependencies +# AWSSDK's CMake config dispatches sub-package finds via AWSSDK_FIND_COMPONENTS, +# so a plain find_dependency would not bring in aws-cpp-sdk-core. +if("AWSSDK" IN_LIST ICEBERG_SYSTEM_DEPENDENCIES) + list(REMOVE_ITEM ICEBERG_SYSTEM_DEPENDENCIES AWSSDK) + include(CMakeFindDependencyMacro) + find_dependency(AWSSDK COMPONENTS core) +endif() iceberg_find_dependencies("${ICEBERG_SYSTEM_DEPENDENCIES}") include("${CMAKE_CURRENT_LIST_DIR}/iceberg-targets.cmake") diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt index b2ed9a7cc..0e8f03150 100644 --- a/src/iceberg/test/CMakeLists.txt +++ b/src/iceberg/test/CMakeLists.txt @@ -290,7 +290,7 @@ if(ICEBERG_BUILD_REST) rest_json_serde_test.cc rest_util_test.cc) - if(ICEBERG_BUILD_SIGV4) + if(ICEBERG_SIGV4) add_rest_iceberg_test(sigv4_auth_test SOURCES sigv4_auth_test.cc) target_link_libraries(sigv4_auth_test PRIVATE aws-cpp-sdk-core) endif() diff --git a/src/iceberg/test/sigv4_auth_test.cc b/src/iceberg/test/sigv4_auth_test.cc index fd52e8dad..78e469af7 100644 --- a/src/iceberg/test/sigv4_auth_test.cc +++ b/src/iceberg/test/sigv4_auth_test.cc @@ -17,20 +17,22 @@ * under the License. */ -#include -#include +#ifdef ICEBERG_SIGV4 -#include -#include -#include +# include +# include -#include "iceberg/catalog/rest/auth/auth_managers.h" -#include "iceberg/catalog/rest/auth/auth_properties.h" -#include "iceberg/catalog/rest/auth/auth_session.h" -#include "iceberg/catalog/rest/auth/sigv4_auth_manager.h" -#include "iceberg/catalog/rest/http_client.h" -#include "iceberg/table_identifier.h" -#include "iceberg/test/matchers.h" +# include +# include +# include + +# include "iceberg/catalog/rest/auth/auth_managers.h" +# include "iceberg/catalog/rest/auth/auth_properties.h" +# include "iceberg/catalog/rest/auth/auth_session.h" +# include "iceberg/catalog/rest/auth/sigv4_auth_manager_internal.h" +# include "iceberg/catalog/rest/http_client.h" +# include "iceberg/table_identifier.h" +# include "iceberg/test/matchers.h" namespace iceberg::rest::auth { @@ -82,7 +84,7 @@ TEST_F(SigV4AuthTest, AuthenticateAddsAuthorizationHeader) { auto session_result = manager_result.value()->CatalogSession(client_, properties); ASSERT_THAT(session_result, IsOk()); - HTTPRequest request{.method = HttpMethod::kGet, .url = "https://example.com/v1/config"}; + HttpRequest request{.method = HttpMethod::kGet, .url = "https://example.com/v1/config"}; auto auth_result = session_result.value()->Authenticate(request); ASSERT_THAT(auth_result, IsOk()); const auto& headers = auth_result.value().headers; @@ -100,7 +102,7 @@ TEST_F(SigV4AuthTest, AuthenticateWithPostBody) { auto session_result = manager_result.value()->CatalogSession(client_, properties); ASSERT_THAT(session_result, IsOk()); - HTTPRequest request{.method = HttpMethod::kPost, + HttpRequest request{.method = HttpMethod::kPost, .url = "https://example.com/v1/namespaces", .headers = {{"Content-Type", "application/json"}}, .body = R"({"namespace":["ns1"]})"}; @@ -123,7 +125,7 @@ TEST_F(SigV4AuthTest, DelegateAuthorizationHeaderRelocated) { auto session_result = manager_result.value()->CatalogSession(client_, properties); ASSERT_THAT(session_result, IsOk()); - HTTPRequest request{.method = HttpMethod::kGet, .url = "https://example.com/v1/config"}; + HttpRequest request{.method = HttpMethod::kGet, .url = "https://example.com/v1/config"}; auto auth_result = session_result.value()->Authenticate(request); ASSERT_THAT(auth_result, IsOk()); const auto& headers = auth_result.value().headers; @@ -144,7 +146,7 @@ TEST_F(SigV4AuthTest, AuthenticateWithSessionToken) { auto session_result = manager_result.value()->CatalogSession(client_, properties); ASSERT_THAT(session_result, IsOk()); - HTTPRequest request{.method = HttpMethod::kGet, .url = "https://example.com/v1/config"}; + HttpRequest request{.method = HttpMethod::kGet, .url = "https://example.com/v1/config"}; auto auth_result = session_result.value()->Authenticate(request); ASSERT_THAT(auth_result, IsOk()); const auto& headers = auth_result.value().headers; @@ -165,7 +167,7 @@ TEST_F(SigV4AuthTest, CustomSigningNameAndRegion) { auto session_result = manager_result.value()->CatalogSession(client_, properties); ASSERT_THAT(session_result, IsOk()); - HTTPRequest request{.method = HttpMethod::kGet, .url = "https://example.com/v1/config"}; + HttpRequest request{.method = HttpMethod::kGet, .url = "https://example.com/v1/config"}; auto auth_result = session_result.value()->Authenticate(request); ASSERT_THAT(auth_result, IsOk()); const auto& headers = auth_result.value().headers; @@ -193,7 +195,7 @@ TEST_F(SigV4AuthTest, DelegateDefaultsToOAuth2NoAuth) { auto session_result = manager_result.value()->CatalogSession(client_, properties); ASSERT_THAT(session_result, IsOk()); - HTTPRequest request{.method = HttpMethod::kGet, .url = "https://example.com/v1/config"}; + HttpRequest request{.method = HttpMethod::kGet, .url = "https://example.com/v1/config"}; auto auth_result = session_result.value()->Authenticate(request); ASSERT_THAT(auth_result, IsOk()); const auto& headers = auth_result.value().headers; @@ -215,7 +217,7 @@ TEST_F(SigV4AuthTest, TableSessionInheritsProperties) { catalog_session.value()); ASSERT_THAT(table_session, IsOk()); - HTTPRequest request{.method = HttpMethod::kGet, + HttpRequest request{.method = HttpMethod::kGet, .url = "https://example.com/v1/ns1/tables/table1"}; auto auth_result = table_session.value()->Authenticate(request); ASSERT_THAT(auth_result, IsOk()); @@ -223,9 +225,6 @@ TEST_F(SigV4AuthTest, TableSessionInheritsProperties) { auth_result.value().headers.end()); } -// ---------- Tests ported from Java TestRESTSigV4AuthSession ---------- - -// Java: authenticateWithoutBody TEST_F(SigV4AuthTest, AuthenticateWithoutBodyDetailedHeaders) { auto properties = MakeSigV4Properties(); auto manager_result = AuthManagers::Load("test-catalog", properties); @@ -234,7 +233,7 @@ TEST_F(SigV4AuthTest, AuthenticateWithoutBodyDetailedHeaders) { auto session_result = manager_result.value()->CatalogSession(client_, properties); ASSERT_THAT(session_result, IsOk()); - HTTPRequest request{.method = HttpMethod::kGet, + HttpRequest request{.method = HttpMethod::kGet, .url = "http://localhost:8080/path", .headers = {{"Content-Type", "application/json"}}}; auto auth_result = session_result.value()->Authenticate(request); @@ -264,7 +263,6 @@ TEST_F(SigV4AuthTest, AuthenticateWithoutBodyDetailedHeaders) { EXPECT_NE(headers.find("x-amz-date"), headers.end()); } -// Java: authenticateWithBody TEST_F(SigV4AuthTest, AuthenticateWithBodyDetailedHeaders) { auto properties = MakeSigV4Properties(); auto manager_result = AuthManagers::Load("test-catalog", properties); @@ -273,7 +271,7 @@ TEST_F(SigV4AuthTest, AuthenticateWithBodyDetailedHeaders) { auto session_result = manager_result.value()->CatalogSession(client_, properties); ASSERT_THAT(session_result, IsOk()); - HTTPRequest request{.method = HttpMethod::kPost, + HttpRequest request{.method = HttpMethod::kPost, .url = "http://localhost:8080/path", .headers = {{"Content-Type", "application/json"}}, .body = R"({"namespace":["ns1"]})"}; @@ -295,7 +293,6 @@ TEST_F(SigV4AuthTest, AuthenticateWithBodyDetailedHeaders) { << "Expected Base64 SHA256, got: " << sha_it->second; } -// Java: authenticateConflictingAuthorizationHeader TEST_F(SigV4AuthTest, ConflictingAuthorizationHeaderIncludedInSignedHeaders) { auto properties = MakeSigV4Properties(); properties[AuthProperties::kToken.key()] = "my-oauth-token"; @@ -307,7 +304,7 @@ TEST_F(SigV4AuthTest, ConflictingAuthorizationHeaderIncludedInSignedHeaders) { auto session_result = manager_result.value()->CatalogSession(client_, properties); ASSERT_THAT(session_result, IsOk()); - HTTPRequest request{.method = HttpMethod::kGet, + HttpRequest request{.method = HttpMethod::kGet, .url = "http://localhost:8080/path", .headers = {{"Content-Type", "application/json"}}}; auto auth_result = session_result.value()->Authenticate(request); @@ -330,7 +327,6 @@ TEST_F(SigV4AuthTest, ConflictingAuthorizationHeaderIncludedInSignedHeaders) { EXPECT_EQ(orig_it->second, "Bearer my-oauth-token"); } -// Java: authenticateConflictingSigv4Headers TEST_F(SigV4AuthTest, ConflictingSigV4HeadersRelocated) { auto delegate = AuthSession::MakeDefault({ {"x-amz-content-sha256", "fake-sha256"}, @@ -340,11 +336,10 @@ TEST_F(SigV4AuthTest, ConflictingSigV4HeadersRelocated) { auto credentials = std::make_shared(Aws::Auth::AWSCredentials( "AKIAIOSFODNN7EXAMPLE", "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY")); - auto session = std::make_shared( - delegate, "us-east-1", "execute-api", credentials, - std::unordered_map{}); + auto session = std::make_shared(delegate, "us-east-1", "execute-api", + credentials); - HTTPRequest request{.method = HttpMethod::kGet, .url = "http://localhost:8080/path"}; + HttpRequest request{.method = HttpMethod::kGet, .url = "http://localhost:8080/path"}; auto auth_result = session->Authenticate(request); ASSERT_THAT(auth_result, IsOk()); const auto& headers = auth_result.value().headers; @@ -365,22 +360,17 @@ TEST_F(SigV4AuthTest, ConflictingSigV4HeadersRelocated) { EXPECT_NE(headers.find("authorization"), headers.end()); } -// Java: close (TestRESTSigV4AuthSession) TEST_F(SigV4AuthTest, SessionCloseDelegatesToInner) { auto delegate = AuthSession::MakeDefault({}); auto credentials = std::make_shared( Aws::Auth::AWSCredentials("id", "secret")); - auto session = std::make_shared( - delegate, "us-east-1", "execute-api", credentials, - std::unordered_map{}); + auto session = std::make_shared(delegate, "us-east-1", "execute-api", + credentials); // Close should succeed without error EXPECT_THAT(session->Close(), IsOk()); } -// ---------- Tests ported from Java TestRESTSigV4AuthManager ---------- - -// Java: createCustomDelegate TEST_F(SigV4AuthTest, CreateCustomDelegateNone) { std::unordered_map properties = { {AuthProperties::kAuthType, "sigv4"}, @@ -397,7 +387,7 @@ TEST_F(SigV4AuthTest, CreateCustomDelegateNone) { ASSERT_THAT(session_result, IsOk()); // Authenticate should work with noop delegate - HTTPRequest request{.method = HttpMethod::kGet, .url = "https://example.com/v1/config"}; + HttpRequest request{.method = HttpMethod::kGet, .url = "https://example.com/v1/config"}; auto auth_result = session_result.value()->Authenticate(request); ASSERT_THAT(auth_result, IsOk()); const auto& headers = auth_result.value().headers; @@ -406,7 +396,6 @@ TEST_F(SigV4AuthTest, CreateCustomDelegateNone) { EXPECT_EQ(headers.find("original-authorization"), headers.end()); } -// Java: createInvalidCustomDelegate TEST_F(SigV4AuthTest, CreateInvalidCustomDelegateSigV4Circular) { std::unordered_map properties = { {AuthProperties::kAuthType, "sigv4"}, @@ -422,7 +411,6 @@ TEST_F(SigV4AuthTest, CreateInvalidCustomDelegateSigV4Circular) { HasErrorMessage("Cannot delegate a SigV4 auth manager to another SigV4")); } -// Java: contextualSession TEST_F(SigV4AuthTest, ContextualSessionOverridesProperties) { auto properties = MakeSigV4Properties(); properties[AuthProperties::kSigV4SigningRegion] = "us-west-2"; @@ -444,7 +432,7 @@ TEST_F(SigV4AuthTest, ContextualSessionOverridesProperties) { manager_result.value()->ContextualSession(context, catalog_session.value()); ASSERT_THAT(ctx_session, IsOk()); - HTTPRequest request{.method = HttpMethod::kGet, .url = "https://example.com/v1/config"}; + HttpRequest request{.method = HttpMethod::kGet, .url = "https://example.com/v1/config"}; auto auth_result = ctx_session.value()->Authenticate(request); ASSERT_THAT(auth_result, IsOk()); const auto& headers = auth_result.value().headers; @@ -456,7 +444,6 @@ TEST_F(SigV4AuthTest, ContextualSessionOverridesProperties) { << "Expected eu-west-1 in Authorization, got: " << auth_it->second; } -// Java: tableSession (with property override) TEST_F(SigV4AuthTest, TableSessionOverridesProperties) { auto properties = MakeSigV4Properties(); properties[AuthProperties::kSigV4SigningRegion] = "us-west-2"; @@ -479,7 +466,7 @@ TEST_F(SigV4AuthTest, TableSessionOverridesProperties) { catalog_session.value()); ASSERT_THAT(table_session, IsOk()); - HTTPRequest request{.method = HttpMethod::kGet, + HttpRequest request{.method = HttpMethod::kGet, .url = "https://example.com/v1/db1/tables/table1"}; auto auth_result = table_session.value()->Authenticate(request); ASSERT_THAT(auth_result, IsOk()); @@ -492,7 +479,10 @@ TEST_F(SigV4AuthTest, TableSessionOverridesProperties) { << "Expected ap-southeast-1 in Authorization, got: " << auth_it->second; } -TEST_F(SigV4AuthTest, TableSessionInheritsContextualOverrides) { +// Matches Java RESTSigV4AuthManager: a table session derived from a contextual +// parent does NOT inherit the contextual overrides; it merges catalog props +// with table props directly. Contextual and table are independent dimensions. +TEST_F(SigV4AuthTest, TableSessionIgnoresContextualOverrides) { auto properties = MakeSigV4Properties(); properties[AuthProperties::kSigV4SigningRegion] = "us-west-2"; @@ -511,19 +501,18 @@ TEST_F(SigV4AuthTest, TableSessionInheritsContextualOverrides) { ctx_session.value()); ASSERT_THAT(table_session, IsOk()); - HTTPRequest request{.method = HttpMethod::kGet, + HttpRequest request{.method = HttpMethod::kGet, .url = "https://example.com/v1/db1/tables/table1"}; auto auth_result = table_session.value()->Authenticate(request); ASSERT_THAT(auth_result, IsOk()); auto auth_it = auth_result.value().headers.find("authorization"); ASSERT_NE(auth_it, auth_result.value().headers.end()); - EXPECT_TRUE(auth_it->second.find("eu-west-1") != std::string::npos) - << "Table session should inherit eu-west-1 from contextual parent, got: " + EXPECT_TRUE(auth_it->second.find("us-west-2") != std::string::npos) + << "Table session should use the catalog region, not the contextual override, got: " << auth_it->second; } -// Java: close (TestRESTSigV4AuthManager) TEST_F(SigV4AuthTest, ManagerCloseDelegatesToInner) { auto properties = MakeSigV4Properties(); auto manager_result = AuthManagers::Load("test-catalog", properties); @@ -534,3 +523,5 @@ TEST_F(SigV4AuthTest, ManagerCloseDelegatesToInner) { } } // namespace iceberg::rest::auth + +#endif // ICEBERG_SIGV4 From 9127e55e79e98c7614aacb1e9524dc3e1caf4a11 Mon Sep 17 00:00:00 2001 From: Li Jiajia Date: Sun, 17 May 2026 10:14:55 +0800 Subject: [PATCH 13/23] expose explicit AWS SDK lifecycle for SigV4 --- src/iceberg/catalog/rest/auth/aws_sdk.h | 44 ++++++++++ src/iceberg/catalog/rest/auth/meson.build | 1 + .../catalog/rest/auth/sigv4_auth_manager.cc | 82 ++++++++++++++----- src/iceberg/test/sigv4_auth_test.cc | 28 +++++-- 4 files changed, 126 insertions(+), 29 deletions(-) create mode 100644 src/iceberg/catalog/rest/auth/aws_sdk.h diff --git a/src/iceberg/catalog/rest/auth/aws_sdk.h b/src/iceberg/catalog/rest/auth/aws_sdk.h new file mode 100644 index 000000000..b71006289 --- /dev/null +++ b/src/iceberg/catalog/rest/auth/aws_sdk.h @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include "iceberg/catalog/rest/iceberg_rest_export.h" +#include "iceberg/result.h" + +/// \file iceberg/catalog/rest/auth/aws_sdk.h +/// \brief Process-wide AWS SDK lifecycle for SigV4 authentication. +/// +/// Applications using SigV4 should call InitializeAwsSdk() at startup and +/// FinalizeAwsSdk() before exit. If never called, the SDK is lazily +/// initialized on first SigV4 use and leaked at process exit. FinalizeAwsSdk() +/// is intended for process-shutdown sequencing, not concurrent teardown. + +namespace iceberg::rest::auth { + +/// \brief Initialize the AWS SDK. Idempotent. +ICEBERG_REST_EXPORT Status InitializeAwsSdk(); + +/// \brief Shut down the AWS SDK. Refuses if any SigV4 sessions are alive. +ICEBERG_REST_EXPORT Status FinalizeAwsSdk(); + +ICEBERG_REST_EXPORT bool IsAwsSdkInitialized(); +ICEBERG_REST_EXPORT bool IsAwsSdkFinalized(); + +} // namespace iceberg::rest::auth diff --git a/src/iceberg/catalog/rest/auth/meson.build b/src/iceberg/catalog/rest/auth/meson.build index 4de6e821e..790c85f6a 100644 --- a/src/iceberg/catalog/rest/auth/meson.build +++ b/src/iceberg/catalog/rest/auth/meson.build @@ -21,6 +21,7 @@ install_headers( 'auth_managers.h', 'auth_properties.h', 'auth_session.h', + 'aws_sdk.h', 'oauth2_util.h', ], subdir: 'iceberg/catalog/rest/auth', diff --git a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc index 74d4d0416..c4b4b82e6 100644 --- a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc +++ b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc @@ -18,10 +18,13 @@ */ #include "iceberg/catalog/rest/auth/auth_manager_internal.h" +#include "iceberg/catalog/rest/auth/aws_sdk.h" #include "iceberg/catalog/rest/auth/sigv4_auth_manager_internal.h" #ifdef ICEBERG_SIGV4 +# include +# include # include # include @@ -41,23 +44,17 @@ namespace iceberg::rest::auth { namespace { -/// \brief Ensures the AWS SDK is initialized exactly once per process. -/// -/// Aws::InitAPI / ShutdownAPI must bracket the process lifetime, which a -/// library cannot enforce, so we never call ShutdownAPI (leak by design). -class AwsSdkGuard { - public: - static void EnsureInitialized() { - static AwsSdkGuard instance; - (void)instance; - } +enum class LifecycleState : uint8_t { kUninitialized, kInitialized, kFinalized }; - private: - AwsSdkGuard() { - Aws::SDKOptions options; - Aws::InitAPI(options); - } -}; +std::atomic g_state{LifecycleState::kUninitialized}; +std::mutex g_lifecycle_mutex; +Aws::SDKOptions g_sdk_options; +std::atomic g_active_session_count{0}; + +Status EnsureSdkInitialized() { + if (g_state.load() == LifecycleState::kInitialized) return {}; + return InitializeAwsSdk(); +} Aws::Http::HttpMethod ToAwsMethod(HttpMethod method) { switch (method) { @@ -114,9 +111,14 @@ SigV4AuthSession::SigV4AuthSession( signing_name_(std::move(signing_name)), credentials_provider_(std::move(credentials_provider)), signer_(std::make_unique( - credentials_provider_, signing_name_.c_str(), signing_region_.c_str())) {} + credentials_provider_, signing_name_.c_str(), signing_region_.c_str())) { + // Counted so FinalizeAwsSdk() refuses to ShutdownAPI while sessions exist. + g_active_session_count.fetch_add(1, std::memory_order_relaxed); +} -SigV4AuthSession::~SigV4AuthSession() = default; +SigV4AuthSession::~SigV4AuthSession() { + g_active_session_count.fetch_sub(1, std::memory_order_relaxed); +} Result SigV4AuthSession::Authenticate(const HttpRequest& request) { ICEBERG_ASSIGN_OR_RAISE(auto delegate_request, delegate_->Authenticate(request)); @@ -189,7 +191,7 @@ SigV4AuthManager::~SigV4AuthManager() = default; Result> SigV4AuthManager::InitSession( HttpClient& init_client, const std::unordered_map& properties) { - AwsSdkGuard::EnsureInitialized(); + ICEBERG_RETURN_UNEXPECTED(EnsureSdkInitialized()); ICEBERG_ASSIGN_OR_RAISE(auto delegate_session, delegate_->InitSession(init_client, properties)); return WrapSession(std::move(delegate_session), properties); @@ -198,7 +200,7 @@ Result> SigV4AuthManager::InitSession( Result> SigV4AuthManager::CatalogSession( HttpClient& shared_client, const std::unordered_map& properties) { - AwsSdkGuard::EnsureInitialized(); + ICEBERG_RETURN_UNEXPECTED(EnsureSdkInitialized()); catalog_properties_ = properties; ICEBERG_ASSIGN_OR_RAISE(auto delegate_session, delegate_->CatalogSession(shared_client, properties)); @@ -323,6 +325,35 @@ Result> MakeSigV4AuthManager( return std::make_unique(std::move(delegate)); } +Status InitializeAwsSdk() { + std::lock_guard lock(g_lifecycle_mutex); + auto state = g_state.load(); + if (state == LifecycleState::kInitialized) return {}; + if (state == LifecycleState::kFinalized) { + return InvalidArgument("AWS SDK has already been finalized; cannot reinitialize"); + } + Aws::InitAPI(g_sdk_options); + g_state.store(LifecycleState::kInitialized); + return {}; +} + +Status FinalizeAwsSdk() { + std::lock_guard lock(g_lifecycle_mutex); + if (g_state.load() != LifecycleState::kInitialized) return {}; + auto live = g_active_session_count.load(); + if (live != 0) { + return Invalid( + "Cannot finalize AWS SDK while {} SigV4 auth session(s) are still alive", live); + } + Aws::ShutdownAPI(g_sdk_options); + g_state.store(LifecycleState::kFinalized); + return {}; +} + +bool IsAwsSdkInitialized() { return g_state.load() == LifecycleState::kInitialized; } + +bool IsAwsSdkFinalized() { return g_state.load() == LifecycleState::kFinalized; } + } // namespace iceberg::rest::auth #else // !ICEBERG_SIGV4 @@ -336,6 +367,17 @@ Result> MakeSigV4AuthManager( "SigV4 authentication is not built; configure with -DICEBERG_SIGV4=ON"); } +Status InitializeAwsSdk() { + return NotSupported( + "SigV4 authentication is not built; configure with -DICEBERG_SIGV4=ON"); +} + +Status FinalizeAwsSdk() { return {}; } + +bool IsAwsSdkInitialized() { return false; } + +bool IsAwsSdkFinalized() { return false; } + } // namespace iceberg::rest::auth #endif // ICEBERG_SIGV4 diff --git a/src/iceberg/test/sigv4_auth_test.cc b/src/iceberg/test/sigv4_auth_test.cc index 78e469af7..a05a3f5a1 100644 --- a/src/iceberg/test/sigv4_auth_test.cc +++ b/src/iceberg/test/sigv4_auth_test.cc @@ -22,13 +22,13 @@ # include # include -# include # include # include # include "iceberg/catalog/rest/auth/auth_managers.h" # include "iceberg/catalog/rest/auth/auth_properties.h" # include "iceberg/catalog/rest/auth/auth_session.h" +# include "iceberg/catalog/rest/auth/aws_sdk.h" # include "iceberg/catalog/rest/auth/sigv4_auth_manager_internal.h" # include "iceberg/catalog/rest/http_client.h" # include "iceberg/table_identifier.h" @@ -38,14 +38,7 @@ namespace iceberg::rest::auth { class SigV4AuthTest : public ::testing::Test { protected: - static void SetUpTestSuite() { - static bool initialized = false; - if (!initialized) { - Aws::SDKOptions options; - Aws::InitAPI(options); - initialized = true; - } - } + static void SetUpTestSuite() { ASSERT_THAT(InitializeAwsSdk(), IsOk()); } HttpClient client_{{}}; @@ -61,6 +54,23 @@ class SigV4AuthTest : public ::testing::Test { } }; +TEST_F(SigV4AuthTest, LifecycleInitializeIsIdempotent) { + EXPECT_THAT(InitializeAwsSdk(), IsOk()); + EXPECT_TRUE(IsAwsSdkInitialized()); + EXPECT_FALSE(IsAwsSdkFinalized()); +} + +TEST_F(SigV4AuthTest, LifecycleFinalizeRefusesWhileSessionsAlive) { + auto properties = MakeSigV4Properties(); + auto manager_result = AuthManagers::Load("test-catalog", properties); + ASSERT_THAT(manager_result, IsOk()); + auto session_result = manager_result.value()->CatalogSession(client_, properties); + ASSERT_THAT(session_result, IsOk()); + + EXPECT_THAT(FinalizeAwsSdk(), IsError(ErrorKind::kInvalid)); + EXPECT_TRUE(IsAwsSdkInitialized()); +} + TEST_F(SigV4AuthTest, LoadSigV4AuthManager) { auto properties = MakeSigV4Properties(); auto manager_result = AuthManagers::Load("test-catalog", properties); From a1d58c33004b9db5eaf928cb3b8b456fe830414f Mon Sep 17 00:00:00 2001 From: Li Jiajia Date: Sun, 17 May 2026 19:34:48 +0800 Subject: [PATCH 14/23] fold sigv4_auth_manager.cc into the rest sources set --- src/iceberg/catalog/rest/CMakeLists.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/iceberg/catalog/rest/CMakeLists.txt b/src/iceberg/catalog/rest/CMakeLists.txt index b396bf6f2..2c0f28b99 100644 --- a/src/iceberg/catalog/rest/CMakeLists.txt +++ b/src/iceberg/catalog/rest/CMakeLists.txt @@ -23,6 +23,7 @@ set(ICEBERG_REST_SOURCES auth/auth_properties.cc auth/auth_session.cc auth/oauth2_util.cc + auth/sigv4_auth_manager.cc auth/token_refresh_scheduler.cc catalog_properties.cc endpoint.cc @@ -35,8 +36,6 @@ set(ICEBERG_REST_SOURCES rest_util.cc types.cc) -list(APPEND ICEBERG_REST_SOURCES auth/sigv4_auth_manager.cc) - set(ICEBERG_REST_STATIC_BUILD_INTERFACE_LIBS) set(ICEBERG_REST_SHARED_BUILD_INTERFACE_LIBS) set(ICEBERG_REST_STATIC_INSTALL_INTERFACE_LIBS) From a3ffd23ae44cc23cf59e143501e02e0082941451 Mon Sep 17 00:00:00 2001 From: Li Jiajia Date: Sun, 17 May 2026 19:45:23 +0800 Subject: [PATCH 15/23] wrap AWS SDK lifecycle globals in AwsSdkLifecycle singleton --- .../catalog/rest/auth/sigv4_auth_manager.cc | 102 +++++++++++------- 1 file changed, 63 insertions(+), 39 deletions(-) diff --git a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc index c4b4b82e6..b91fbed1f 100644 --- a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc +++ b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc @@ -44,17 +44,63 @@ namespace iceberg::rest::auth { namespace { -enum class LifecycleState : uint8_t { kUninitialized, kInitialized, kFinalized }; +class AwsSdkLifecycle { + public: + static AwsSdkLifecycle& Instance() { + static AwsSdkLifecycle instance; + return instance; + } -std::atomic g_state{LifecycleState::kUninitialized}; -std::mutex g_lifecycle_mutex; -Aws::SDKOptions g_sdk_options; -std::atomic g_active_session_count{0}; + Status Initialize() { + std::lock_guard lock(mutex_); + auto s = state_.load(); + if (s == State::kInitialized) return {}; + if (s == State::kFinalized) { + return InvalidArgument("AWS SDK has already been finalized; cannot reinitialize"); + } + Aws::InitAPI(options_); + state_.store(State::kInitialized); + return {}; + } -Status EnsureSdkInitialized() { - if (g_state.load() == LifecycleState::kInitialized) return {}; - return InitializeAwsSdk(); -} + Status Finalize() { + std::lock_guard lock(mutex_); + if (state_.load() != State::kInitialized) return {}; + auto live = active_session_count_.load(); + if (live != 0) { + return Invalid( + "Cannot finalize AWS SDK while {} SigV4 auth session(s) are still alive", live); + } + Aws::ShutdownAPI(options_); + state_.store(State::kFinalized); + return {}; + } + + Status EnsureInitialized() { + if (state_.load() == State::kInitialized) return {}; + return Initialize(); + } + + bool IsInitialized() const { return state_.load() == State::kInitialized; } + bool IsFinalized() const { return state_.load() == State::kFinalized; } + + void IncrementSessionCount() { + active_session_count_.fetch_add(1, std::memory_order_relaxed); + } + void DecrementSessionCount() { + active_session_count_.fetch_sub(1, std::memory_order_relaxed); + } + + private: + enum class State : uint8_t { kUninitialized, kInitialized, kFinalized }; + + AwsSdkLifecycle() = default; + + std::atomic state_{State::kUninitialized}; + std::mutex mutex_; + Aws::SDKOptions options_; + std::atomic active_session_count_{0}; +}; Aws::Http::HttpMethod ToAwsMethod(HttpMethod method) { switch (method) { @@ -112,12 +158,11 @@ SigV4AuthSession::SigV4AuthSession( credentials_provider_(std::move(credentials_provider)), signer_(std::make_unique( credentials_provider_, signing_name_.c_str(), signing_region_.c_str())) { - // Counted so FinalizeAwsSdk() refuses to ShutdownAPI while sessions exist. - g_active_session_count.fetch_add(1, std::memory_order_relaxed); + AwsSdkLifecycle::Instance().IncrementSessionCount(); } SigV4AuthSession::~SigV4AuthSession() { - g_active_session_count.fetch_sub(1, std::memory_order_relaxed); + AwsSdkLifecycle::Instance().DecrementSessionCount(); } Result SigV4AuthSession::Authenticate(const HttpRequest& request) { @@ -191,7 +236,7 @@ SigV4AuthManager::~SigV4AuthManager() = default; Result> SigV4AuthManager::InitSession( HttpClient& init_client, const std::unordered_map& properties) { - ICEBERG_RETURN_UNEXPECTED(EnsureSdkInitialized()); + ICEBERG_RETURN_UNEXPECTED(AwsSdkLifecycle::Instance().EnsureInitialized()); ICEBERG_ASSIGN_OR_RAISE(auto delegate_session, delegate_->InitSession(init_client, properties)); return WrapSession(std::move(delegate_session), properties); @@ -200,7 +245,7 @@ Result> SigV4AuthManager::InitSession( Result> SigV4AuthManager::CatalogSession( HttpClient& shared_client, const std::unordered_map& properties) { - ICEBERG_RETURN_UNEXPECTED(EnsureSdkInitialized()); + ICEBERG_RETURN_UNEXPECTED(AwsSdkLifecycle::Instance().EnsureInitialized()); catalog_properties_ = properties; ICEBERG_ASSIGN_OR_RAISE(auto delegate_session, delegate_->CatalogSession(shared_client, properties)); @@ -325,34 +370,13 @@ Result> MakeSigV4AuthManager( return std::make_unique(std::move(delegate)); } -Status InitializeAwsSdk() { - std::lock_guard lock(g_lifecycle_mutex); - auto state = g_state.load(); - if (state == LifecycleState::kInitialized) return {}; - if (state == LifecycleState::kFinalized) { - return InvalidArgument("AWS SDK has already been finalized; cannot reinitialize"); - } - Aws::InitAPI(g_sdk_options); - g_state.store(LifecycleState::kInitialized); - return {}; -} +Status InitializeAwsSdk() { return AwsSdkLifecycle::Instance().Initialize(); } -Status FinalizeAwsSdk() { - std::lock_guard lock(g_lifecycle_mutex); - if (g_state.load() != LifecycleState::kInitialized) return {}; - auto live = g_active_session_count.load(); - if (live != 0) { - return Invalid( - "Cannot finalize AWS SDK while {} SigV4 auth session(s) are still alive", live); - } - Aws::ShutdownAPI(g_sdk_options); - g_state.store(LifecycleState::kFinalized); - return {}; -} +Status FinalizeAwsSdk() { return AwsSdkLifecycle::Instance().Finalize(); } -bool IsAwsSdkInitialized() { return g_state.load() == LifecycleState::kInitialized; } +bool IsAwsSdkInitialized() { return AwsSdkLifecycle::Instance().IsInitialized(); } -bool IsAwsSdkFinalized() { return g_state.load() == LifecycleState::kFinalized; } +bool IsAwsSdkFinalized() { return AwsSdkLifecycle::Instance().IsFinalized(); } } // namespace iceberg::rest::auth From 774de56d6248a146e2099a2600b2d496f38eddd9 Mon Sep 17 00:00:00 2001 From: Li Jiajia Date: Fri, 22 May 2026 00:23:57 +0800 Subject: [PATCH 16/23] address review feedback --- .../workflows/{s3_test.yml => aws_test.yml} | 44 ++++-- .github/workflows/sigv4_test.yml | 71 --------- ci/scripts/build_iceberg.sh | 3 - .../IcebergThirdpartyToolchain.cmake | 5 + src/iceberg/catalog/rest/CMakeLists.txt | 1 + src/iceberg/catalog/rest/auth/auth_manager.cc | 3 +- src/iceberg/catalog/rest/auth/auth_manager.h | 6 +- .../catalog/rest/auth/auth_managers.cc | 16 +- .../catalog/rest/auth/auth_properties.h | 3 + src/iceberg/catalog/rest/auth/meson.build | 1 + .../catalog/rest/auth/session_context.h | 39 +++++ .../catalog/rest/auth/sigv4_auth_manager.cc | 145 +++++++++++++----- .../rest/auth/sigv4_auth_manager_internal.h | 14 +- src/iceberg/catalog/rest/http_client.cc | 10 +- src/iceberg/catalog/rest/http_request.cc | 83 ++++++++++ src/iceberg/catalog/rest/http_request.h | 71 ++++++++- src/iceberg/catalog/rest/meson.build | 1 + src/iceberg/iceberg-config.cmake.in | 13 +- src/iceberg/test/sigv4_auth_test.cc | 30 +++- 19 files changed, 402 insertions(+), 157 deletions(-) rename .github/workflows/{s3_test.yml => aws_test.yml} (61%) delete mode 100644 .github/workflows/sigv4_test.yml create mode 100644 src/iceberg/catalog/rest/auth/session_context.h create mode 100644 src/iceberg/catalog/rest/http_request.cc diff --git a/.github/workflows/s3_test.yml b/.github/workflows/aws_test.yml similarity index 61% rename from .github/workflows/s3_test.yml rename to .github/workflows/aws_test.yml index 0cf8e8b1e..ba8268b81 100644 --- a/.github/workflows/s3_test.yml +++ b/.github/workflows/aws_test.yml @@ -15,8 +15,10 @@ # specific language governing permissions and limitations # under the License. -# S3-backed tests against MinIO (Linux and macOS only). -name: S3 Tests +# AWS-related tests. ICEBERG_S3 (Arrow's bundled AWS SDK) and ICEBERG_SIGV4 +# (vcpkg-installed aws-cpp-sdk-core) are tested in separate jobs: linking +# both into one binary causes ODR conflicts on the shared AWS SDK symbols. +name: AWS Tests on: push: @@ -39,21 +41,31 @@ env: ICEBERG_HOME: /tmp/iceberg jobs: - s3-minio: + aws: if: ${{ github.event_name != 'pull_request' || github.event.pull_request.draft == false }} - name: S3 (${{ matrix.title }}) + name: AWS (${{ matrix.title }}) runs-on: ${{ matrix.runs-on }} - timeout-minutes: 35 + timeout-minutes: 45 strategy: fail-fast: false matrix: include: - - title: AMD64 Ubuntu 24.04 + - title: AMD64 Ubuntu 24.04, S3 runs-on: ubuntu-24.04 CC: gcc-14 CXX: g++-14 - - title: AArch64 macOS 26 + s3: "ON" + sigv4: "OFF" + - title: AMD64 Ubuntu 24.04, SigV4 + runs-on: ubuntu-24.04 + CC: gcc-14 + CXX: g++-14 + s3: "OFF" + sigv4: "ON" + - title: AArch64 macOS 26, S3 runs-on: macos-26 + s3: "ON" + sigv4: "OFF" env: ICEBERG_TEST_S3_URI: s3://iceberg-test AWS_ACCESS_KEY_ID: minio @@ -70,14 +82,28 @@ jobs: if: ${{ startsWith(matrix.runs-on, 'ubuntu') }} shell: bash run: sudo apt-get update && sudo apt-get install -y libcurl4-openssl-dev + - name: Cache vcpkg packages + if: ${{ matrix.sigv4 == 'ON' }} + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + id: vcpkg-cache + with: + path: /usr/local/share/vcpkg/installed + key: vcpkg-x64-linux-aws-sdk-cpp-core-${{ hashFiles('.github/workflows/aws_test.yml') }} + - name: Install AWS SDK via vcpkg + if: ${{ matrix.sigv4 == 'ON' && steps.vcpkg-cache.outputs.cache-hit != 'true' }} + shell: bash + run: vcpkg install aws-sdk-cpp[core]:x64-linux - name: Set Ubuntu Compilers if: ${{ startsWith(matrix.runs-on, 'ubuntu') }} run: | echo "CC=${{ matrix.CC }}" >> $GITHUB_ENV echo "CXX=${{ matrix.CXX }}" >> $GITHUB_ENV - name: Start MinIO + if: ${{ matrix.s3 == 'ON' }} shell: bash run: bash ci/scripts/start_minio.sh - - name: Build and test Iceberg with S3 + - name: Build and test Iceberg shell: bash - run: ci/scripts/build_iceberg.sh "$(pwd)" OFF OFF ON + env: + CMAKE_TOOLCHAIN_FILE: ${{ matrix.sigv4 == 'ON' && '/usr/local/share/vcpkg/scripts/buildsystems/vcpkg.cmake' || '' }} + run: ci/scripts/build_iceberg.sh "$(pwd)" OFF OFF ${{ matrix.s3 }} ${{ matrix.sigv4 }} diff --git a/.github/workflows/sigv4_test.yml b/.github/workflows/sigv4_test.yml deleted file mode 100644 index f9ee3b9c9..000000000 --- a/.github/workflows/sigv4_test.yml +++ /dev/null @@ -1,71 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# SigV4 build + unit tests (Linux only; aws-cpp-sdk-core via vcpkg). -name: SigV4 Tests - -on: - push: - branches: - - '**' - - '!dependabot/**' - tags: - - '**' - pull_request: - -concurrency: - group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} - cancel-in-progress: true - -permissions: - contents: read - -env: - ICEBERG_HOME: /tmp/iceberg - -jobs: - sigv4: - name: SigV4 (AMD64 Ubuntu 24.04) - runs-on: ubuntu-24.04 - timeout-minutes: 35 - env: - CC: gcc-14 - CXX: g++-14 - AWS_EC2_METADATA_DISABLED: "TRUE" - steps: - - name: Checkout iceberg-cpp - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - persist-credentials: false - - name: Install dependencies - shell: bash - run: sudo apt-get update && sudo apt-get install -y libcurl4-openssl-dev - - name: Cache vcpkg packages - uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 - id: vcpkg-cache - with: - path: /usr/local/share/vcpkg/installed - key: vcpkg-x64-linux-aws-sdk-cpp-core-${{ hashFiles('.github/workflows/sigv4_test.yml') }} - - name: Install AWS SDK via vcpkg - if: steps.vcpkg-cache.outputs.cache-hit != 'true' - shell: bash - run: vcpkg install aws-sdk-cpp[core]:x64-linux - - name: Build and test Iceberg with SigV4 - shell: bash - env: - CMAKE_TOOLCHAIN_FILE: /usr/local/share/vcpkg/scripts/buildsystems/vcpkg.cmake - run: ci/scripts/build_iceberg.sh "$(pwd)" OFF OFF OFF ON diff --git a/ci/scripts/build_iceberg.sh b/ci/scripts/build_iceberg.sh index 9e2756ef5..330e5946a 100755 --- a/ci/scripts/build_iceberg.sh +++ b/ci/scripts/build_iceberg.sh @@ -61,9 +61,6 @@ if is_windows; then CMAKE_ARGS+=("-DCMAKE_BUILD_TYPE=Release") else CMAKE_ARGS+=("-DCMAKE_BUILD_TYPE=Debug") - if [[ -n "${CMAKE_TOOLCHAIN_FILE:-}" ]]; then - CMAKE_ARGS+=("-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}") - fi fi if [[ "${build_enable_sccache}" == "ON" ]]; then diff --git a/cmake_modules/IcebergThirdpartyToolchain.cmake b/cmake_modules/IcebergThirdpartyToolchain.cmake index 56f800786..60ab973df 100644 --- a/cmake_modules/IcebergThirdpartyToolchain.cmake +++ b/cmake_modules/IcebergThirdpartyToolchain.cmake @@ -643,6 +643,11 @@ function(resolve_aws_sdk_dependency) set(ICEBERG_SYSTEM_DEPENDENCIES ${ICEBERG_SYSTEM_DEPENDENCIES} PARENT_SCOPE) + # Forwarded to find_dependency(AWSSDK ...) in iceberg-config.cmake.in so + # downstream installed builds load aws-cpp-sdk-core via AWSSDK_FIND_COMPONENTS. + set(ICEBERG_FIND_EXTRA_ARGS_AWSSDK + "COMPONENTS;core" + PARENT_SCOPE) endfunction() if(ICEBERG_SIGV4) diff --git a/src/iceberg/catalog/rest/CMakeLists.txt b/src/iceberg/catalog/rest/CMakeLists.txt index 2c0f28b99..e8f595474 100644 --- a/src/iceberg/catalog/rest/CMakeLists.txt +++ b/src/iceberg/catalog/rest/CMakeLists.txt @@ -29,6 +29,7 @@ set(ICEBERG_REST_SOURCES endpoint.cc error_handlers.cc http_client.cc + http_request.cc json_serde.cc resource_paths.cc rest_catalog.cc diff --git a/src/iceberg/catalog/rest/auth/auth_manager.cc b/src/iceberg/catalog/rest/auth/auth_manager.cc index 247c1d538..10290489a 100644 --- a/src/iceberg/catalog/rest/auth/auth_manager.cc +++ b/src/iceberg/catalog/rest/auth/auth_manager.cc @@ -38,8 +38,7 @@ Result> AuthManager::InitSession( } Result> AuthManager::ContextualSession( - [[maybe_unused]] const std::unordered_map& context, - std::shared_ptr parent) { + [[maybe_unused]] const SessionContext& context, std::shared_ptr parent) { // By default, return the parent session as-is return parent; } diff --git a/src/iceberg/catalog/rest/auth/auth_manager.h b/src/iceberg/catalog/rest/auth/auth_manager.h index c192ecf2c..e0acebab2 100644 --- a/src/iceberg/catalog/rest/auth/auth_manager.h +++ b/src/iceberg/catalog/rest/auth/auth_manager.h @@ -23,6 +23,7 @@ #include #include +#include "iceberg/catalog/rest/auth/session_context.h" #include "iceberg/catalog/rest/iceberg_rest_export.h" #include "iceberg/catalog/rest/type_fwd.h" #include "iceberg/result.h" @@ -70,13 +71,12 @@ class ICEBERG_REST_EXPORT AuthManager { /// This method is used by SessionCatalog to create sessions for different contexts /// (e.g., different users or tenants). /// - /// \param context Context properties (e.g., user credentials, tenant info). + /// \param context Per-session properties and credentials. /// \param parent Catalog session to inherit from or return as-is. /// \return A context-specific session, or the parent session if no context-specific /// session is needed, or an error if session creation fails. virtual Result> ContextualSession( - const std::unordered_map& context, - std::shared_ptr parent); + const SessionContext& context, std::shared_ptr parent); /// \brief Create or reuse a session scoped to a single table/view. /// diff --git a/src/iceberg/catalog/rest/auth/auth_managers.cc b/src/iceberg/catalog/rest/auth/auth_managers.cc index 8c17af285..6ee2637b3 100644 --- a/src/iceberg/catalog/rest/auth/auth_managers.cc +++ b/src/iceberg/catalog/rest/auth/auth_managers.cc @@ -46,6 +46,12 @@ const std::unordered_set& KnownAuthTypes() // Infer the authentication type from properties. std::string InferAuthType( const std::unordered_map& properties) { + // Deprecated alias: rest.sigv4-enabled=true forces SigV4. + if (auto it = properties.find(AuthProperties::kSigV4Enabled); + it != properties.end() && StringUtils::EqualsIgnoreCase(it->second, "true")) { + return AuthProperties::kAuthTypeSigV4; + } + auto it = properties.find(AuthProperties::kAuthType); if (it != properties.end() && !it->second.empty()) { return StringUtils::ToLower(it->second); @@ -61,8 +67,8 @@ std::string InferAuthType( return AuthProperties::kAuthTypeNone; } -AuthManagerRegistry CreateDefaultRegistry() { - AuthManagerRegistry registry = { +AuthManagerRegistry& GetRegistry() { + static AuthManagerRegistry registry = { {AuthProperties::kAuthTypeNone, MakeNoopAuthManager}, {AuthProperties::kAuthTypeBasic, MakeBasicAuthManager}, {AuthProperties::kAuthTypeOAuth2, MakeOAuth2Manager}, @@ -71,12 +77,6 @@ AuthManagerRegistry CreateDefaultRegistry() { return registry; } -// Get the global registry of auth manager factories. -AuthManagerRegistry& GetRegistry() { - static AuthManagerRegistry registry = CreateDefaultRegistry(); - return registry; -} - } // namespace void AuthManagers::Register(std::string_view auth_type, AuthManagerFactory factory) { diff --git a/src/iceberg/catalog/rest/auth/auth_properties.h b/src/iceberg/catalog/rest/auth/auth_properties.h index f6dfc4ae8..745cb1ff3 100644 --- a/src/iceberg/catalog/rest/auth/auth_properties.h +++ b/src/iceberg/catalog/rest/auth/auth_properties.h @@ -54,6 +54,9 @@ class ICEBERG_REST_EXPORT AuthProperties : public ConfigBase { // ---- SigV4 entries ---- + /// Deprecated: `rest.sigv4-enabled=true` selects SigV4 regardless of + /// `rest.auth.type`. + inline static const std::string kSigV4Enabled = "rest.sigv4-enabled"; inline static const std::string kSigV4DelegateAuthType = "rest.auth.sigv4.delegate-auth-type"; inline static const std::string kSigV4SigningRegion = "rest.signing-region"; diff --git a/src/iceberg/catalog/rest/auth/meson.build b/src/iceberg/catalog/rest/auth/meson.build index 790c85f6a..954bc4d02 100644 --- a/src/iceberg/catalog/rest/auth/meson.build +++ b/src/iceberg/catalog/rest/auth/meson.build @@ -23,6 +23,7 @@ install_headers( 'auth_session.h', 'aws_sdk.h', 'oauth2_util.h', + 'session_context.h', ], subdir: 'iceberg/catalog/rest/auth', ) diff --git a/src/iceberg/catalog/rest/auth/session_context.h b/src/iceberg/catalog/rest/auth/session_context.h new file mode 100644 index 000000000..069eeb95d --- /dev/null +++ b/src/iceberg/catalog/rest/auth/session_context.h @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include +#include + +#include "iceberg/catalog/rest/iceberg_rest_export.h" + +namespace iceberg::rest::auth { + +/// \brief Per-session context passed to AuthManager::ContextualSession. +/// +/// Mirrors Java's `SessionCatalog.SessionContext`. Separate `properties` and +/// `credentials` so per-context credential overrides don't silently collapse +/// into properties. +struct ICEBERG_REST_EXPORT SessionContext { + std::unordered_map properties; + std::unordered_map credentials; +}; + +} // namespace iceberg::rest::auth diff --git a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc index b91fbed1f..15b3336fa 100644 --- a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc +++ b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc @@ -37,6 +37,7 @@ # include "iceberg/catalog/rest/auth/auth_managers.h" # include "iceberg/catalog/rest/auth/auth_properties.h" +# include "iceberg/catalog/rest/auth/oauth2_util.h" # include "iceberg/util/macros.h" # include "iceberg/util/string_util.h" @@ -66,10 +67,10 @@ class AwsSdkLifecycle { Status Finalize() { std::lock_guard lock(mutex_); if (state_.load() != State::kInitialized) return {}; - auto live = active_session_count_.load(); - if (live != 0) { + if (active_session_count_ != 0) { return Invalid( - "Cannot finalize AWS SDK while {} SigV4 auth session(s) are still alive", live); + "Cannot finalize AWS SDK while {} SigV4 auth session(s) are still alive", + active_session_count_); } Aws::ShutdownAPI(options_); state_.store(State::kFinalized); @@ -84,11 +85,21 @@ class AwsSdkLifecycle { bool IsInitialized() const { return state_.load() == State::kInitialized; } bool IsFinalized() const { return state_.load() == State::kFinalized; } - void IncrementSessionCount() { - active_session_count_.fetch_add(1, std::memory_order_relaxed); + // Holds the mutex while incrementing, so Finalize() can never observe a + // stale 0 between its count check and Aws::ShutdownAPI. + Status RegisterSession() { + std::lock_guard lock(mutex_); + if (state_.load() != State::kInitialized) { + return InvalidArgument( + "AWS SDK is not initialized; cannot create a SigV4AuthSession"); + } + ++active_session_count_; + return {}; } - void DecrementSessionCount() { - active_session_count_.fetch_sub(1, std::memory_order_relaxed); + + void UnregisterSession() { + std::lock_guard lock(mutex_); + --active_session_count_; } private: @@ -99,7 +110,7 @@ class AwsSdkLifecycle { std::atomic state_{State::kUninitialized}; std::mutex mutex_; Aws::SDKOptions options_; - std::atomic active_session_count_{0}; + size_t active_session_count_{0}; // guarded by mutex_ }; Aws::Http::HttpMethod ToAwsMethod(HttpMethod method) { @@ -157,13 +168,9 @@ SigV4AuthSession::SigV4AuthSession( signing_name_(std::move(signing_name)), credentials_provider_(std::move(credentials_provider)), signer_(std::make_unique( - credentials_provider_, signing_name_.c_str(), signing_region_.c_str())) { - AwsSdkLifecycle::Instance().IncrementSessionCount(); -} + credentials_provider_, signing_name_.c_str(), signing_region_.c_str())) {} -SigV4AuthSession::~SigV4AuthSession() { - AwsSdkLifecycle::Instance().DecrementSessionCount(); -} +SigV4AuthSession::~SigV4AuthSession() { AwsSdkLifecycle::Instance().UnregisterSession(); } Result SigV4AuthSession::Authenticate(const HttpRequest& request) { ICEBERG_ASSIGN_OR_RAISE(auto delegate_request, delegate_->Authenticate(request)); @@ -171,7 +178,7 @@ Result SigV4AuthSession::Authenticate(const HttpRequest& request) { std::unordered_map signing_headers; for (const auto& [name, value] : original_headers) { - if (StringUtils::EqualsIgnoreCase(name, "Authorization")) { + if (StringUtils::EqualsIgnoreCase(name, kAuthorizationHeader)) { signing_headers[std::string(kRelocatedHeaderPrefix) + name] = value; } else { signing_headers[name] = value; @@ -204,21 +211,34 @@ Result SigV4AuthSession::Authenticate(const HttpRequest& request) { .message = "SigV4 signing failed"}); } + // Build a case-insensitive index of original headers once so the outer + // loop over signed headers below is O(N + M) instead of O(N * M). + std::unordered_map> originals_by_name; + for (const auto& [orig_name, orig_value] : original_headers) { + originals_by_name[StringUtils::ToLower(orig_name)].push_back(&orig_value); + } + HttpRequest signed_request{.method = delegate_request.method, .url = std::move(delegate_request.url), .headers = {}, .body = std::move(delegate_request.body)}; + signed_request.headers.reserve(aws_request->GetHeaders().size() + + original_headers.size()); for (const auto& [aws_name, aws_value] : aws_request->GetHeaders()) { std::string name(aws_name.c_str(), aws_name.size()); std::string value(aws_value.c_str(), aws_value.size()); - for (const auto& [orig_name, orig_value] : original_headers) { - if (StringUtils::EqualsIgnoreCase(orig_name, name) && orig_value != value) { - signed_request.headers[std::string(kRelocatedHeaderPrefix) + orig_name] = - orig_value; - break; + if (auto it = originals_by_name.find(StringUtils::ToLower(name)); + it != originals_by_name.end()) { + // Preserve every original entry with this name whose value the signer + // didn't produce, matching Java updateRequestHeaders. + for (const auto* orig_value : it->second) { + if (*orig_value != value) { + signed_request.headers.add(std::string(kRelocatedHeaderPrefix) + name, + *orig_value); + } } } - signed_request.headers[std::move(name)] = std::move(value); + signed_request.headers.add(std::move(name), std::move(value)); } return signed_request; @@ -252,14 +272,11 @@ Result> SigV4AuthManager::CatalogSession( return WrapSession(std::move(delegate_session), properties); } -// Contextual and table sessions both merge against the stored catalog -// properties, matching Java's RESTSigV4AuthManager. Contextual overrides do -// not propagate into child table sessions; the two derivations are -// independent dimensions on top of the catalog baseline. +// Both derived sessions merge against the stored catalog_properties_, so +// contextual overrides do not propagate into child table sessions. Result> SigV4AuthManager::ContextualSession( - const std::unordered_map& context, - std::shared_ptr parent) { + const SessionContext& context, std::shared_ptr parent) { auto sigv4_parent = std::dynamic_pointer_cast(std::move(parent)); ICEBERG_PRECHECK(sigv4_parent != nullptr, "SigV4AuthManager parent must be a SigV4AuthSession"); @@ -267,8 +284,12 @@ Result> SigV4AuthManager::ContextualSession( ICEBERG_ASSIGN_OR_RAISE(auto delegate_session, delegate_->ContextualSession( context, sigv4_parent->delegate())); - auto merged = MergeProperties(catalog_properties_, context); - return WrapSession(std::move(delegate_session), merged); + // Merge context.credentials into properties so credential overrides aren't + // dropped. + auto merged = MergeProperties(catalog_properties_, + MergeProperties(context.properties, context.credentials)); + return WrapSession(std::move(delegate_session), merged, + sigv4_parent->credentials_provider()); } Result> SigV4AuthManager::TableSession( @@ -284,7 +305,8 @@ Result> SigV4AuthManager::TableSession( delegate_->TableSession(table, properties, sigv4_parent->delegate())); auto merged = MergeProperties(catalog_properties_, properties); - return WrapSession(std::move(delegate_session), merged); + return WrapSession(std::move(delegate_session), merged, + sigv4_parent->credentials_provider()); } Status SigV4AuthManager::Close() { return delegate_->Close(); } @@ -322,10 +344,12 @@ std::string SigV4AuthManager::ResolveSigningRegion( it != properties.end() && !it->second.empty()) { return it->second; } - // Delegates the full resolution chain (AWS_DEFAULT_REGION / AWS_REGION env, - // ~/.aws/config profile, EC2/ECS IMDS, fallback us-east-1) to the AWS SDK. - // Set AWS_EC2_METADATA_DISABLED=true to skip IMDS on non-EC2 hosts. - return {Aws::Client::ClientConfiguration().region.c_str()}; + // ClientConfiguration() walks env / profile / IMDS / us-east-1; the IMDS + // step can block for seconds on non-EC2 hosts. Resolve once per process + // (set AWS_EC2_METADATA_DISABLED=true to skip IMDS). + static const std::string kSdkResolvedRegion = + std::string(Aws::Client::ClientConfiguration().region.c_str()); + return kSdkResolvedRegion; } std::string SigV4AuthManager::ResolveSigningName( @@ -337,15 +361,56 @@ std::string SigV4AuthManager::ResolveSigningName( return AuthProperties::kSigV4SigningNameDefault; } +namespace { + +// RAII guard so any throw between RegisterSession() and the successful +// SigV4AuthSession construction unwinds the session count. +class SessionSlot { + public: + static Result Reserve() { + ICEBERG_RETURN_UNEXPECTED(AwsSdkLifecycle::Instance().RegisterSession()); + return SessionSlot{}; + } + SessionSlot(SessionSlot&& other) noexcept : armed_(other.armed_) { + other.armed_ = false; + } + SessionSlot& operator=(SessionSlot&&) = delete; + ~SessionSlot() { + if (armed_) AwsSdkLifecycle::Instance().UnregisterSession(); + } + void Release() noexcept { armed_ = false; } + + private: + SessionSlot() = default; + bool armed_ = true; +}; + +} // namespace + Result> SigV4AuthManager::WrapSession( std::shared_ptr delegate_session, - const std::unordered_map& properties) { + const std::unordered_map& properties, + std::shared_ptr reuse_credentials) { + ICEBERG_ASSIGN_OR_RAISE(auto slot, SessionSlot::Reserve()); auto region = ResolveSigningRegion(properties); auto service = ResolveSigningName(properties); - ICEBERG_ASSIGN_OR_RAISE(auto credentials, MakeCredentialsProvider(properties)); - return std::make_shared(std::move(delegate_session), - std::move(region), std::move(service), - std::move(credentials)); + + // Reuse the parent's provider unless properties override keys, avoiding a + // fresh DefaultAWSCredentialsProviderChain (can hit IMDS) per derivation. + auto explicit_keys = properties.find(AuthProperties::kSigV4AccessKeyId); + bool has_explicit_keys = + explicit_keys != properties.end() && !explicit_keys->second.empty(); + std::shared_ptr credentials; + if (reuse_credentials && !has_explicit_keys) { + credentials = std::move(reuse_credentials); + } else { + ICEBERG_ASSIGN_OR_RAISE(credentials, MakeCredentialsProvider(properties)); + } + auto session = + std::make_shared(std::move(delegate_session), std::move(region), + std::move(service), std::move(credentials)); + slot.Release(); + return session; } Result> MakeSigV4AuthManager( @@ -366,6 +431,8 @@ Result> MakeSigV4AuthManager( auto delegate_props = properties; delegate_props[AuthProperties::kAuthType] = delegate_type; + // Strip the legacy flag so the recursive Load doesn't bounce back to SigV4. + delegate_props.erase(AuthProperties::kSigV4Enabled); ICEBERG_ASSIGN_OR_RAISE(auto delegate, AuthManagers::Load(name, delegate_props)); return std::make_unique(std::move(delegate)); } diff --git a/src/iceberg/catalog/rest/auth/sigv4_auth_manager_internal.h b/src/iceberg/catalog/rest/auth/sigv4_auth_manager_internal.h index 8334128e7..f0d921b7e 100644 --- a/src/iceberg/catalog/rest/auth/sigv4_auth_manager_internal.h +++ b/src/iceberg/catalog/rest/auth/sigv4_auth_manager_internal.h @@ -71,6 +71,12 @@ class ICEBERG_REST_EXPORT SigV4AuthSession : public AuthSession { const std::shared_ptr& delegate() const { return delegate_; } + /// Exposed so derived sessions can reuse the chain instead of constructing + /// a fresh DefaultAWSCredentialsProviderChain per derivation. + const std::shared_ptr& credentials_provider() const { + return credentials_provider_; + } + private: std::shared_ptr delegate_; std::string signing_region_; @@ -96,8 +102,7 @@ class ICEBERG_REST_EXPORT SigV4AuthManager : public AuthManager { const std::unordered_map& properties) override; Result> ContextualSession( - const std::unordered_map& context, - std::shared_ptr parent) override; + const SessionContext& context, std::shared_ptr parent) override; Result> TableSession( const TableIdentifier& table, @@ -113,9 +118,12 @@ class ICEBERG_REST_EXPORT SigV4AuthManager : public AuthManager { const std::unordered_map& properties); static std::string ResolveSigningName( const std::unordered_map& properties); + /// \param reuse_credentials If non-null and `properties` has no explicit + /// access keys, this provider is reused instead of building a new one. Result> WrapSession( std::shared_ptr delegate_session, - const std::unordered_map& properties); + const std::unordered_map& properties, + std::shared_ptr reuse_credentials = nullptr); std::unique_ptr delegate_; std::unordered_map catalog_properties_; diff --git a/src/iceberg/catalog/rest/http_client.cc b/src/iceberg/catalog/rest/http_client.cc index 650b09148..2fb29e82f 100644 --- a/src/iceberg/catalog/rest/http_client.cc +++ b/src/iceberg/catalog/rest/http_client.cc @@ -71,12 +71,16 @@ namespace { constexpr std::string_view kRestExceptionType = "RESTException"; /// \brief Merge default headers with per-request headers (per-request wins). -std::unordered_map MergeHeaders( +HttpHeaders MergeHeaders( const std::unordered_map& default_headers, const std::unordered_map& request_headers) { - std::unordered_map merged(default_headers); + HttpHeaders merged; + merged.reserve(default_headers.size() + request_headers.size()); + for (const auto& [key, val] : default_headers) { + merged.try_emplace(key, val); + } for (const auto& [key, val] : request_headers) { - merged.insert_or_assign(key, val); + merged[key] = val; } return merged; } diff --git a/src/iceberg/catalog/rest/http_request.cc b/src/iceberg/catalog/rest/http_request.cc new file mode 100644 index 000000000..4b4880e4c --- /dev/null +++ b/src/iceberg/catalog/rest/http_request.cc @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/catalog/rest/http_request.h" + +#include +#include + +#include "iceberg/util/string_util.h" + +namespace iceberg::rest { + +HttpHeaders::iterator HttpHeaders::find(std::string_view name) { + return std::ranges::find_if(entries_, [name](const Entry& e) { + return StringUtils::EqualsIgnoreCase(e.first, name); + }); +} + +HttpHeaders::const_iterator HttpHeaders::find(std::string_view name) const { + return std::ranges::find_if(entries_, [name](const Entry& e) { + return StringUtils::EqualsIgnoreCase(e.first, name); + }); +} + +std::string& HttpHeaders::at(std::string_view name) { + auto it = find(name); + if (it == entries_.end()) { + throw std::out_of_range("HttpHeaders::at: no header named '" + std::string(name) + + "'"); + } + return it->second; +} + +const std::string& HttpHeaders::at(std::string_view name) const { + auto it = find(name); + if (it == entries_.end()) { + throw std::out_of_range("HttpHeaders::at: no header named '" + std::string(name) + + "'"); + } + return it->second; +} + +std::string& HttpHeaders::operator[](std::string_view name) { + auto it = find(name); + if (it == entries_.end()) { + entries_.emplace_back(std::string(name), std::string{}); + return entries_.back().second; + } + return it->second; +} + +void HttpHeaders::try_emplace(std::string name, std::string value) { + if (find(name) == entries_.end()) { + entries_.emplace_back(std::move(name), std::move(value)); + } +} + +std::size_t HttpHeaders::erase(std::string_view name) { + auto removed = std::ranges::remove_if(entries_, [name](const Entry& e) { + return StringUtils::EqualsIgnoreCase(e.first, name); + }); + std::size_t count = removed.size(); + entries_.erase(removed.begin(), removed.end()); + return count; +} + +} // namespace iceberg::rest diff --git a/src/iceberg/catalog/rest/http_request.h b/src/iceberg/catalog/rest/http_request.h index 90002ae3d..126fbf034 100644 --- a/src/iceberg/catalog/rest/http_request.h +++ b/src/iceberg/catalog/rest/http_request.h @@ -20,9 +20,12 @@ #pragma once #include +#include +#include #include #include -#include +#include +#include #include "iceberg/catalog/rest/iceberg_rest_export.h" @@ -34,12 +37,76 @@ enum class HttpMethod : uint8_t { kGet, kPost, kPut, kDelete, kHead }; /// \brief Convert HttpMethod to string representation. constexpr std::string_view ToString(HttpMethod method); +/// \brief Ordered collection of HTTP headers preserving repeated values. +/// +/// Name comparison is case-insensitive (RFC 7230), insertion order is +/// preserved, and multiple entries with the same name coexist. The map-style +/// methods (`operator[]`, `at`, `try_emplace`, `find`) act on the *first* +/// matching entry; `add` appends a new entry even when the name already +/// exists. Not thread-safe. `add`, `try_emplace`, `operator[]` (when +/// inserting) and `erase` invalidate iterators. +class ICEBERG_REST_EXPORT HttpHeaders { + public: + using Entry = std::pair; + using container_type = std::vector; + using iterator = container_type::iterator; + using const_iterator = container_type::const_iterator; + + HttpHeaders() = default; + HttpHeaders(std::initializer_list init) : entries_(init) {} + + iterator begin() noexcept { return entries_.begin(); } + iterator end() noexcept { return entries_.end(); } + const_iterator begin() const noexcept { return entries_.begin(); } + const_iterator end() const noexcept { return entries_.end(); } + const_iterator cbegin() const noexcept { return entries_.cbegin(); } + const_iterator cend() const noexcept { return entries_.cend(); } + + bool empty() const noexcept { return entries_.empty(); } + std::size_t size() const noexcept { return entries_.size(); } + void clear() noexcept { entries_.clear(); } + void reserve(std::size_t n) { entries_.reserve(n); } + + /// \brief Case-insensitive lookup. Returns iterator to the first entry whose + /// name matches, or end() if none. + iterator find(std::string_view name); + const_iterator find(std::string_view name) const; + + bool contains(std::string_view name) const { return find(name) != end(); } + + /// \brief Returns the value of the first entry with the given name. + /// Throws std::out_of_range if none. + std::string& at(std::string_view name); + const std::string& at(std::string_view name) const; + + /// \brief Map-like upsert: returns reference to the first matching entry's + /// value, inserting a new entry with empty value if none exists. + std::string& operator[](std::string_view name); + + /// \brief Insert only if no entry with the same name exists. + void try_emplace(std::string name, std::string value); + + /// \brief Append an entry, preserving any existing entries with the same + /// name. Use this when repeated headers must survive (e.g. multiple + /// Set-Cookie values). + void add(std::string name, std::string value) { + entries_.emplace_back(std::move(name), std::move(value)); + } + + /// \brief Remove all entries with the given name (case-insensitive). Returns + /// the number of entries removed. + std::size_t erase(std::string_view name); + + private: + container_type entries_; +}; + /// \brief An outgoing HTTP request. Mirrors Java's HttpRequest so signing /// implementations like SigV4 see method, url, headers, and body together. struct ICEBERG_REST_EXPORT HttpRequest { HttpMethod method = HttpMethod::kGet; std::string url; - std::unordered_map headers; + HttpHeaders headers; std::string body; }; diff --git a/src/iceberg/catalog/rest/meson.build b/src/iceberg/catalog/rest/meson.build index 23692a058..f0cbe81d5 100644 --- a/src/iceberg/catalog/rest/meson.build +++ b/src/iceberg/catalog/rest/meson.build @@ -26,6 +26,7 @@ iceberg_rest_sources = files( 'endpoint.cc', 'error_handlers.cc', 'http_client.cc', + 'http_request.cc', 'json_serde.cc', 'resource_paths.cc', 'rest_catalog.cc', diff --git a/src/iceberg/iceberg-config.cmake.in b/src/iceberg/iceberg-config.cmake.in index ae99d6640..dfb0e1dbc 100644 --- a/src/iceberg/iceberg-config.cmake.in +++ b/src/iceberg/iceberg-config.cmake.in @@ -38,6 +38,9 @@ set(ICEBERG_BUILD_STATIC "@ICEBERG_BUILD_STATIC@") set(ICEBERG_SYSTEM_DEPENDENCIES "@ICEBERG_SYSTEM_DEPENDENCIES@") +# Extra args forwarded to find_dependency() for specific dependencies. +set(ICEBERG_FIND_EXTRA_ARGS_AWSSDK "@ICEBERG_FIND_EXTRA_ARGS_AWSSDK@") + include(CMakeFindDependencyMacro) macro(iceberg_find_dependencies dependencies) @@ -49,7 +52,7 @@ macro(iceberg_find_dependencies dependencies) set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}") foreach(dependency ${dependencies}) - find_dependency(${dependency}) + find_dependency(${dependency} ${ICEBERG_FIND_EXTRA_ARGS_${dependency}}) endforeach() if(DEFINED ICEBERG_CMAKE_MODULE_PATH_OLD) @@ -73,13 +76,7 @@ macro(iceberg_find_components components) endforeach() endmacro() -# AWSSDK's CMake config dispatches sub-package finds via AWSSDK_FIND_COMPONENTS, -# so a plain find_dependency would not bring in aws-cpp-sdk-core. -if("AWSSDK" IN_LIST ICEBERG_SYSTEM_DEPENDENCIES) - list(REMOVE_ITEM ICEBERG_SYSTEM_DEPENDENCIES AWSSDK) - include(CMakeFindDependencyMacro) - find_dependency(AWSSDK COMPONENTS core) -endif() +# Find system dependencies iceberg_find_dependencies("${ICEBERG_SYSTEM_DEPENDENCIES}") include("${CMAKE_CURRENT_LIST_DIR}/iceberg-targets.cmake") diff --git a/src/iceberg/test/sigv4_auth_test.cc b/src/iceberg/test/sigv4_auth_test.cc index a05a3f5a1..261e37dba 100644 --- a/src/iceberg/test/sigv4_auth_test.cc +++ b/src/iceberg/test/sigv4_auth_test.cc @@ -29,6 +29,7 @@ # include "iceberg/catalog/rest/auth/auth_properties.h" # include "iceberg/catalog/rest/auth/auth_session.h" # include "iceberg/catalog/rest/auth/aws_sdk.h" +# include "iceberg/catalog/rest/auth/session_context.h" # include "iceberg/catalog/rest/auth/sigv4_auth_manager_internal.h" # include "iceberg/catalog/rest/http_client.h" # include "iceberg/table_identifier.h" @@ -188,6 +189,23 @@ TEST_F(SigV4AuthTest, CustomSigningNameAndRegion) { EXPECT_TRUE(auth_it->second.find("custom-service") != std::string::npos); } +TEST_F(SigV4AuthTest, LegacySigV4EnabledFlagSelectsSigV4) { + auto properties = MakeSigV4Properties(); + properties.erase(AuthProperties::kAuthType); + properties[AuthProperties::kSigV4Enabled] = "true"; + auto manager_result = AuthManagers::Load("test-catalog", properties); + ASSERT_THAT(manager_result, IsOk()); + + auto session_result = manager_result.value()->CatalogSession(client_, properties); + ASSERT_THAT(session_result, IsOk()); + + HttpRequest request{.method = HttpMethod::kGet, .url = "https://example.com/v1/config"}; + auto auth_result = session_result.value()->Authenticate(request); + ASSERT_THAT(auth_result, IsOk()); + EXPECT_TRUE( + auth_result.value().headers.at("authorization").starts_with("AWS4-HMAC-SHA256")); +} + TEST_F(SigV4AuthTest, AuthTypeCaseInsensitive) { for (const auto& auth_type : {"SIGV4", "SigV4", "sigV4"}) { auto properties = MakeSigV4Properties(); @@ -431,11 +449,10 @@ TEST_F(SigV4AuthTest, ContextualSessionOverridesProperties) { auto catalog_session = manager_result.value()->CatalogSession(client_, properties); ASSERT_THAT(catalog_session, IsOk()); - // Context overrides region and credentials - std::unordered_map context = { - {AuthProperties::kSigV4AccessKeyId, "id2"}, - {AuthProperties::kSigV4SecretAccessKey, "secret2"}, - {AuthProperties::kSigV4SigningRegion, "eu-west-1"}, + SessionContext context{ + .properties = {{AuthProperties::kSigV4SigningRegion, "eu-west-1"}}, + .credentials = {{AuthProperties::kSigV4AccessKeyId, "id2"}, + {AuthProperties::kSigV4SecretAccessKey, "secret2"}}, }; auto ctx_session = @@ -503,7 +520,8 @@ TEST_F(SigV4AuthTest, TableSessionIgnoresContextualOverrides) { ASSERT_THAT(catalog_session, IsOk()); auto ctx_session = manager_result.value()->ContextualSession( - {{AuthProperties::kSigV4SigningRegion, "eu-west-1"}}, catalog_session.value()); + SessionContext{.properties = {{AuthProperties::kSigV4SigningRegion, "eu-west-1"}}}, + catalog_session.value()); ASSERT_THAT(ctx_session, IsOk()); iceberg::TableIdentifier table_id{.ns = iceberg::Namespace{{"db1"}}, .name = "table1"}; From 0ee9ab70f2844e4af5b75c0386df5e1686d5b68a Mon Sep 17 00:00:00 2001 From: Li Jiajia Date: Sun, 7 Jun 2026 14:15:16 +0800 Subject: [PATCH 17/23] address review feedback: SigV4 region, session lifecycle, S3+SigV4, Meson --- .github/workflows/aws_test.yml | 85 +++++++++++++++++-- .../IcebergThirdpartyToolchain.cmake | 5 ++ src/iceberg/catalog/rest/auth/auth_session.cc | 7 +- .../catalog/rest/auth/sigv4_auth_manager.cc | 44 ++++++++-- .../rest/auth/sigv4_auth_manager_internal.h | 8 +- src/iceberg/catalog/rest/meson.build | 9 +- src/iceberg/test/auth_manager_test.cc | 6 +- src/iceberg/test/sigv4_auth_test.cc | 26 ++++++ 8 files changed, 168 insertions(+), 22 deletions(-) diff --git a/.github/workflows/aws_test.yml b/.github/workflows/aws_test.yml index ba8268b81..ccf10e292 100644 --- a/.github/workflows/aws_test.yml +++ b/.github/workflows/aws_test.yml @@ -15,9 +15,9 @@ # specific language governing permissions and limitations # under the License. -# AWS-related tests. ICEBERG_S3 (Arrow's bundled AWS SDK) and ICEBERG_SIGV4 -# (vcpkg-installed aws-cpp-sdk-core) are tested in separate jobs: linking -# both into one binary causes ODR conflicts on the shared AWS SDK symbols. +# AWS-related tests. ICEBERG_S3 and ICEBERG_SIGV4 are exercised individually and +# together; with both on, Arrow's S3 reuses SigV4's system AWS SDK +# (AWSSDK_SOURCE=SYSTEM) so a single AWS SDK is linked (no ODR). name: AWS Tests on: @@ -62,6 +62,17 @@ jobs: CXX: g++-14 s3: "OFF" sigv4: "ON" + aws-sdk-features: core + - title: AMD64 Ubuntu 24.04, S3 + SigV4 + runs-on: ubuntu-24.04 + CC: gcc-14 + CXX: g++-14 + s3: "ON" + sigv4: "ON" + # Arrow's S3 filesystem consumes this same AWS SDK, so it needs the + # S3-related components in addition to core (config is required by + # Arrow's FindAWSSDKAlt). + aws-sdk-features: core,config,s3,identity-management,sts,transfer - title: AArch64 macOS 26, S3 runs-on: macos-26 s3: "ON" @@ -88,11 +99,21 @@ jobs: id: vcpkg-cache with: path: /usr/local/share/vcpkg/installed - key: vcpkg-x64-linux-aws-sdk-cpp-core-${{ hashFiles('.github/workflows/aws_test.yml') }} + key: vcpkg-x64-linux-aws-sdk-cpp-s3-${{ matrix.s3 }}-sigv4-${{ matrix.sigv4 }}-${{ hashFiles('.github/workflows/aws_test.yml') }} - name: Install AWS SDK via vcpkg if: ${{ matrix.sigv4 == 'ON' && steps.vcpkg-cache.outputs.cache-hit != 'true' }} shell: bash - run: vcpkg install aws-sdk-cpp[core]:x64-linux + # Retry to ride out transient GitHub/mirror download failures (504s). + run: | + for attempt in 1 2 3; do + if vcpkg install "aws-sdk-cpp[${{ matrix.aws-sdk-features }}]:x64-linux"; then + exit 0 + fi + echo "::warning::vcpkg install failed (attempt ${attempt}/3), retrying in 30s" + sleep 30 + done + echo "::error::vcpkg install failed after 3 attempts" + exit 1 - name: Set Ubuntu Compilers if: ${{ startsWith(matrix.runs-on, 'ubuntu') }} run: | @@ -107,3 +128,57 @@ jobs: env: CMAKE_TOOLCHAIN_FILE: ${{ matrix.sigv4 == 'ON' && '/usr/local/share/vcpkg/scripts/buildsystems/vcpkg.cmake' || '' }} run: ci/scripts/build_iceberg.sh "$(pwd)" OFF OFF ${{ matrix.s3 }} ${{ matrix.sigv4 }} + + # Exercise the Meson build with SigV4 enabled (resolves aws-cpp-sdk-core via + # its CMake config, not pkg-config whose Cflags force -std=c++11). + meson-sigv4: + if: ${{ github.event_name != 'pull_request' || github.event.pull_request.draft == false }} + name: Meson SigV4 (AMD64 Ubuntu 24.04) + runs-on: ubuntu-24.04 + timeout-minutes: 45 + steps: + - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + with: + python-version: '3.x' + - name: Checkout iceberg-cpp + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + - name: Install build dependencies + shell: bash + run: | + sudo apt-get update && sudo apt-get install -y libcurl4-openssl-dev + python3 -m pip install --upgrade pip + python3 -m pip install -r requirements.txt + - name: Cache vcpkg packages + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + id: vcpkg-cache + with: + path: /usr/local/share/vcpkg/installed + key: vcpkg-x64-linux-aws-sdk-cpp-core-${{ hashFiles('.github/workflows/aws_test.yml') }} + - name: Install AWS SDK via vcpkg + if: ${{ steps.vcpkg-cache.outputs.cache-hit != 'true' }} + shell: bash + # Retry to ride out transient GitHub/mirror download failures (504s). + run: | + for attempt in 1 2 3; do + if vcpkg install aws-sdk-cpp[core]:x64-linux; then + exit 0 + fi + echo "::warning::vcpkg install failed (attempt ${attempt}/3), retrying in 30s" + sleep 30 + done + echo "::error::vcpkg install failed after 3 attempts" + exit 1 + - name: Set Ubuntu Compilers + run: | + echo "CC=gcc-14" >> $GITHUB_ENV + echo "CXX=g++-14" >> $GITHUB_ENV + - name: Build and test Iceberg + shell: bash + env: + CMAKE_PREFIX_PATH: /usr/local/share/vcpkg/installed/x64-linux + run: | + meson setup builddir -Dsigv4=enabled + meson compile -C builddir + meson test -C builddir --timeout-multiplier 0 --print-errorlogs diff --git a/cmake_modules/IcebergThirdpartyToolchain.cmake b/cmake_modules/IcebergThirdpartyToolchain.cmake index 60ab973df..451f970dd 100644 --- a/cmake_modules/IcebergThirdpartyToolchain.cmake +++ b/cmake_modules/IcebergThirdpartyToolchain.cmake @@ -110,6 +110,11 @@ function(resolve_arrow_dependency) set(ARROW_RUNTIME_SIMD_LEVEL "NONE") set(ARROW_POSITION_INDEPENDENT_CODE ON) set(ARROW_DEPENDENCY_SOURCE "BUNDLED") + # With SigV4 also on, make Arrow's S3 reuse the system AWS SDK (the one SigV4 + # finds) instead of bundling its own, so only one AWS SDK is linked (no ODR). + if(ICEBERG_S3 AND ICEBERG_SIGV4) + set(AWSSDK_SOURCE "SYSTEM") + endif() set(ARROW_WITH_ZLIB ON) set(ZLIB_SOURCE "SYSTEM") set(ARROW_VERBOSE_THIRDPARTY_BUILD OFF) diff --git a/src/iceberg/catalog/rest/auth/auth_session.cc b/src/iceberg/catalog/rest/auth/auth_session.cc index 22591c805..cea6bb1c9 100644 --- a/src/iceberg/catalog/rest/auth/auth_session.cc +++ b/src/iceberg/catalog/rest/auth/auth_session.cc @@ -78,12 +78,13 @@ class OAuth2AuthSession : public AuthSession, return session; } - Status Authenticate(std::unordered_map& headers) override { + Result Authenticate(const HttpRequest& request) override { + HttpRequest authenticated = request; std::shared_lock lock(mutex_); for (const auto& [key, value] : headers_) { - headers.try_emplace(key, value); + authenticated.headers.try_emplace(key, value); } - return {}; + return authenticated; } Status Close() override { return CloseImpl(); } diff --git a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc index 15b3336fa..4139d282c 100644 --- a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc +++ b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc @@ -32,7 +32,9 @@ # include # include # include +# include # include +# include # include # include "iceberg/catalog/rest/auth/auth_managers.h" @@ -170,7 +172,11 @@ SigV4AuthSession::SigV4AuthSession( signer_(std::make_unique( credentials_provider_, signing_name_.c_str(), signing_region_.c_str())) {} -SigV4AuthSession::~SigV4AuthSession() { AwsSdkLifecycle::Instance().UnregisterSession(); } +SigV4AuthSession::~SigV4AuthSession() { + if (owns_sdk_registration_) { + AwsSdkLifecycle::Instance().UnregisterSession(); + } +} Result SigV4AuthSession::Authenticate(const HttpRequest& request) { ICEBERG_ASSIGN_OR_RAISE(auto delegate_request, delegate_->Authenticate(request)); @@ -338,18 +344,36 @@ SigV4AuthManager::MakeCredentialsProvider( return std::make_shared(); } -std::string SigV4AuthManager::ResolveSigningRegion( +Result SigV4AuthManager::ResolveSigningRegion( const std::unordered_map& properties) { if (auto it = properties.find(AuthProperties::kSigV4SigningRegion); it != properties.end() && !it->second.empty()) { return it->second; } - // ClientConfiguration() walks env / profile / IMDS / us-east-1; the IMDS - // step can block for seconds on non-EC2 hosts. Resolve once per process - // (set AWS_EC2_METADATA_DISABLED=true to skip IMDS). - static const std::string kSdkResolvedRegion = - std::string(Aws::Client::ClientConfiguration().region.c_str()); - return kSdkResolvedRegion; + // Resolve from env then the shared config profile (skip IMDS — it can block + // on non-EC2 hosts), and fail rather than silently defaulting to us-east-1. + // Resolved once per process. + static const std::string kResolvedRegion = []() -> std::string { + Aws::String region = Aws::Environment::GetEnv("AWS_REGION"); + if (region.empty()) { + region = Aws::Environment::GetEnv("AWS_DEFAULT_REGION"); + } + if (region.empty()) { + const auto& profiles = Aws::Config::GetCachedConfigProfiles(); + if (auto it = profiles.find(Aws::Auth::GetConfigProfileName()); + it != profiles.end()) { + region = it->second.GetRegion(); + } + } + return std::string(region.c_str()); + }(); + if (kResolvedRegion.empty()) { + return InvalidArgument( + "SigV4: could not resolve a signing region; set the '{}' property or the " + "AWS_REGION environment variable", + AuthProperties::kSigV4SigningRegion); + } + return kResolvedRegion; } std::string SigV4AuthManager::ResolveSigningName( @@ -392,7 +416,7 @@ Result> SigV4AuthManager::WrapSession( const std::unordered_map& properties, std::shared_ptr reuse_credentials) { ICEBERG_ASSIGN_OR_RAISE(auto slot, SessionSlot::Reserve()); - auto region = ResolveSigningRegion(properties); + ICEBERG_ASSIGN_OR_RAISE(auto region, ResolveSigningRegion(properties)); auto service = ResolveSigningName(properties); // Reuse the parent's provider unless properties override keys, avoiding a @@ -409,6 +433,8 @@ Result> SigV4AuthManager::WrapSession( auto session = std::make_shared(std::move(delegate_session), std::move(region), std::move(service), std::move(credentials)); + // The reserved slot's unregister responsibility now belongs to the session. + session->owns_sdk_registration_ = true; slot.Release(); return session; } diff --git a/src/iceberg/catalog/rest/auth/sigv4_auth_manager_internal.h b/src/iceberg/catalog/rest/auth/sigv4_auth_manager_internal.h index f0d921b7e..9531acdca 100644 --- a/src/iceberg/catalog/rest/auth/sigv4_auth_manager_internal.h +++ b/src/iceberg/catalog/rest/auth/sigv4_auth_manager_internal.h @@ -78,11 +78,17 @@ class ICEBERG_REST_EXPORT SigV4AuthSession : public AuthSession { } private: + // WrapSession() reserves an AwsSdkLifecycle slot and transfers it here. + friend class SigV4AuthManager; + std::shared_ptr delegate_; std::string signing_region_; std::string signing_name_; std::shared_ptr credentials_provider_; std::unique_ptr signer_; + // Only WrapSession()-created sessions registered a slot; directly-constructed + // ones (e.g. tests) must not unregister and underflow the count. + bool owns_sdk_registration_ = false; }; /// \brief An AuthManager that produces SigV4AuthSession instances. @@ -114,7 +120,7 @@ class ICEBERG_REST_EXPORT SigV4AuthManager : public AuthManager { private: static Result> MakeCredentialsProvider(const std::unordered_map& properties); - static std::string ResolveSigningRegion( + static Result ResolveSigningRegion( const std::unordered_map& properties); static std::string ResolveSigningName( const std::unordered_map& properties); diff --git a/src/iceberg/catalog/rest/meson.build b/src/iceberg/catalog/rest/meson.build index f0cbe81d5..dad220013 100644 --- a/src/iceberg/catalog/rest/meson.build +++ b/src/iceberg/catalog/rest/meson.build @@ -47,7 +47,14 @@ iceberg_rest_build_deps = [iceberg_dep, cpr_dep] iceberg_rest_compile_defs = [] sigv4_opt = get_option('sigv4') -aws_sdk_core_dep = dependency('aws-cpp-sdk-core', required: sigv4_opt) +# Use the CMake config, not pkg-config: aws-cpp-sdk-core.pc Cflags force +# -std=c++11 -fno-exceptions, which would override the project's C++23 build. +aws_sdk_core_dep = dependency( + 'aws-cpp-sdk-core', + method: 'cmake', + modules: ['aws-cpp-sdk-core'], + required: sigv4_opt, +) if aws_sdk_core_dep.found() iceberg_rest_build_deps += aws_sdk_core_dep iceberg_rest_compile_defs += '-DICEBERG_SIGV4' diff --git a/src/iceberg/test/auth_manager_test.cc b/src/iceberg/test/auth_manager_test.cc index 85d15dcaa..184362904 100644 --- a/src/iceberg/test/auth_manager_test.cc +++ b/src/iceberg/test/auth_manager_test.cc @@ -496,9 +496,9 @@ TEST(OAuth2AuthSessionTest, InitialTokenIsUsed) { ASSERT_THAT(session_result, IsOk()); auto session = session_result.value(); - std::unordered_map headers; - ASSERT_THAT(session->Authenticate(headers), IsOk()); - EXPECT_EQ(headers["Authorization"], "Bearer initial-token-123"); + auto auth_result = session->Authenticate({}); + ASSERT_THAT(auth_result, IsOk()); + EXPECT_EQ(auth_result.value().headers.at("Authorization"), "Bearer initial-token-123"); session->Close(); } diff --git a/src/iceberg/test/sigv4_auth_test.cc b/src/iceberg/test/sigv4_auth_test.cc index 261e37dba..082c2e381 100644 --- a/src/iceberg/test/sigv4_auth_test.cc +++ b/src/iceberg/test/sigv4_auth_test.cc @@ -72,6 +72,32 @@ TEST_F(SigV4AuthTest, LifecycleFinalizeRefusesWhileSessionsAlive) { EXPECT_TRUE(IsAwsSdkInitialized()); } +TEST_F(SigV4AuthTest, DirectlyConstructedSessionDoesNotCorruptLifecycleCount) { + // A directly-constructed session never registered with AwsSdkLifecycle, so + // destroying it must not decrement the count. Otherwise it underflows, and a + // later real session wraps it back to zero, letting FinalizeAwsSdk() shut the + // SDK down while a session is still alive. + { + auto delegate = AuthSession::MakeDefault({}); + auto credentials = std::make_shared( + Aws::Auth::AWSCredentials("id", "secret")); + auto direct = std::make_shared(delegate, "us-east-1", "execute-api", + credentials); + } // destroyed here — must leave the lifecycle count untouched + + auto properties = MakeSigV4Properties(); + auto manager_result = AuthManagers::Load("test-catalog", properties); + ASSERT_THAT(manager_result, IsOk()); + auto session_result = manager_result.value()->CatalogSession(client_, properties); + ASSERT_THAT(session_result, IsOk()); + + // Exactly one live (registered) session, so Finalize must refuse. With the + // underflow bug the count would have wrapped and Finalize could wrongly + // succeed and shut the SDK down. + EXPECT_THAT(FinalizeAwsSdk(), IsError(ErrorKind::kInvalid)); + EXPECT_TRUE(IsAwsSdkInitialized()); +} + TEST_F(SigV4AuthTest, LoadSigV4AuthManager) { auto properties = MakeSigV4Properties(); auto manager_result = AuthManagers::Load("test-catalog", properties); From 99f09e6defc37eb682057b12b71847b85a178b07 Mon Sep 17 00:00:00 2001 From: Li Jiajia Date: Mon, 8 Jun 2026 18:29:15 +0800 Subject: [PATCH 18/23] feat(cmake): reuse Arrow's bundled AWS SDK for SigV4 (no system SDK for S3+SigV4) --- .github/workflows/aws_test.yml | 6 ++--- CMakeLists.txt | 6 +++++ .../IcebergThirdpartyToolchain.cmake | 26 ++++++++++++++++--- src/iceberg/catalog/rest/CMakeLists.txt | 6 +++-- 4 files changed, 36 insertions(+), 8 deletions(-) diff --git a/.github/workflows/aws_test.yml b/.github/workflows/aws_test.yml index ccf10e292..77cf11477 100644 --- a/.github/workflows/aws_test.yml +++ b/.github/workflows/aws_test.yml @@ -94,14 +94,14 @@ jobs: shell: bash run: sudo apt-get update && sudo apt-get install -y libcurl4-openssl-dev - name: Cache vcpkg packages - if: ${{ matrix.sigv4 == 'ON' }} + if: ${{ matrix.sigv4 == 'ON' && matrix.s3 == 'OFF' }} uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 id: vcpkg-cache with: path: /usr/local/share/vcpkg/installed key: vcpkg-x64-linux-aws-sdk-cpp-s3-${{ matrix.s3 }}-sigv4-${{ matrix.sigv4 }}-${{ hashFiles('.github/workflows/aws_test.yml') }} - name: Install AWS SDK via vcpkg - if: ${{ matrix.sigv4 == 'ON' && steps.vcpkg-cache.outputs.cache-hit != 'true' }} + if: ${{ matrix.sigv4 == 'ON' && matrix.s3 == 'OFF' && steps.vcpkg-cache.outputs.cache-hit != 'true' }} shell: bash # Retry to ride out transient GitHub/mirror download failures (504s). run: | @@ -126,7 +126,7 @@ jobs: - name: Build and test Iceberg shell: bash env: - CMAKE_TOOLCHAIN_FILE: ${{ matrix.sigv4 == 'ON' && '/usr/local/share/vcpkg/scripts/buildsystems/vcpkg.cmake' || '' }} + CMAKE_TOOLCHAIN_FILE: ${{ matrix.sigv4 == 'ON' && matrix.s3 == 'OFF' && '/usr/local/share/vcpkg/scripts/buildsystems/vcpkg.cmake' || '' }} run: ci/scripts/build_iceberg.sh "$(pwd)" OFF OFF ${{ matrix.s3 }} ${{ matrix.sigv4 }} # Exercise the Meson build with SigV4 enabled (resolves aws-cpp-sdk-core via diff --git a/CMakeLists.txt b/CMakeLists.txt index 0d80a125e..c95fa7eb8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -54,6 +54,12 @@ option(ICEBERG_SQL_POSTGRESQL "Build the PostgreSQL connector for the SQL catalo option(ICEBERG_SQL_MYSQL "Build the MySQL connector for the SQL catalog" OFF) option(ICEBERG_S3 "Build with S3 support" OFF) option(ICEBERG_SIGV4 "Build SigV4 authentication support (requires AWS SDK)" OFF) +set(ICEBERG_AWSSDK_SOURCE + "AUTO" + CACHE STRING "AWS SDK source for SigV4: AUTO (reuse Arrow's bundled AWS SDK when \ +ICEBERG_S3 is ON, otherwise SYSTEM), SYSTEM (find an installed AWS SDK), or \ +BUNDLED (reuse Arrow's bundled AWS SDK; requires ICEBERG_S3)") +set_property(CACHE ICEBERG_AWSSDK_SOURCE PROPERTY STRINGS AUTO SYSTEM BUNDLED) option(ICEBERG_ENABLE_ASAN "Enable Address Sanitizer" OFF) option(ICEBERG_ENABLE_UBSAN "Enable Undefined Behavior Sanitizer" OFF) diff --git a/cmake_modules/IcebergThirdpartyToolchain.cmake b/cmake_modules/IcebergThirdpartyToolchain.cmake index 451f970dd..8ee05b1d2 100644 --- a/cmake_modules/IcebergThirdpartyToolchain.cmake +++ b/cmake_modules/IcebergThirdpartyToolchain.cmake @@ -20,6 +20,22 @@ set(ICEBERG_SYSTEM_DEPENDENCIES) set(ICEBERG_ARROW_INSTALL_INTERFACE_LIBS) +if(ICEBERG_SIGV4) + set(ICEBERG_AWSSDK_SOURCE_RESOLVED "${ICEBERG_AWSSDK_SOURCE}") + if(ICEBERG_AWSSDK_SOURCE_RESOLVED STREQUAL "AUTO") + if(ICEBERG_S3) + set(ICEBERG_AWSSDK_SOURCE_RESOLVED "BUNDLED") + else() + set(ICEBERG_AWSSDK_SOURCE_RESOLVED "SYSTEM") + endif() + endif() + if(ICEBERG_AWSSDK_SOURCE_RESOLVED STREQUAL "BUNDLED" AND NOT ICEBERG_S3) + message(FATAL_ERROR "ICEBERG_AWSSDK_SOURCE=BUNDLED requires ICEBERG_S3=ON: " + "the bundled AWS SDK is provided by Arrow's S3 support.") + endif() + message(STATUS "AWS SDK source for SigV4: ${ICEBERG_AWSSDK_SOURCE_RESOLVED}") +endif() + # ---------------------------------------------------------------------- # Versions and URLs for toolchain builds # @@ -110,9 +126,9 @@ function(resolve_arrow_dependency) set(ARROW_RUNTIME_SIMD_LEVEL "NONE") set(ARROW_POSITION_INDEPENDENT_CODE ON) set(ARROW_DEPENDENCY_SOURCE "BUNDLED") - # With SigV4 also on, make Arrow's S3 reuse the system AWS SDK (the one SigV4 - # finds) instead of bundling its own, so only one AWS SDK is linked (no ODR). - if(ICEBERG_S3 AND ICEBERG_SIGV4) + if(ICEBERG_S3 + AND ICEBERG_SIGV4 + AND ICEBERG_AWSSDK_SOURCE_RESOLVED STREQUAL "SYSTEM") set(AWSSDK_SOURCE "SYSTEM") endif() set(ARROW_WITH_ZLIB ON) @@ -643,6 +659,10 @@ endif() # AWS SDK for C++ function(resolve_aws_sdk_dependency) + if(ICEBERG_AWSSDK_SOURCE_RESOLVED STREQUAL "BUNDLED") + message(STATUS "SigV4 reuses Arrow's bundled AWS SDK (aws-cpp-sdk-core)") + return() + endif() find_package(AWSSDK REQUIRED COMPONENTS core) list(APPEND ICEBERG_SYSTEM_DEPENDENCIES AWSSDK) set(ICEBERG_SYSTEM_DEPENDENCIES diff --git a/src/iceberg/catalog/rest/CMakeLists.txt b/src/iceberg/catalog/rest/CMakeLists.txt index e8f595474..98610d62c 100644 --- a/src/iceberg/catalog/rest/CMakeLists.txt +++ b/src/iceberg/catalog/rest/CMakeLists.txt @@ -58,8 +58,10 @@ list(APPEND if(ICEBERG_SIGV4) list(APPEND ICEBERG_REST_STATIC_BUILD_INTERFACE_LIBS aws-cpp-sdk-core) list(APPEND ICEBERG_REST_SHARED_BUILD_INTERFACE_LIBS aws-cpp-sdk-core) - list(APPEND ICEBERG_REST_STATIC_INSTALL_INTERFACE_LIBS aws-cpp-sdk-core) - list(APPEND ICEBERG_REST_SHARED_INSTALL_INTERFACE_LIBS aws-cpp-sdk-core) + if(ICEBERG_AWSSDK_SOURCE_RESOLVED STREQUAL "SYSTEM") + list(APPEND ICEBERG_REST_STATIC_INSTALL_INTERFACE_LIBS aws-cpp-sdk-core) + list(APPEND ICEBERG_REST_SHARED_INSTALL_INTERFACE_LIBS aws-cpp-sdk-core) + endif() endif() add_iceberg_lib(iceberg_rest From 10ce4f2f2ff9a628f250f4165d6a16d139863f45 Mon Sep 17 00:00:00 2001 From: Li Jiajia Date: Tue, 9 Jun 2026 11:06:36 +0800 Subject: [PATCH 19/23] fix(rest): define ToString(HttpMethod) inline in header --- src/iceberg/catalog/rest/endpoint.cc | 16 ---------------- src/iceberg/catalog/rest/http_request.h | 16 +++++++++++++++- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/src/iceberg/catalog/rest/endpoint.cc b/src/iceberg/catalog/rest/endpoint.cc index bf457c879..953a7f03f 100644 --- a/src/iceberg/catalog/rest/endpoint.cc +++ b/src/iceberg/catalog/rest/endpoint.cc @@ -24,22 +24,6 @@ namespace iceberg::rest { -constexpr std::string_view ToString(HttpMethod method) { - switch (method) { - case HttpMethod::kGet: - return "GET"; - case HttpMethod::kPost: - return "POST"; - case HttpMethod::kPut: - return "PUT"; - case HttpMethod::kDelete: - return "DELETE"; - case HttpMethod::kHead: - return "HEAD"; - } - return "UNKNOWN"; -} - Result Endpoint::Make(HttpMethod method, std::string_view path) { if (path.empty()) { return InvalidArgument("Endpoint cannot have empty path"); diff --git a/src/iceberg/catalog/rest/http_request.h b/src/iceberg/catalog/rest/http_request.h index 126fbf034..1238d34d5 100644 --- a/src/iceberg/catalog/rest/http_request.h +++ b/src/iceberg/catalog/rest/http_request.h @@ -35,7 +35,21 @@ namespace iceberg::rest { enum class HttpMethod : uint8_t { kGet, kPost, kPut, kDelete, kHead }; /// \brief Convert HttpMethod to string representation. -constexpr std::string_view ToString(HttpMethod method); +constexpr std::string_view ToString(HttpMethod method) { + switch (method) { + case HttpMethod::kGet: + return "GET"; + case HttpMethod::kPost: + return "POST"; + case HttpMethod::kPut: + return "PUT"; + case HttpMethod::kDelete: + return "DELETE"; + case HttpMethod::kHead: + return "HEAD"; + } + return "UNKNOWN"; +} /// \brief Ordered collection of HTTP headers preserving repeated values. /// From c23e3af223f492d26f47cd42d5a8ff77fcda5066 Mon Sep 17 00:00:00 2001 From: Li Jiajia Date: Thu, 11 Jun 2026 15:10:04 +0800 Subject: [PATCH 20/23] fix(auth): fail fast on empty SigV4 credentials; fix stale CI comment --- .github/workflows/aws_test.yml | 4 ++-- src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc | 11 +++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/.github/workflows/aws_test.yml b/.github/workflows/aws_test.yml index 77cf11477..c73ff3dc6 100644 --- a/.github/workflows/aws_test.yml +++ b/.github/workflows/aws_test.yml @@ -16,8 +16,8 @@ # under the License. # AWS-related tests. ICEBERG_S3 and ICEBERG_SIGV4 are exercised individually and -# together; with both on, Arrow's S3 reuses SigV4's system AWS SDK -# (AWSSDK_SOURCE=SYSTEM) so a single AWS SDK is linked (no ODR). +# together; with both on, ICEBERG_AWSSDK_SOURCE defaults to BUNDLED so SigV4 +# reuses Arrow's bundled aws-cpp-sdk-core — a single AWS SDK is linked (no ODR). name: AWS Tests on: diff --git a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc index 4139d282c..896be280f 100644 --- a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc +++ b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc @@ -430,6 +430,17 @@ Result> SigV4AuthManager::WrapSession( } else { ICEBERG_ASSIGN_OR_RAISE(credentials, MakeCredentialsProvider(properties)); } + // Fail fast when the provider cannot resolve credentials (e.g. an empty + // default chain) instead of sending an effectively unsigned request later. + if (credentials->GetAWSCredentials().IsEmpty()) { + return std::unexpected( + Error{.kind = ErrorKind::kAuthenticationFailed, + .message = "SigV4: AWS credentials provider returned empty credentials; " + "set '" + + AuthProperties::kSigV4AccessKeyId + "' and '" + + AuthProperties::kSigV4SecretAccessKey + + "' or configure the AWS credentials chain"}); + } auto session = std::make_shared(std::move(delegate_session), std::move(region), std::move(service), std::move(credentials)); From fbfb7e9f8230237e84c0deb71722aea00e023c87 Mon Sep 17 00:00:00 2001 From: Li Jiajia Date: Thu, 11 Jun 2026 17:05:59 +0800 Subject: [PATCH 21/23] address review: SigV4 session lifecycle via Make(), per-table sessions, Base64 wording, CI toolchain --- ci/scripts/build_iceberg.sh | 5 ++ meson.options | 2 + .../catalog/rest/auth/sigv4_auth_manager.cc | 37 ++++++++------ .../rest/auth/sigv4_auth_manager_internal.h | 15 +++--- src/iceberg/catalog/rest/rest_catalog.cc | 50 ++++++++++++++++++- src/iceberg/catalog/rest/rest_catalog.h | 12 +++++ src/iceberg/test/sigv4_auth_test.cc | 34 ++++++------- 7 files changed, 115 insertions(+), 40 deletions(-) diff --git a/ci/scripts/build_iceberg.sh b/ci/scripts/build_iceberg.sh index 330e5946a..a0ca30cca 100755 --- a/ci/scripts/build_iceberg.sh +++ b/ci/scripts/build_iceberg.sh @@ -60,6 +60,11 @@ if is_windows; then CMAKE_ARGS+=("-DCMAKE_TOOLCHAIN_FILE=C:/vcpkg/scripts/buildsystems/vcpkg.cmake") CMAKE_ARGS+=("-DCMAKE_BUILD_TYPE=Release") else + # Pass an externally provided toolchain (e.g. vcpkg for the SigV4 job) + # explicitly instead of relying on CMake >= 3.21 reading the env var. + if [[ -n "${CMAKE_TOOLCHAIN_FILE:-}" ]]; then + CMAKE_ARGS+=("-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}") + fi CMAKE_ARGS+=("-DCMAKE_BUILD_TYPE=Debug") fi diff --git a/meson.options b/meson.options index c53574889..3300031a4 100644 --- a/meson.options +++ b/meson.options @@ -44,6 +44,8 @@ option( value: 'disabled', ) +# Resolves a system-installed AWS SDK via its CMake config; the bundled-AWS +# path (ICEBERG_AWSSDK_SOURCE=BUNDLED) is CMake-only. option( 'sigv4', type: 'feature', diff --git a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc index 896be280f..0b801ddd9 100644 --- a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc +++ b/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc @@ -172,11 +172,7 @@ SigV4AuthSession::SigV4AuthSession( signer_(std::make_unique( credentials_provider_, signing_name_.c_str(), signing_region_.c_str())) {} -SigV4AuthSession::~SigV4AuthSession() { - if (owns_sdk_registration_) { - AwsSdkLifecycle::Instance().UnregisterSession(); - } -} +SigV4AuthSession::~SigV4AuthSession() { AwsSdkLifecycle::Instance().UnregisterSession(); } Result SigV4AuthSession::Authenticate(const HttpRequest& request) { ICEBERG_ASSIGN_OR_RAISE(auto delegate_request, delegate_->Authenticate(request)); @@ -198,8 +194,12 @@ Result SigV4AuthSession::Authenticate(const HttpRequest& request) { aws_request->SetHeaderValue(Aws::String(name.c_str()), Aws::String(value.c_str())); } - // Empty body: hex EMPTY_BODY_SHA256 (Java parity workaround for the signer - // computing an invalid checksum on empty bodies). Non-empty: Base64. + // Java parity: for non-empty bodies the signed x-amz-content-sha256 header + // carries Base64(SHA256(body)) — matching the Java client's + // SignerChecksumParams behavior — while the canonical request's payload hash + // line remains lowercase hex per SigV4. Empty bodies use the hex + // EMPTY_BODY_SHA256 constant (workaround for the signer computing an invalid + // checksum on empty bodies). if (delegate_request.body.empty()) { aws_request->SetHeaderValue("x-amz-content-sha256", Aws::String(kEmptyBodySha256)); } else { @@ -411,11 +411,23 @@ class SessionSlot { } // namespace +Result> SigV4AuthSession::Make( + std::shared_ptr delegate, std::string signing_region, + std::string signing_name, + std::shared_ptr credentials_provider) { + ICEBERG_ASSIGN_OR_RAISE(auto slot, SessionSlot::Reserve()); + auto session = std::shared_ptr( + new SigV4AuthSession(std::move(delegate), std::move(signing_region), + std::move(signing_name), std::move(credentials_provider))); + // The session's destructor now owns the unregister. + slot.Release(); + return session; +} + Result> SigV4AuthManager::WrapSession( std::shared_ptr delegate_session, const std::unordered_map& properties, std::shared_ptr reuse_credentials) { - ICEBERG_ASSIGN_OR_RAISE(auto slot, SessionSlot::Reserve()); ICEBERG_ASSIGN_OR_RAISE(auto region, ResolveSigningRegion(properties)); auto service = ResolveSigningName(properties); @@ -441,12 +453,9 @@ Result> SigV4AuthManager::WrapSession( AuthProperties::kSigV4SecretAccessKey + "' or configure the AWS credentials chain"}); } - auto session = - std::make_shared(std::move(delegate_session), std::move(region), - std::move(service), std::move(credentials)); - // The reserved slot's unregister responsibility now belongs to the session. - session->owns_sdk_registration_ = true; - slot.Release(); + ICEBERG_ASSIGN_OR_RAISE( + auto session, SigV4AuthSession::Make(std::move(delegate_session), std::move(region), + std::move(service), std::move(credentials))); return session; } diff --git a/src/iceberg/catalog/rest/auth/sigv4_auth_manager_internal.h b/src/iceberg/catalog/rest/auth/sigv4_auth_manager_internal.h index 9531acdca..cc0d1a1ad 100644 --- a/src/iceberg/catalog/rest/auth/sigv4_auth_manager_internal.h +++ b/src/iceberg/catalog/rest/auth/sigv4_auth_manager_internal.h @@ -58,7 +58,11 @@ class ICEBERG_REST_EXPORT SigV4AuthSession : public AuthSession { /// Prefix prepended to relocated headers that conflict with SigV4-signed headers. static constexpr std::string_view kRelocatedHeaderPrefix = "Original-"; - SigV4AuthSession( + /// \brief Creates a session registered with the AWS SDK lifecycle. + /// + /// Fails if the SDK is not initialized. Every session owns its lifecycle + /// registration and unregisters on destruction. + static Result> Make( std::shared_ptr delegate, std::string signing_region, std::string signing_name, std::shared_ptr credentials_provider); @@ -78,17 +82,16 @@ class ICEBERG_REST_EXPORT SigV4AuthSession : public AuthSession { } private: - // WrapSession() reserves an AwsSdkLifecycle slot and transfers it here. - friend class SigV4AuthManager; + SigV4AuthSession( + std::shared_ptr delegate, std::string signing_region, + std::string signing_name, + std::shared_ptr credentials_provider); std::shared_ptr delegate_; std::string signing_region_; std::string signing_name_; std::shared_ptr credentials_provider_; std::unique_ptr signer_; - // Only WrapSession()-created sessions registered a slot; directly-constructed - // ones (e.g. tests) must not unregister and underflow the count. - bool owns_sdk_registration_ = false; }; /// \brief An AuthManager that produces SigV4AuthSession instances. diff --git a/src/iceberg/catalog/rest/rest_catalog.cc b/src/iceberg/catalog/rest/rest_catalog.cc index f04f5fb55..0b95d2ef2 100644 --- a/src/iceberg/catalog/rest/rest_catalog.cc +++ b/src/iceberg/catalog/rest/rest_catalog.cc @@ -118,14 +118,56 @@ Result CaptureNoSuchNamespace(const auto& status) { return CaptureNoSuchObject(status, ErrorKind::kNoSuchNamespace); } +std::string TableSessionKey(const TableIdentifier& identifier) { + std::string key; + for (const auto& level : identifier.ns.levels) { + key += level; + key += '\x1f'; + } + key += identifier.name; + return key; +} + } // namespace RestCatalog::~RestCatalog() { + for (auto& [key, session] : table_sessions_) { + if (session) { + std::ignore = session->Close(); + } + } if (catalog_session_) { std::ignore = catalog_session_->Close(); } } +Status RestCatalog::RememberTableSession( + const TableIdentifier& identifier, + const std::unordered_map& config) { + ICEBERG_ASSIGN_OR_RAISE( + auto session, auth_manager_->TableSession(identifier, config, catalog_session_)); + if (session == catalog_session_) { + return {}; + } + std::shared_ptr replaced; + { + std::lock_guard lock(table_sessions_mutex_); + auto& slot = table_sessions_[TableSessionKey(identifier)]; + replaced = std::exchange(slot, std::move(session)); + } + if (replaced) { + std::ignore = replaced->Close(); + } + return {}; +} + +std::shared_ptr RestCatalog::SessionFor( + const TableIdentifier& identifier) { + std::lock_guard lock(table_sessions_mutex_); + auto it = table_sessions_.find(TableSessionKey(identifier)); + return it != table_sessions_.end() ? it->second : catalog_session_; +} + Result> RestCatalog::Make( const RestCatalogProperties& config) { ICEBERG_ASSIGN_OR_RAISE(auto uri, config.Uri()); @@ -355,7 +397,9 @@ Result RestCatalog::CreateTableInternal( *catalog_session_)); ICEBERG_ASSIGN_OR_RAISE(auto json, FromJsonString(response.body())); - return LoadTableResultFromJson(json); + ICEBERG_ASSIGN_OR_RAISE(auto load_result, LoadTableResultFromJson(json)); + ICEBERG_RETURN_UNEXPECTED(RememberTableSession(identifier, load_result.config)); + return load_result; } Result> RestCatalog::CreateTable( @@ -391,7 +435,7 @@ Result> RestCatalog::UpdateTable( ICEBERG_ASSIGN_OR_RAISE( const auto response, client_->Post(path, json_request, /*headers=*/{}, *TableErrorHandler::Instance(), - *catalog_session_)); + *SessionFor(identifier))); ICEBERG_ASSIGN_OR_RAISE(auto json, FromJsonString(response.body())); ICEBERG_ASSIGN_OR_RAISE(auto commit_response, CommitTableResponseFromJson(json)); @@ -479,6 +523,7 @@ Result> RestCatalog::LoadTable(const TableIdentifier& ide ICEBERG_ASSIGN_OR_RAISE(const auto body, LoadTableInternal(identifier)); ICEBERG_ASSIGN_OR_RAISE(auto json, FromJsonString(body)); ICEBERG_ASSIGN_OR_RAISE(auto load_result, LoadTableResultFromJson(json)); + ICEBERG_RETURN_UNEXPECTED(RememberTableSession(identifier, load_result.config)); /// FIXME: support per-table FileIO creation return Table::Make(identifier, std::move(load_result.metadata), std::move(load_result.metadata_location), file_io_, @@ -503,6 +548,7 @@ Result> RestCatalog::RegisterTable( ICEBERG_ASSIGN_OR_RAISE(auto json, FromJsonString(response.body())); ICEBERG_ASSIGN_OR_RAISE(auto load_result, LoadTableResultFromJson(json)); + ICEBERG_RETURN_UNEXPECTED(RememberTableSession(identifier, load_result.config)); return Table::Make(identifier, std::move(load_result.metadata), std::move(load_result.metadata_location), file_io_, shared_from_this()); diff --git a/src/iceberg/catalog/rest/rest_catalog.h b/src/iceberg/catalog/rest/rest_catalog.h index 4fd4db5b8..c6e8ac124 100644 --- a/src/iceberg/catalog/rest/rest_catalog.h +++ b/src/iceberg/catalog/rest/rest_catalog.h @@ -20,7 +20,9 @@ #pragma once #include +#include #include +#include #include #include "iceberg/catalog.h" @@ -109,6 +111,14 @@ class ICEBERG_REST_EXPORT RestCatalog : public Catalog, Result LoadTableInternal(const TableIdentifier& identifier) const; + /// \brief Derives and caches a per-table auth session from a table response + /// `config`, per the REST spec (`config.token` must be used for table requests). + Status RememberTableSession(const TableIdentifier& identifier, + const std::unordered_map& config); + + /// \brief Returns the cached per-table session, or the catalog session. + std::shared_ptr SessionFor(const TableIdentifier& identifier); + Result CreateTableInternal( const TableIdentifier& identifier, const std::shared_ptr& schema, const std::shared_ptr& spec, const std::shared_ptr& order, @@ -124,6 +134,8 @@ class ICEBERG_REST_EXPORT RestCatalog : public Catalog, std::unique_ptr auth_manager_; std::shared_ptr catalog_session_; SnapshotMode snapshot_mode_; + std::mutex table_sessions_mutex_; + std::unordered_map> table_sessions_; }; } // namespace iceberg::rest diff --git a/src/iceberg/test/sigv4_auth_test.cc b/src/iceberg/test/sigv4_auth_test.cc index 082c2e381..512bb0d95 100644 --- a/src/iceberg/test/sigv4_auth_test.cc +++ b/src/iceberg/test/sigv4_auth_test.cc @@ -72,18 +72,16 @@ TEST_F(SigV4AuthTest, LifecycleFinalizeRefusesWhileSessionsAlive) { EXPECT_TRUE(IsAwsSdkInitialized()); } -TEST_F(SigV4AuthTest, DirectlyConstructedSessionDoesNotCorruptLifecycleCount) { - // A directly-constructed session never registered with AwsSdkLifecycle, so - // destroying it must not decrement the count. Otherwise it underflows, and a - // later real session wraps it back to zero, letting FinalizeAwsSdk() shut the - // SDK down while a session is still alive. +TEST_F(SigV4AuthTest, SessionRegistrationBalancesLifecycleCount) { { auto delegate = AuthSession::MakeDefault({}); auto credentials = std::make_shared( Aws::Auth::AWSCredentials("id", "secret")); - auto direct = std::make_shared(delegate, "us-east-1", "execute-api", - credentials); - } // destroyed here — must leave the lifecycle count untouched + auto session_result = + SigV4AuthSession::Make(delegate, "us-east-1", "execute-api", credentials); + ASSERT_THAT(session_result, IsOk()); + EXPECT_THAT(FinalizeAwsSdk(), IsError(ErrorKind::kInvalid)); + } auto properties = MakeSigV4Properties(); auto manager_result = AuthManagers::Load("test-catalog", properties); @@ -91,9 +89,6 @@ TEST_F(SigV4AuthTest, DirectlyConstructedSessionDoesNotCorruptLifecycleCount) { auto session_result = manager_result.value()->CatalogSession(client_, properties); ASSERT_THAT(session_result, IsOk()); - // Exactly one live (registered) session, so Finalize must refuse. With the - // underflow bug the count would have wrapped and Finalize could wrongly - // succeed and shut the SDK down. EXPECT_THAT(FinalizeAwsSdk(), IsError(ErrorKind::kInvalid)); EXPECT_TRUE(IsAwsSdkInitialized()); } @@ -338,7 +333,8 @@ TEST_F(SigV4AuthTest, AuthenticateWithBodyDetailedHeaders) { ASSERT_NE(auth_it, headers.end()); EXPECT_TRUE(auth_it->second.starts_with("AWS4-HMAC-SHA256 Credential=")); - // x-amz-content-sha256 should be Base64-encoded body SHA256 (matching Java) + // Java parity: the x-amz-content-sha256 header is Base64(SHA256(body)) for + // non-empty bodies; the canonical request payload hash stays lowercase hex. auto sha_it = headers.find("x-amz-content-sha256"); ASSERT_NE(sha_it, headers.end()); EXPECT_NE(sha_it->second, SigV4AuthSession::kEmptyBodySha256); @@ -390,8 +386,10 @@ TEST_F(SigV4AuthTest, ConflictingSigV4HeadersRelocated) { auto credentials = std::make_shared(Aws::Auth::AWSCredentials( "AKIAIOSFODNN7EXAMPLE", "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY")); - auto session = std::make_shared(delegate, "us-east-1", "execute-api", - credentials); + auto session_result = + SigV4AuthSession::Make(delegate, "us-east-1", "execute-api", credentials); + ASSERT_THAT(session_result, IsOk()); + auto session = session_result.value(); HttpRequest request{.method = HttpMethod::kGet, .url = "http://localhost:8080/path"}; auto auth_result = session->Authenticate(request); @@ -418,11 +416,11 @@ TEST_F(SigV4AuthTest, SessionCloseDelegatesToInner) { auto delegate = AuthSession::MakeDefault({}); auto credentials = std::make_shared( Aws::Auth::AWSCredentials("id", "secret")); - auto session = std::make_shared(delegate, "us-east-1", "execute-api", - credentials); + auto session_result = + SigV4AuthSession::Make(delegate, "us-east-1", "execute-api", credentials); + ASSERT_THAT(session_result, IsOk()); - // Close should succeed without error - EXPECT_THAT(session->Close(), IsOk()); + EXPECT_THAT(session_result.value()->Close(), IsOk()); } TEST_F(SigV4AuthTest, CreateCustomDelegateNone) { From 2e68f1f248a9bfe4ecd8f2b5e2685e1dbb7771c2 Mon Sep 17 00:00:00 2001 From: Li Jiajia Date: Thu, 11 Jun 2026 18:23:39 +0800 Subject: [PATCH 22/23] address review: refresh uses cached per-table session; add RestCatalog session test --- src/iceberg/catalog/rest/rest_catalog.cc | 6 +- src/iceberg/catalog/rest/rest_catalog.h | 4 +- src/iceberg/test/CMakeLists.txt | 1 + src/iceberg/test/meson.build | 1 + src/iceberg/test/rest_catalog_session_test.cc | 301 ++++++++++++++++++ 5 files changed, 309 insertions(+), 4 deletions(-) create mode 100644 src/iceberg/test/rest_catalog_session_test.cc diff --git a/src/iceberg/catalog/rest/rest_catalog.cc b/src/iceberg/catalog/rest/rest_catalog.cc index 0b95d2ef2..b43ea4983 100644 --- a/src/iceberg/catalog/rest/rest_catalog.cc +++ b/src/iceberg/catalog/rest/rest_catalog.cc @@ -162,7 +162,7 @@ Status RestCatalog::RememberTableSession( } std::shared_ptr RestCatalog::SessionFor( - const TableIdentifier& identifier) { + const TableIdentifier& identifier) const { std::lock_guard lock(table_sessions_mutex_); auto it = table_sessions_.find(TableSessionKey(identifier)); return it != table_sessions_.end() ? it->second : catalog_session_; @@ -512,10 +512,12 @@ Result RestCatalog::LoadTableInternal( params["snapshots"] = "all"; } + // Refresh uses the cached per-table session; the initial load falls back to + // the catalog session (no table session is cached yet). ICEBERG_ASSIGN_OR_RAISE( const auto response, client_->Get(path, params, /*headers=*/{}, *TableErrorHandler::Instance(), - *catalog_session_)); + *SessionFor(identifier))); return response.body(); } diff --git a/src/iceberg/catalog/rest/rest_catalog.h b/src/iceberg/catalog/rest/rest_catalog.h index c6e8ac124..f876c818d 100644 --- a/src/iceberg/catalog/rest/rest_catalog.h +++ b/src/iceberg/catalog/rest/rest_catalog.h @@ -117,7 +117,7 @@ class ICEBERG_REST_EXPORT RestCatalog : public Catalog, const std::unordered_map& config); /// \brief Returns the cached per-table session, or the catalog session. - std::shared_ptr SessionFor(const TableIdentifier& identifier); + std::shared_ptr SessionFor(const TableIdentifier& identifier) const; Result CreateTableInternal( const TableIdentifier& identifier, const std::shared_ptr& schema, @@ -134,7 +134,7 @@ class ICEBERG_REST_EXPORT RestCatalog : public Catalog, std::unique_ptr auth_manager_; std::shared_ptr catalog_session_; SnapshotMode snapshot_mode_; - std::mutex table_sessions_mutex_; + mutable std::mutex table_sessions_mutex_; std::unordered_map> table_sessions_; }; diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt index 0e8f03150..88a373c88 100644 --- a/src/iceberg/test/CMakeLists.txt +++ b/src/iceberg/test/CMakeLists.txt @@ -286,6 +286,7 @@ if(ICEBERG_BUILD_REST) SOURCES auth_manager_test.cc endpoint_test.cc + rest_catalog_session_test.cc rest_file_io_test.cc rest_json_serde_test.cc rest_util_test.cc) diff --git a/src/iceberg/test/meson.build b/src/iceberg/test/meson.build index 03d9e1f6c..b46317f73 100644 --- a/src/iceberg/test/meson.build +++ b/src/iceberg/test/meson.build @@ -128,6 +128,7 @@ if get_option('rest').enabled() 'sources': files( 'auth_manager_test.cc', 'endpoint_test.cc', + 'rest_catalog_session_test.cc', 'rest_file_io_test.cc', 'rest_json_serde_test.cc', 'rest_util_test.cc', diff --git a/src/iceberg/test/rest_catalog_session_test.cc b/src/iceberg/test/rest_catalog_session_test.cc new file mode 100644 index 000000000..cddfe0d79 --- /dev/null +++ b/src/iceberg/test/rest_catalog_session_test.cc @@ -0,0 +1,301 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include + +#ifndef _WIN32 + +# include + +# include +# include +# include +# include +# include +# include +# include +# include + +# include +# include + +# include "iceberg/catalog/rest/auth/auth_manager.h" +# include "iceberg/catalog/rest/auth/auth_managers.h" +# include "iceberg/catalog/rest/auth/auth_properties.h" +# include "iceberg/catalog/rest/auth/auth_session.h" +# include "iceberg/catalog/rest/catalog_properties.h" +# include "iceberg/catalog/rest/rest_catalog.h" +# include "iceberg/file_io.h" +# include "iceberg/file_io_registry.h" +# include "iceberg/table_identifier.h" +# include "iceberg/table_requirement.h" +# include "iceberg/table_update.h" +# include "iceberg/test/matchers.h" + +namespace iceberg::rest { + +namespace { + +constexpr std::string_view kMetadataJson = + R"({"format-version":2,"table-uuid":"test-uuid-1234","location":"s3://bucket/test",)" + R"("last-sequence-number":0,"last-updated-ms":0,"last-column-id":1,)" + R"("schemas":[{"type":"struct","schema-id":1,"fields":[{"id":1,"name":"id","type":"int","required":true}]}],)" + R"("current-schema-id":1,"partition-specs":[{"spec-id":0,"fields":[]}],"default-spec-id":0,)" + R"("last-partition-id":0,"sort-orders":[{"order-id":0,"fields":[]}],"default-sort-order-id":0})"; + +struct RecordedRequest { + std::string method; + std::string path; + std::string auth_marker; +}; + +class MiniRestServer { + public: + bool Start() { + listen_fd_ = ::socket(AF_INET, SOCK_STREAM, 0); + if (listen_fd_ < 0) return false; + int reuse = 1; + ::setsockopt(listen_fd_, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse)); + sockaddr_in addr{}; + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + addr.sin_port = 0; + if (::bind(listen_fd_, reinterpret_cast(&addr), sizeof(addr)) < 0) { + return false; + } + socklen_t len = sizeof(addr); + ::getsockname(listen_fd_, reinterpret_cast(&addr), &len); + port_ = ntohs(addr.sin_port); + if (::listen(listen_fd_, 8) < 0) return false; + server_thread_ = std::thread([this, fd = listen_fd_] { Loop(fd); }); + return true; + } + + void Stop() { + stopping_ = true; + if (listen_fd_ >= 0) { + ::shutdown(listen_fd_, SHUT_RDWR); + ::close(listen_fd_); + listen_fd_ = -1; + } + if (server_thread_.joinable()) server_thread_.join(); + } + + int port() const { return port_; } + + std::vector requests() { + std::lock_guard lock(mutex_); + return requests_; + } + + private: + void Loop(int listen_fd) { + while (!stopping_) { + int fd = ::accept(listen_fd, nullptr, nullptr); + if (fd < 0) break; + HandleConnection(fd); + ::close(fd); + } + } + + void HandleConnection(int fd) { + std::string raw; + std::array buf{}; + size_t header_end = std::string::npos; + while (header_end == std::string::npos) { + ssize_t n = ::read(fd, buf.data(), buf.size()); + if (n <= 0) return; + raw.append(buf.data(), static_cast(n)); + header_end = raw.find("\r\n\r\n"); + } + size_t content_length = 0; + { + std::string lower; + lower.reserve(header_end); + for (size_t i = 0; i < header_end; ++i) { + lower.push_back( + static_cast(std::tolower(static_cast(raw[i])))); + } + auto pos = lower.find("content-length:"); + if (pos != std::string::npos) { + content_length = std::stoul(lower.substr(pos + 15)); + } + } + while (raw.size() < header_end + 4 + content_length) { + ssize_t n = ::read(fd, buf.data(), buf.size()); + if (n <= 0) break; + raw.append(buf.data(), static_cast(n)); + } + + auto line_end = raw.find("\r\n"); + auto request_line = raw.substr(0, line_end); + auto sp1 = request_line.find(' '); + auto sp2 = request_line.find(' ', sp1 + 1); + RecordedRequest req; + req.method = request_line.substr(0, sp1); + req.path = request_line.substr(sp1 + 1, sp2 - sp1 - 1); + req.auth_marker = HeaderValue(raw.substr(0, header_end), "x-test-auth"); + { + std::lock_guard lock(mutex_); + requests_.push_back(req); + } + + Respond(fd, BodyFor(req)); + } + + static std::string HeaderValue(const std::string& headers, std::string_view name) { + std::string lower; + lower.reserve(headers.size()); + for (char c : headers) { + lower.push_back(static_cast(std::tolower(static_cast(c)))); + } + auto pos = lower.find(std::string(name) + ":"); + if (pos == std::string::npos) return ""; + auto value_start = pos + name.size() + 1; + auto value_end = headers.find("\r\n", value_start); + auto value = headers.substr(value_start, value_end - value_start); + auto first = value.find_first_not_of(' '); + return first == std::string::npos ? "" : value.substr(first); + } + + std::string BodyFor(const RecordedRequest& req) { + if (req.path.find("/v1/config") != std::string::npos) { + return R"({"defaults":{},"overrides":{}})"; + } + if (req.method == "GET" && req.path.find("/tables/") != std::string::npos) { + return std::string(R"({"metadata-location":"s3://bucket/meta/v1.json",)") + + R"("metadata":)" + std::string(kMetadataJson) + + R"(,"config":{"token":"tbl-token-1"}})"; + } + if (req.method == "POST" && req.path.find("/tables/") != std::string::npos) { + return std::string(R"({"metadata-location":"s3://bucket/meta/v2.json",)") + + R"("metadata":)" + std::string(kMetadataJson) + "}"; + } + return "{}"; + } + + static void Respond(int fd, const std::string& body) { + std::string response = "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\n"; + response += "Content-Length: " + std::to_string(body.size()) + "\r\n"; + response += "Connection: close\r\n\r\n"; + response += body; + size_t sent = 0; + while (sent < response.size()) { + ssize_t n = ::write(fd, response.data() + sent, response.size() - sent); + if (n <= 0) break; + sent += static_cast(n); + } + } + + int listen_fd_ = -1; + int port_ = 0; + std::atomic stopping_{false}; + std::thread server_thread_; + std::mutex mutex_; + std::vector requests_; +}; + +class RecordingAuthManager : public auth::AuthManager { + public: + Result> InitSession( + HttpClient& /*init_client*/, + const std::unordered_map& /*properties*/) override { + return auth::AuthSession::MakeDefault({{"x-test-auth", "init"}}); + } + + Result> CatalogSession( + HttpClient& /*shared_client*/, + const std::unordered_map& /*properties*/) override { + return auth::AuthSession::MakeDefault({{"x-test-auth", "catalog"}}); + } + + Result> TableSession( + const TableIdentifier& /*table*/, + const std::unordered_map& properties, + std::shared_ptr parent) override { + auto token = properties.find("token"); + if (token == properties.end()) { + return parent; + } + return auth::AuthSession::MakeDefault({{"x-test-auth", "table:" + token->second}}); + } +}; + +class MockFileIO : public FileIO {}; + +} // namespace + +TEST(RestCatalogSessionTest, RefreshAndCommitUseTableSessionFromResponseConfig) { + MiniRestServer server; + ASSERT_TRUE(server.Start()); + + auth::AuthManagers::Register( + "test-session-recorder", + [](std::string_view /*name*/, + const std::unordered_map& /*properties*/) + -> Result> { + return std::make_unique(); + }); + FileIORegistry::Register( + "test.SessionMockFileIO", + [](const std::unordered_map& /*properties*/) + -> Result> { return std::make_unique(); }); + + auto config = RestCatalogProperties::FromMap( + {{"uri", "http://127.0.0.1:" + std::to_string(server.port())}, + {auth::AuthProperties::kAuthType, "test-session-recorder"}, + {"io-impl", "test.SessionMockFileIO"}}); + + { + auto catalog_result = RestCatalog::Make(config); + ASSERT_THAT(catalog_result, IsOk()); + auto catalog = catalog_result.value(); + + TableIdentifier identifier{.ns = Namespace{{"ns1"}}, .name = "tbl1"}; + ASSERT_THAT(catalog->LoadTable(identifier), IsOk()); + ASSERT_THAT(catalog->LoadTable(identifier), IsOk()); + ASSERT_THAT(catalog->UpdateTable(identifier, {}, {}), IsOk()); + } + + server.Stop(); + + auto requests = server.requests(); + ASSERT_EQ(requests.size(), 4); + EXPECT_TRUE(requests[0].path.find("/v1/config") != std::string::npos); + EXPECT_EQ(requests[0].auth_marker, "init"); + EXPECT_EQ(requests[1].method, "GET"); + EXPECT_TRUE(requests[1].path.find("/tables/tbl1") != std::string::npos); + EXPECT_EQ(requests[1].auth_marker, "catalog"); + EXPECT_EQ(requests[2].method, "GET"); + EXPECT_TRUE(requests[2].path.find("/tables/tbl1") != std::string::npos); + EXPECT_EQ(requests[2].auth_marker, "table:tbl-token-1"); + EXPECT_EQ(requests[3].method, "POST"); + EXPECT_TRUE(requests[3].path.find("/tables/tbl1") != std::string::npos); + EXPECT_EQ(requests[3].auth_marker, "table:tbl-token-1"); +} + +} // namespace iceberg::rest + +#else + +TEST(RestCatalogSessionTest, RefreshAndCommitUseTableSessionFromResponseConfig) { + GTEST_SKIP() << "POSIX-socket test server is not available on Windows"; +} + +#endif // _WIN32 From 789921c609f266b36d9f6315c9f8d1c484f422bf Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Sat, 13 Jun 2026 23:21:16 +0800 Subject: [PATCH 23/23] Polish REST SigV4 auth integration Simplify AWS SDK wiring, remove premature session-cache plumbing, tighten credential validation, and strengthen SigV4 tests. --- .github/workflows/aws_test.yml | 30 +- CMakeLists.txt | 15 +- ci/scripts/build_iceberg.sh | 10 +- .../IcebergThirdpartyToolchain.cmake | 92 +- meson.options | 2 - src/iceberg/catalog/rest/CMakeLists.txt | 21 +- src/iceberg/catalog/rest/auth/auth_manager.cc | 3 +- src/iceberg/catalog/rest/auth/auth_manager.h | 6 +- .../catalog/rest/auth/auth_properties.h | 4 + src/iceberg/catalog/rest/auth/auth_session.cc | 14 +- src/iceberg/catalog/rest/auth/auth_session.h | 8 +- src/iceberg/catalog/rest/auth/aws_sdk.h | 44 - src/iceberg/catalog/rest/auth/meson.build | 2 - .../catalog/rest/auth/session_context.h | 39 - .../rest/auth/sigv4_auth_manager_internal.h | 30 +- ...sigv4_auth_manager.cc => sigv4_manager.cc} | 320 +++---- src/iceberg/catalog/rest/http_client.cc | 56 +- src/iceberg/catalog/rest/http_request.cc | 83 -- src/iceberg/catalog/rest/http_request.h | 94 +-- src/iceberg/catalog/rest/meson.build | 7 +- src/iceberg/catalog/rest/rest_catalog.cc | 59 +- src/iceberg/catalog/rest/rest_catalog.h | 12 - src/iceberg/test/CMakeLists.txt | 1 - src/iceberg/test/auth_manager_test.cc | 20 + src/iceberg/test/meson.build | 1 - src/iceberg/test/rest_catalog_session_test.cc | 301 ------- src/iceberg/test/sigv4_auth_test.cc | 791 ++++++++---------- 27 files changed, 733 insertions(+), 1332 deletions(-) delete mode 100644 src/iceberg/catalog/rest/auth/aws_sdk.h delete mode 100644 src/iceberg/catalog/rest/auth/session_context.h rename src/iceberg/catalog/rest/auth/{sigv4_auth_manager.cc => sigv4_manager.cc} (73%) delete mode 100644 src/iceberg/catalog/rest/http_request.cc delete mode 100644 src/iceberg/test/rest_catalog_session_test.cc diff --git a/.github/workflows/aws_test.yml b/.github/workflows/aws_test.yml index c73ff3dc6..a8d455006 100644 --- a/.github/workflows/aws_test.yml +++ b/.github/workflows/aws_test.yml @@ -15,9 +15,6 @@ # specific language governing permissions and limitations # under the License. -# AWS-related tests. ICEBERG_S3 and ICEBERG_SIGV4 are exercised individually and -# together; with both on, ICEBERG_AWSSDK_SOURCE defaults to BUNDLED so SigV4 -# reuses Arrow's bundled aws-cpp-sdk-core — a single AWS SDK is linked (no ODR). name: AWS Tests on: @@ -50,33 +47,26 @@ jobs: fail-fast: false matrix: include: - - title: AMD64 Ubuntu 24.04, S3 + - title: Ubuntu 24.04, S3 + SigV4, bundled AWS SDK runs-on: ubuntu-24.04 CC: gcc-14 CXX: g++-14 s3: "ON" - sigv4: "OFF" - - title: AMD64 Ubuntu 24.04, SigV4 - runs-on: ubuntu-24.04 - CC: gcc-14 - CXX: g++-14 - s3: "OFF" sigv4: "ON" - aws-sdk-features: core - - title: AMD64 Ubuntu 24.04, S3 + SigV4 + bundle_awssdk: "ON" + - title: Ubuntu 24.04, S3 + SigV4, system AWS SDK runs-on: ubuntu-24.04 CC: gcc-14 CXX: g++-14 s3: "ON" sigv4: "ON" - # Arrow's S3 filesystem consumes this same AWS SDK, so it needs the - # S3-related components in addition to core (config is required by - # Arrow's FindAWSSDKAlt). + bundle_awssdk: "OFF" aws-sdk-features: core,config,s3,identity-management,sts,transfer - - title: AArch64 macOS 26, S3 + - title: macOS 26 ARM64, S3, bundled AWS SDK runs-on: macos-26 s3: "ON" sigv4: "OFF" + bundle_awssdk: "ON" env: ICEBERG_TEST_S3_URI: s3://iceberg-test AWS_ACCESS_KEY_ID: minio @@ -94,14 +84,14 @@ jobs: shell: bash run: sudo apt-get update && sudo apt-get install -y libcurl4-openssl-dev - name: Cache vcpkg packages - if: ${{ matrix.sigv4 == 'ON' && matrix.s3 == 'OFF' }} + if: ${{ startsWith(matrix.runs-on, 'ubuntu') && matrix.bundle_awssdk == 'OFF' }} uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 id: vcpkg-cache with: path: /usr/local/share/vcpkg/installed key: vcpkg-x64-linux-aws-sdk-cpp-s3-${{ matrix.s3 }}-sigv4-${{ matrix.sigv4 }}-${{ hashFiles('.github/workflows/aws_test.yml') }} - name: Install AWS SDK via vcpkg - if: ${{ matrix.sigv4 == 'ON' && matrix.s3 == 'OFF' && steps.vcpkg-cache.outputs.cache-hit != 'true' }} + if: ${{ startsWith(matrix.runs-on, 'ubuntu') && matrix.bundle_awssdk == 'OFF' && steps.vcpkg-cache.outputs.cache-hit != 'true' }} shell: bash # Retry to ride out transient GitHub/mirror download failures (504s). run: | @@ -126,8 +116,8 @@ jobs: - name: Build and test Iceberg shell: bash env: - CMAKE_TOOLCHAIN_FILE: ${{ matrix.sigv4 == 'ON' && matrix.s3 == 'OFF' && '/usr/local/share/vcpkg/scripts/buildsystems/vcpkg.cmake' || '' }} - run: ci/scripts/build_iceberg.sh "$(pwd)" OFF OFF ${{ matrix.s3 }} ${{ matrix.sigv4 }} + CMAKE_TOOLCHAIN_FILE: ${{ startsWith(matrix.runs-on, 'ubuntu') && matrix.bundle_awssdk == 'OFF' && '/usr/local/share/vcpkg/scripts/buildsystems/vcpkg.cmake' || '' }} + run: ci/scripts/build_iceberg.sh "$(pwd)" OFF OFF ${{ matrix.s3 }} ${{ matrix.sigv4 }} ${{ matrix.bundle_awssdk }} # Exercise the Meson build with SigV4 enabled (resolves aws-cpp-sdk-core via # its CMake config, not pkg-config whose Cflags force -std=c++11). diff --git a/CMakeLists.txt b/CMakeLists.txt index c95fa7eb8..69f90b54a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -53,13 +53,8 @@ option(ICEBERG_SQL_SQLITE "Build the SQLite connector for the SQL catalog" OFF) option(ICEBERG_SQL_POSTGRESQL "Build the PostgreSQL connector for the SQL catalog" OFF) option(ICEBERG_SQL_MYSQL "Build the MySQL connector for the SQL catalog" OFF) option(ICEBERG_S3 "Build with S3 support" OFF) -option(ICEBERG_SIGV4 "Build SigV4 authentication support (requires AWS SDK)" OFF) -set(ICEBERG_AWSSDK_SOURCE - "AUTO" - CACHE STRING "AWS SDK source for SigV4: AUTO (reuse Arrow's bundled AWS SDK when \ -ICEBERG_S3 is ON, otherwise SYSTEM), SYSTEM (find an installed AWS SDK), or \ -BUNDLED (reuse Arrow's bundled AWS SDK; requires ICEBERG_S3)") -set_property(CACHE ICEBERG_AWSSDK_SOURCE PROPERTY STRINGS AUTO SYSTEM BUNDLED) +option(ICEBERG_SIGV4 "Build with SigV4 support" OFF) +option(ICEBERG_BUNDLE_AWSSDK "Bundle AWS SDK for S3/SigV4 support" ON) option(ICEBERG_ENABLE_ASAN "Enable Address Sanitizer" OFF) option(ICEBERG_ENABLE_UBSAN "Enable Undefined Behavior Sanitizer" OFF) @@ -83,12 +78,6 @@ if(ICEBERG_BUILD_REST_INTEGRATION_TESTS AND WIN32) message(WARNING "Cannot build rest integration test on Windows, turning it off.") endif() -# ICEBERG_S3 requires ICEBERG_BUILD_BUNDLE -if(NOT ICEBERG_BUILD_BUNDLE AND ICEBERG_S3) - set(ICEBERG_S3 OFF) - message(STATUS "ICEBERG_S3 is disabled because ICEBERG_BUILD_BUNDLE is OFF") -endif() - include(CMakeParseArguments) include(IcebergBuildUtils) include(IcebergSanitizer) diff --git a/ci/scripts/build_iceberg.sh b/ci/scripts/build_iceberg.sh index a0ca30cca..6af0802f6 100755 --- a/ci/scripts/build_iceberg.sh +++ b/ci/scripts/build_iceberg.sh @@ -17,7 +17,7 @@ # specific language governing permissions and limitations # under the License. # -# Usage: build_iceberg.sh [rest_integration_tests=OFF] [sccache=OFF] [s3=OFF] [sigv4=OFF] +# Usage: build_iceberg.sh [rest_integration_tests=OFF] [sccache=OFF] [s3=OFF] [sigv4=OFF] [bundle_awssdk=ON] set -eux @@ -27,6 +27,7 @@ build_rest_integration_test=${2:-OFF} build_enable_sccache=${3:-OFF} build_enable_s3=${4:-OFF} build_enable_sigv4=${5:-OFF} +build_bundle_awssdk=${6:-ON} run_tests=${ICEBERG_RUN_TESTS:-ON} mkdir ${build_dir} @@ -56,12 +57,17 @@ else CMAKE_ARGS+=("-DICEBERG_SIGV4=OFF") fi +if [[ "${build_bundle_awssdk}" == "ON" ]]; then + CMAKE_ARGS+=("-DICEBERG_BUNDLE_AWSSDK=ON") +else + CMAKE_ARGS+=("-DICEBERG_BUNDLE_AWSSDK=OFF") +fi + if is_windows; then CMAKE_ARGS+=("-DCMAKE_TOOLCHAIN_FILE=C:/vcpkg/scripts/buildsystems/vcpkg.cmake") CMAKE_ARGS+=("-DCMAKE_BUILD_TYPE=Release") else # Pass an externally provided toolchain (e.g. vcpkg for the SigV4 job) - # explicitly instead of relying on CMake >= 3.21 reading the env var. if [[ -n "${CMAKE_TOOLCHAIN_FILE:-}" ]]; then CMAKE_ARGS+=("-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}") fi diff --git a/cmake_modules/IcebergThirdpartyToolchain.cmake b/cmake_modules/IcebergThirdpartyToolchain.cmake index 8ee05b1d2..1430bb9cd 100644 --- a/cmake_modules/IcebergThirdpartyToolchain.cmake +++ b/cmake_modules/IcebergThirdpartyToolchain.cmake @@ -19,23 +19,49 @@ # third party libraries. set(ICEBERG_SYSTEM_DEPENDENCIES) set(ICEBERG_ARROW_INSTALL_INTERFACE_LIBS) - -if(ICEBERG_SIGV4) - set(ICEBERG_AWSSDK_SOURCE_RESOLVED "${ICEBERG_AWSSDK_SOURCE}") - if(ICEBERG_AWSSDK_SOURCE_RESOLVED STREQUAL "AUTO") - if(ICEBERG_S3) - set(ICEBERG_AWSSDK_SOURCE_RESOLVED "BUNDLED") - else() - set(ICEBERG_AWSSDK_SOURCE_RESOLVED "SYSTEM") - endif() +set(ICEBERG_AWSSDK_BUNDLED FALSE) +if(ICEBERG_S3 AND ICEBERG_BUNDLE_AWSSDK) + if(NOT ICEBERG_BUILD_BUNDLE) + message(FATAL_ERROR "ICEBERG_BUNDLE_AWSSDK requires ICEBERG_BUILD_BUNDLE to be ON") endif() - if(ICEBERG_AWSSDK_SOURCE_RESOLVED STREQUAL "BUNDLED" AND NOT ICEBERG_S3) - message(FATAL_ERROR "ICEBERG_AWSSDK_SOURCE=BUNDLED requires ICEBERG_S3=ON: " - "the bundled AWS SDK is provided by Arrow's S3 support.") + set(ICEBERG_AWSSDK_BUNDLED TRUE) +endif() + +set(ICEBERG_AWSSDK_COMPONENTS) +if(NOT ICEBERG_AWSSDK_BUNDLED) + if(ICEBERG_S3) + list(APPEND + ICEBERG_AWSSDK_COMPONENTS + core + config + s3 + transfer + identity-management + sts) + elseif(ICEBERG_SIGV4) + list(APPEND ICEBERG_AWSSDK_COMPONENTS core) endif() - message(STATUS "AWS SDK source for SigV4: ${ICEBERG_AWSSDK_SOURCE_RESOLVED}") endif() +# ---------------------------------------------------------------------- +# AWS SDK for C++ + +function(resolve_aws_sdk_dependency) + if(NOT ICEBERG_AWSSDK_COMPONENTS) + return() + endif() + find_package(AWSSDK REQUIRED COMPONENTS ${ICEBERG_AWSSDK_COMPONENTS}) + list(APPEND ICEBERG_SYSTEM_DEPENDENCIES AWSSDK) + set(ICEBERG_SYSTEM_DEPENDENCIES + ${ICEBERG_SYSTEM_DEPENDENCIES} + PARENT_SCOPE) + # Forwarded to find_dependency(AWSSDK ...) in iceberg-config.cmake.in so + # downstream installed builds load the same AWS SDK targets. + set(ICEBERG_FIND_EXTRA_ARGS_AWSSDK + "COMPONENTS;${ICEBERG_AWSSDK_COMPONENTS}" + PARENT_SCOPE) +endfunction() + # ---------------------------------------------------------------------- # Versions and URLs for toolchain builds # @@ -126,12 +152,10 @@ function(resolve_arrow_dependency) set(ARROW_RUNTIME_SIMD_LEVEL "NONE") set(ARROW_POSITION_INDEPENDENT_CODE ON) set(ARROW_DEPENDENCY_SOURCE "BUNDLED") - if(ICEBERG_S3 - AND ICEBERG_SIGV4 - AND ICEBERG_AWSSDK_SOURCE_RESOLVED STREQUAL "SYSTEM") + set(ARROW_WITH_ZLIB ON) + if(ICEBERG_S3 AND NOT ICEBERG_AWSSDK_BUNDLED) set(AWSSDK_SOURCE "SYSTEM") endif() - set(ARROW_WITH_ZLIB ON) set(ZLIB_SOURCE "SYSTEM") set(ARROW_VERBOSE_THIRDPARTY_BUILD OFF) set(CMAKE_CXX_STANDARD 20) @@ -641,6 +665,13 @@ resolve_nanoarrow_dependency() resolve_croaring_dependency() resolve_nlohmann_json_dependency() +if(ICEBERG_S3 OR ICEBERG_SIGV4) + if(ICEBERG_SIGV4 AND NOT ICEBERG_BUILD_REST) + message(FATAL_ERROR "ICEBERG_SIGV4 requires ICEBERG_BUILD_REST to be ON") + endif() + resolve_aws_sdk_dependency() +endif() + if(ICEBERG_BUILD_BUNDLE) resolve_arrow_dependency() resolve_avro_dependency() @@ -654,30 +685,3 @@ endif() if(ICEBERG_BUILD_SQL_CATALOG) resolve_sql_catalog_dependencies() endif() - -# ---------------------------------------------------------------------- -# AWS SDK for C++ - -function(resolve_aws_sdk_dependency) - if(ICEBERG_AWSSDK_SOURCE_RESOLVED STREQUAL "BUNDLED") - message(STATUS "SigV4 reuses Arrow's bundled AWS SDK (aws-cpp-sdk-core)") - return() - endif() - find_package(AWSSDK REQUIRED COMPONENTS core) - list(APPEND ICEBERG_SYSTEM_DEPENDENCIES AWSSDK) - set(ICEBERG_SYSTEM_DEPENDENCIES - ${ICEBERG_SYSTEM_DEPENDENCIES} - PARENT_SCOPE) - # Forwarded to find_dependency(AWSSDK ...) in iceberg-config.cmake.in so - # downstream installed builds load aws-cpp-sdk-core via AWSSDK_FIND_COMPONENTS. - set(ICEBERG_FIND_EXTRA_ARGS_AWSSDK - "COMPONENTS;core" - PARENT_SCOPE) -endfunction() - -if(ICEBERG_SIGV4) - if(NOT ICEBERG_BUILD_REST) - message(FATAL_ERROR "ICEBERG_SIGV4 requires ICEBERG_BUILD_REST to be ON") - endif() - resolve_aws_sdk_dependency() -endif() diff --git a/meson.options b/meson.options index 3300031a4..c53574889 100644 --- a/meson.options +++ b/meson.options @@ -44,8 +44,6 @@ option( value: 'disabled', ) -# Resolves a system-installed AWS SDK via its CMake config; the bundled-AWS -# path (ICEBERG_AWSSDK_SOURCE=BUNDLED) is CMake-only. option( 'sigv4', type: 'feature', diff --git a/src/iceberg/catalog/rest/CMakeLists.txt b/src/iceberg/catalog/rest/CMakeLists.txt index 98610d62c..b6438486a 100644 --- a/src/iceberg/catalog/rest/CMakeLists.txt +++ b/src/iceberg/catalog/rest/CMakeLists.txt @@ -23,13 +23,12 @@ set(ICEBERG_REST_SOURCES auth/auth_properties.cc auth/auth_session.cc auth/oauth2_util.cc - auth/sigv4_auth_manager.cc + auth/sigv4_manager.cc auth/token_refresh_scheduler.cc catalog_properties.cc endpoint.cc error_handlers.cc http_client.cc - http_request.cc json_serde.cc resource_paths.cc rest_catalog.cc @@ -58,7 +57,7 @@ list(APPEND if(ICEBERG_SIGV4) list(APPEND ICEBERG_REST_STATIC_BUILD_INTERFACE_LIBS aws-cpp-sdk-core) list(APPEND ICEBERG_REST_SHARED_BUILD_INTERFACE_LIBS aws-cpp-sdk-core) - if(ICEBERG_AWSSDK_SOURCE_RESOLVED STREQUAL "SYSTEM") + if(NOT ICEBERG_AWSSDK_BUNDLED) list(APPEND ICEBERG_REST_STATIC_INSTALL_INTERFACE_LIBS aws-cpp-sdk-core) list(APPEND ICEBERG_REST_SHARED_INSTALL_INTERFACE_LIBS aws-cpp-sdk-core) endif() @@ -76,12 +75,16 @@ add_iceberg_lib(iceberg_rest SHARED_INSTALL_INTERFACE_LIBS ${ICEBERG_REST_SHARED_INSTALL_INTERFACE_LIBS}) -if(ICEBERG_SIGV4) - foreach(LIB iceberg_rest_static iceberg_rest_shared) - if(TARGET ${LIB}) - target_compile_definitions(${LIB} PUBLIC ICEBERG_SIGV4) +foreach(LIB iceberg_rest_static iceberg_rest_shared) + if(TARGET ${LIB}) + if(ICEBERG_SIGV4) + target_compile_definitions(${LIB} + PUBLIC "$") + else() + target_compile_definitions(${LIB} + PUBLIC "$") endif() - endforeach() -endif() + endif() +endforeach() iceberg_install_all_headers(iceberg/catalog/rest) diff --git a/src/iceberg/catalog/rest/auth/auth_manager.cc b/src/iceberg/catalog/rest/auth/auth_manager.cc index 10290489a..247c1d538 100644 --- a/src/iceberg/catalog/rest/auth/auth_manager.cc +++ b/src/iceberg/catalog/rest/auth/auth_manager.cc @@ -38,7 +38,8 @@ Result> AuthManager::InitSession( } Result> AuthManager::ContextualSession( - [[maybe_unused]] const SessionContext& context, std::shared_ptr parent) { + [[maybe_unused]] const std::unordered_map& context, + std::shared_ptr parent) { // By default, return the parent session as-is return parent; } diff --git a/src/iceberg/catalog/rest/auth/auth_manager.h b/src/iceberg/catalog/rest/auth/auth_manager.h index e0acebab2..c192ecf2c 100644 --- a/src/iceberg/catalog/rest/auth/auth_manager.h +++ b/src/iceberg/catalog/rest/auth/auth_manager.h @@ -23,7 +23,6 @@ #include #include -#include "iceberg/catalog/rest/auth/session_context.h" #include "iceberg/catalog/rest/iceberg_rest_export.h" #include "iceberg/catalog/rest/type_fwd.h" #include "iceberg/result.h" @@ -71,12 +70,13 @@ class ICEBERG_REST_EXPORT AuthManager { /// This method is used by SessionCatalog to create sessions for different contexts /// (e.g., different users or tenants). /// - /// \param context Per-session properties and credentials. + /// \param context Context properties (e.g., user credentials, tenant info). /// \param parent Catalog session to inherit from or return as-is. /// \return A context-specific session, or the parent session if no context-specific /// session is needed, or an error if session creation fails. virtual Result> ContextualSession( - const SessionContext& context, std::shared_ptr parent); + const std::unordered_map& context, + std::shared_ptr parent); /// \brief Create or reuse a session scoped to a single table/view. /// diff --git a/src/iceberg/catalog/rest/auth/auth_properties.h b/src/iceberg/catalog/rest/auth/auth_properties.h index 745cb1ff3..a699569c1 100644 --- a/src/iceberg/catalog/rest/auth/auth_properties.h +++ b/src/iceberg/catalog/rest/auth/auth_properties.h @@ -59,6 +59,10 @@ class ICEBERG_REST_EXPORT AuthProperties : public ConfigBase { inline static const std::string kSigV4Enabled = "rest.sigv4-enabled"; inline static const std::string kSigV4DelegateAuthType = "rest.auth.sigv4.delegate-auth-type"; + + /// SigV4 signing region. If unset, SigV4 resolves the signing region from + /// AWS environment/profile configuration and fails if no region can be + /// resolved. inline static const std::string kSigV4SigningRegion = "rest.signing-region"; inline static const std::string kSigV4SigningName = "rest.signing-name"; inline static const std::string kSigV4SigningNameDefault = "execute-api"; diff --git a/src/iceberg/catalog/rest/auth/auth_session.cc b/src/iceberg/catalog/rest/auth/auth_session.cc index cea6bb1c9..545ee00b1 100644 --- a/src/iceberg/catalog/rest/auth/auth_session.cc +++ b/src/iceberg/catalog/rest/auth/auth_session.cc @@ -43,12 +43,11 @@ class DefaultAuthSession : public AuthSession { explicit DefaultAuthSession(std::unordered_map headers) : headers_(std::move(headers)) {} - Result Authenticate(const HttpRequest& request) override { - HttpRequest authenticated = request; + Result Authenticate(HttpRequest request) override { for (const auto& [key, value] : headers_) { - authenticated.headers.try_emplace(key, value); + request.headers.try_emplace(key, value); } - return authenticated; + return request; } private: @@ -78,13 +77,12 @@ class OAuth2AuthSession : public AuthSession, return session; } - Result Authenticate(const HttpRequest& request) override { - HttpRequest authenticated = request; + Result Authenticate(HttpRequest request) override { std::shared_lock lock(mutex_); for (const auto& [key, value] : headers_) { - authenticated.headers.try_emplace(key, value); + request.headers.try_emplace(key, value); } - return authenticated; + return request; } Status Close() override { return CloseImpl(); } diff --git a/src/iceberg/catalog/rest/auth/auth_session.h b/src/iceberg/catalog/rest/auth/auth_session.h index 5d09688de..3d0063a04 100644 --- a/src/iceberg/catalog/rest/auth/auth_session.h +++ b/src/iceberg/catalog/rest/auth/auth_session.h @@ -40,9 +40,9 @@ class ICEBERG_REST_EXPORT AuthSession { /// \brief Authenticate an outgoing HTTP request. /// - /// Returns a new request with authentication information (e.g., an - /// Authorization header) added. Implementations must be idempotent and must - /// not mutate the input request. + /// Returns a request with authentication information (e.g., an Authorization + /// header) added. Implementations must be idempotent. The request is passed + /// by value so callers can move request bodies into the authentication path. /// /// \param request The request to authenticate. /// \return The authenticated request on success, or one of: @@ -52,7 +52,7 @@ class ICEBERG_REST_EXPORT AuthSession { /// - NotAuthorized: Not authenticated (401) /// - IOError: Network or connection errors when reaching auth server /// - RestError: HTTP errors from authentication service - virtual Result Authenticate(const HttpRequest& request) = 0; + virtual Result Authenticate(HttpRequest request) = 0; /// \brief Close the session and release any resources. /// diff --git a/src/iceberg/catalog/rest/auth/aws_sdk.h b/src/iceberg/catalog/rest/auth/aws_sdk.h deleted file mode 100644 index b71006289..000000000 --- a/src/iceberg/catalog/rest/auth/aws_sdk.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#pragma once - -#include "iceberg/catalog/rest/iceberg_rest_export.h" -#include "iceberg/result.h" - -/// \file iceberg/catalog/rest/auth/aws_sdk.h -/// \brief Process-wide AWS SDK lifecycle for SigV4 authentication. -/// -/// Applications using SigV4 should call InitializeAwsSdk() at startup and -/// FinalizeAwsSdk() before exit. If never called, the SDK is lazily -/// initialized on first SigV4 use and leaked at process exit. FinalizeAwsSdk() -/// is intended for process-shutdown sequencing, not concurrent teardown. - -namespace iceberg::rest::auth { - -/// \brief Initialize the AWS SDK. Idempotent. -ICEBERG_REST_EXPORT Status InitializeAwsSdk(); - -/// \brief Shut down the AWS SDK. Refuses if any SigV4 sessions are alive. -ICEBERG_REST_EXPORT Status FinalizeAwsSdk(); - -ICEBERG_REST_EXPORT bool IsAwsSdkInitialized(); -ICEBERG_REST_EXPORT bool IsAwsSdkFinalized(); - -} // namespace iceberg::rest::auth diff --git a/src/iceberg/catalog/rest/auth/meson.build b/src/iceberg/catalog/rest/auth/meson.build index 954bc4d02..4de6e821e 100644 --- a/src/iceberg/catalog/rest/auth/meson.build +++ b/src/iceberg/catalog/rest/auth/meson.build @@ -21,9 +21,7 @@ install_headers( 'auth_managers.h', 'auth_properties.h', 'auth_session.h', - 'aws_sdk.h', 'oauth2_util.h', - 'session_context.h', ], subdir: 'iceberg/catalog/rest/auth', ) diff --git a/src/iceberg/catalog/rest/auth/session_context.h b/src/iceberg/catalog/rest/auth/session_context.h deleted file mode 100644 index 069eeb95d..000000000 --- a/src/iceberg/catalog/rest/auth/session_context.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#pragma once - -#include -#include - -#include "iceberg/catalog/rest/iceberg_rest_export.h" - -namespace iceberg::rest::auth { - -/// \brief Per-session context passed to AuthManager::ContextualSession. -/// -/// Mirrors Java's `SessionCatalog.SessionContext`. Separate `properties` and -/// `credentials` so per-context credential overrides don't silently collapse -/// into properties. -struct ICEBERG_REST_EXPORT SessionContext { - std::unordered_map properties; - std::unordered_map credentials; -}; - -} // namespace iceberg::rest::auth diff --git a/src/iceberg/catalog/rest/auth/sigv4_auth_manager_internal.h b/src/iceberg/catalog/rest/auth/sigv4_auth_manager_internal.h index cc0d1a1ad..a4f42875d 100644 --- a/src/iceberg/catalog/rest/auth/sigv4_auth_manager_internal.h +++ b/src/iceberg/catalog/rest/auth/sigv4_auth_manager_internal.h @@ -38,6 +38,21 @@ class AWSAuthV4Signer; namespace iceberg::rest::auth { +/// \brief Initialize the AWS SDK for SigV4 use. Idempotent. +/// +/// Normal REST SigV4 users do not need to call this. SigV4 sessions lazily +/// initialize the SDK when needed. This hook exists for tests and for explicit +/// process-shutdown sequencing when an embedding application needs it. +ICEBERG_REST_EXPORT Status InitializeAwsSdk(); + +/// \brief Shut down the SigV4-owned AWS SDK lifecycle. +/// +/// Refuses if any SigV4 sessions are alive. +ICEBERG_REST_EXPORT Status FinalizeAwsSdk(); + +ICEBERG_REST_EXPORT bool IsAwsSdkInitialized(); +ICEBERG_REST_EXPORT bool IsAwsSdkFinalized(); + /// \brief An AuthSession that signs requests with AWS SigV4. /// /// The request is first authenticated by the delegate AuthSession (e.g., OAuth2), @@ -69,7 +84,7 @@ class ICEBERG_REST_EXPORT SigV4AuthSession : public AuthSession { ~SigV4AuthSession() override; - Result Authenticate(const HttpRequest& request) override; + Result Authenticate(HttpRequest request) override; Status Close() override; @@ -111,7 +126,8 @@ class ICEBERG_REST_EXPORT SigV4AuthManager : public AuthManager { const std::unordered_map& properties) override; Result> ContextualSession( - const SessionContext& context, std::shared_ptr parent) override; + const std::unordered_map& context, + std::shared_ptr parent) override; Result> TableSession( const TableIdentifier& table, @@ -121,18 +137,10 @@ class ICEBERG_REST_EXPORT SigV4AuthManager : public AuthManager { Status Close() override; private: - static Result> - MakeCredentialsProvider(const std::unordered_map& properties); - static Result ResolveSigningRegion( - const std::unordered_map& properties); - static std::string ResolveSigningName( - const std::unordered_map& properties); - /// \param reuse_credentials If non-null and `properties` has no explicit - /// access keys, this provider is reused instead of building a new one. Result> WrapSession( std::shared_ptr delegate_session, const std::unordered_map& properties, - std::shared_ptr reuse_credentials = nullptr); + std::shared_ptr credentials_provider); std::unique_ptr delegate_; std::unordered_map catalog_properties_; diff --git a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc b/src/iceberg/catalog/rest/auth/sigv4_manager.cc similarity index 73% rename from src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc rename to src/iceberg/catalog/rest/auth/sigv4_manager.cc index 0b801ddd9..b3cb4dbd4 100644 --- a/src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc +++ b/src/iceberg/catalog/rest/auth/sigv4_manager.cc @@ -18,14 +18,17 @@ */ #include "iceberg/catalog/rest/auth/auth_manager_internal.h" -#include "iceberg/catalog/rest/auth/aws_sdk.h" #include "iceberg/catalog/rest/auth/sigv4_auth_manager_internal.h" +#include "iceberg/result.h" -#ifdef ICEBERG_SIGV4 +#if ICEBERG_SIGV4_ENABLED # include +# include # include # include +# include +# include # include # include @@ -47,6 +50,8 @@ namespace iceberg::rest::auth { namespace { +constexpr std::string_view kAmzContentSha256Header = "x-amz-content-sha256"; + class AwsSdkLifecycle { public: static AwsSdkLifecycle& Instance() { @@ -157,6 +162,113 @@ class RestSigV4Signer : public Aws::Client::AWSAuthV4Signer { } }; +// TODO(sigv4): support loading a custom AWSCredentialsProvider via a class +// name property, matching Java's AwsProperties.restCredentialsProvider(). +Result> MakeCredentialsProvider( + const std::unordered_map& properties) { + auto access_key_it = properties.find(AuthProperties::kSigV4AccessKeyId); + auto secret_key_it = properties.find(AuthProperties::kSigV4SecretAccessKey); + auto session_token_it = properties.find(AuthProperties::kSigV4SessionToken); + bool has_ak = access_key_it != properties.end() && !access_key_it->second.empty(); + bool has_sk = secret_key_it != properties.end() && !secret_key_it->second.empty(); + bool has_token = + session_token_it != properties.end() && !session_token_it->second.empty(); + + ICEBERG_PRECHECK( + has_ak == has_sk, "Both '{}' and '{}' must be set together, or neither", + AuthProperties::kSigV4AccessKeyId, AuthProperties::kSigV4SecretAccessKey); + ICEBERG_PRECHECK(!has_token || (has_ak && has_sk), + "'{}' requires both '{}' and '{}' to be set", + AuthProperties::kSigV4SessionToken, AuthProperties::kSigV4AccessKeyId, + AuthProperties::kSigV4SecretAccessKey); + + if (has_ak) { + Aws::Auth::AWSCredentials credentials(access_key_it->second.c_str(), + secret_key_it->second.c_str()); + if (has_token) { + credentials.SetSessionToken(session_token_it->second.c_str()); + } + return std::make_shared(credentials); + } + + return std::make_shared(); +} + +Result ResolveSigningRegion( + const std::unordered_map& properties) { + if (auto it = properties.find(AuthProperties::kSigV4SigningRegion); + it != properties.end() && !it->second.empty()) { + return it->second; + } + // Resolve from env then the shared config profile, otherwise fail. + // If this becomes expensive, cache it at the catalog/AuthManager scope or + // introduce an AwsProperties-like object as Java does. + Aws::String region = Aws::Environment::GetEnv("AWS_REGION"); + if (region.empty()) { + region = Aws::Environment::GetEnv("AWS_DEFAULT_REGION"); + } + if (region.empty()) { + const auto& profiles = Aws::Config::GetCachedConfigProfiles(); + if (auto it = profiles.find(Aws::Auth::GetConfigProfileName()); + it != profiles.end()) { + region = it->second.GetRegion(); + } + } + if (region.empty()) { + return InvalidArgument( + "SigV4: could not resolve a signing region; set the '{}' property or the " + "AWS_REGION environment variable", + AuthProperties::kSigV4SigningRegion); + } + return std::string(region.c_str()); +} + +std::string ResolveSigningName( + const std::unordered_map& properties) { + if (auto it = properties.find(AuthProperties::kSigV4SigningName); + it != properties.end() && !it->second.empty()) { + return it->second; + } + return AuthProperties::kSigV4SigningNameDefault; +} + +bool HasSigV4CredentialOverride( + const std::unordered_map& properties) { + return properties.contains(AuthProperties::kSigV4AccessKeyId) || + properties.contains(AuthProperties::kSigV4SecretAccessKey) || + properties.contains(AuthProperties::kSigV4SessionToken); +} + +Result> ResolveCredentialsProvider( + const std::unordered_map& properties, + std::shared_ptr reuse_credentials = nullptr) { + if (reuse_credentials && !HasSigV4CredentialOverride(properties)) { + return reuse_credentials; + } + return MakeCredentialsProvider(properties); +} + +template +class ScopeExit { + public: + explicit ScopeExit(Fn fn) : fn_(std::move(fn)) {} + ScopeExit(ScopeExit&& other) noexcept + : fn_(std::move(other.fn_)), active_(other.active_) { + other.active_ = false; + } + ScopeExit(const ScopeExit&) = delete; + ScopeExit& operator=(const ScopeExit&) = delete; + ScopeExit& operator=(ScopeExit&&) = delete; + ~ScopeExit() { + if (active_) fn_(); + } + void Cancel() noexcept { active_ = false; } + + private: + Fn fn_; + bool active_ = true; +}; + } // namespace // ---- SigV4AuthSession ---- @@ -174,8 +286,9 @@ SigV4AuthSession::SigV4AuthSession( SigV4AuthSession::~SigV4AuthSession() { AwsSdkLifecycle::Instance().UnregisterSession(); } -Result SigV4AuthSession::Authenticate(const HttpRequest& request) { - ICEBERG_ASSIGN_OR_RAISE(auto delegate_request, delegate_->Authenticate(request)); +Result SigV4AuthSession::Authenticate(HttpRequest request) { + ICEBERG_ASSIGN_OR_RAISE(auto delegate_request, + delegate_->Authenticate(std::move(request))); const auto& original_headers = delegate_request.headers; std::unordered_map signing_headers; @@ -194,57 +307,49 @@ Result SigV4AuthSession::Authenticate(const HttpRequest& request) { aws_request->SetHeaderValue(Aws::String(name.c_str()), Aws::String(value.c_str())); } - // Java parity: for non-empty bodies the signed x-amz-content-sha256 header - // carries Base64(SHA256(body)) — matching the Java client's - // SignerChecksumParams behavior — while the canonical request's payload hash - // line remains lowercase hex per SigV4. Empty bodies use the hex - // EMPTY_BODY_SHA256 constant (workaround for the signer computing an invalid - // checksum on empty bodies). + // Empty bodies use the hex SHA256 constant; non-empty bodies use + // Base64(SHA256(body)). This matches Java RESTSigV4AuthSession behavior. if (delegate_request.body.empty()) { - aws_request->SetHeaderValue("x-amz-content-sha256", Aws::String(kEmptyBodySha256)); + aws_request->SetHeaderValue(Aws::String(kAmzContentSha256Header), + Aws::String(kEmptyBodySha256)); } else { auto body_stream = Aws::MakeShared("SigV4Body", delegate_request.body); aws_request->AddContentBody(body_stream); auto sha256 = Aws::Utils::HashingUtils::CalculateSHA256( Aws::String(delegate_request.body.data(), delegate_request.body.size())); - aws_request->SetHeaderValue("x-amz-content-sha256", + aws_request->SetHeaderValue(Aws::String(kAmzContentSha256Header), Aws::Utils::HashingUtils::Base64Encode(sha256)); } if (!signer_->SignRequest(*aws_request)) { - return std::unexpected(Error{.kind = ErrorKind::kAuthenticationFailed, - .message = "SigV4 signing failed"}); + return AuthenticationFailed("AWS SigV4 request signing failed"); } - // Build a case-insensitive index of original headers once so the outer - // loop over signed headers below is O(N + M) instead of O(N * M). - std::unordered_map> originals_by_name; + // Build a case-insensitive view of original headers so signer-added headers + // can be compared without lowercasing or copying the originals. + std::map + originals_by_name; for (const auto& [orig_name, orig_value] : original_headers) { - originals_by_name[StringUtils::ToLower(orig_name)].push_back(&orig_value); + originals_by_name.emplace(orig_name, orig_value); } HttpRequest signed_request{.method = delegate_request.method, .url = std::move(delegate_request.url), .headers = {}, .body = std::move(delegate_request.body)}; - signed_request.headers.reserve(aws_request->GetHeaders().size() + - original_headers.size()); for (const auto& [aws_name, aws_value] : aws_request->GetHeaders()) { std::string name(aws_name.c_str(), aws_name.size()); std::string value(aws_value.c_str(), aws_value.size()); - if (auto it = originals_by_name.find(StringUtils::ToLower(name)); + if (auto it = originals_by_name.find(std::string_view(name)); it != originals_by_name.end()) { - // Preserve every original entry with this name whose value the signer - // didn't produce, matching Java updateRequestHeaders. - for (const auto* orig_value : it->second) { - if (*orig_value != value) { - signed_request.headers.add(std::string(kRelocatedHeaderPrefix) + name, - *orig_value); - } + // Preserve the original value when the signer overwrites a header. + if (it->second != std::string_view(value)) { + signed_request.headers.try_emplace(std::string(kRelocatedHeaderPrefix) + name, + std::string(it->second)); } } - signed_request.headers.add(std::move(name), std::move(value)); + signed_request.headers.insert_or_assign(std::move(name), std::move(value)); } return signed_request; @@ -265,7 +370,8 @@ Result> SigV4AuthManager::InitSession( ICEBERG_RETURN_UNEXPECTED(AwsSdkLifecycle::Instance().EnsureInitialized()); ICEBERG_ASSIGN_OR_RAISE(auto delegate_session, delegate_->InitSession(init_client, properties)); - return WrapSession(std::move(delegate_session), properties); + ICEBERG_ASSIGN_OR_RAISE(auto credentials, ResolveCredentialsProvider(properties)); + return WrapSession(std::move(delegate_session), properties, std::move(credentials)); } Result> SigV4AuthManager::CatalogSession( @@ -275,14 +381,13 @@ Result> SigV4AuthManager::CatalogSession( catalog_properties_ = properties; ICEBERG_ASSIGN_OR_RAISE(auto delegate_session, delegate_->CatalogSession(shared_client, properties)); - return WrapSession(std::move(delegate_session), properties); + ICEBERG_ASSIGN_OR_RAISE(auto credentials, ResolveCredentialsProvider(properties)); + return WrapSession(std::move(delegate_session), properties, std::move(credentials)); } -// Both derived sessions merge against the stored catalog_properties_, so -// contextual overrides do not propagate into child table sessions. - Result> SigV4AuthManager::ContextualSession( - const SessionContext& context, std::shared_ptr parent) { + const std::unordered_map& context, + std::shared_ptr parent) { auto sigv4_parent = std::dynamic_pointer_cast(std::move(parent)); ICEBERG_PRECHECK(sigv4_parent != nullptr, "SigV4AuthManager parent must be a SigV4AuthSession"); @@ -290,12 +395,11 @@ Result> SigV4AuthManager::ContextualSession( ICEBERG_ASSIGN_OR_RAISE(auto delegate_session, delegate_->ContextualSession( context, sigv4_parent->delegate())); - // Merge context.credentials into properties so credential overrides aren't - // dropped. - auto merged = MergeProperties(catalog_properties_, - MergeProperties(context.properties, context.credentials)); - return WrapSession(std::move(delegate_session), merged, - sigv4_parent->credentials_provider()); + auto merged = MergeProperties(catalog_properties_, context); + ICEBERG_ASSIGN_OR_RAISE( + auto credentials, + ResolveCredentialsProvider(context, sigv4_parent->credentials_provider())); + return WrapSession(std::move(delegate_session), merged, std::move(credentials)); } Result> SigV4AuthManager::TableSession( @@ -311,147 +415,43 @@ Result> SigV4AuthManager::TableSession( delegate_->TableSession(table, properties, sigv4_parent->delegate())); auto merged = MergeProperties(catalog_properties_, properties); - return WrapSession(std::move(delegate_session), merged, - sigv4_parent->credentials_provider()); + ICEBERG_ASSIGN_OR_RAISE( + auto credentials, + ResolveCredentialsProvider(properties, sigv4_parent->credentials_provider())); + return WrapSession(std::move(delegate_session), merged, std::move(credentials)); } Status SigV4AuthManager::Close() { return delegate_->Close(); } -// TODO(sigv4): support loading a custom AWSCredentialsProvider via a class -// name property, matching Java's AwsProperties.restCredentialsProvider(). -Result> -SigV4AuthManager::MakeCredentialsProvider( - const std::unordered_map& properties) { - auto access_key_it = properties.find(AuthProperties::kSigV4AccessKeyId); - auto secret_key_it = properties.find(AuthProperties::kSigV4SecretAccessKey); - bool has_ak = access_key_it != properties.end() && !access_key_it->second.empty(); - bool has_sk = secret_key_it != properties.end() && !secret_key_it->second.empty(); - - ICEBERG_PRECHECK( - has_ak == has_sk, "Both '{}' and '{}' must be set together, or neither", - AuthProperties::kSigV4AccessKeyId, AuthProperties::kSigV4SecretAccessKey); - - if (has_ak) { - Aws::Auth::AWSCredentials credentials(access_key_it->second.c_str(), - secret_key_it->second.c_str()); - auto session_token_it = properties.find(AuthProperties::kSigV4SessionToken); - if (session_token_it != properties.end() && !session_token_it->second.empty()) { - credentials.SetSessionToken(session_token_it->second.c_str()); - } - return std::make_shared(credentials); - } - - return std::make_shared(); -} - -Result SigV4AuthManager::ResolveSigningRegion( - const std::unordered_map& properties) { - if (auto it = properties.find(AuthProperties::kSigV4SigningRegion); - it != properties.end() && !it->second.empty()) { - return it->second; - } - // Resolve from env then the shared config profile (skip IMDS — it can block - // on non-EC2 hosts), and fail rather than silently defaulting to us-east-1. - // Resolved once per process. - static const std::string kResolvedRegion = []() -> std::string { - Aws::String region = Aws::Environment::GetEnv("AWS_REGION"); - if (region.empty()) { - region = Aws::Environment::GetEnv("AWS_DEFAULT_REGION"); - } - if (region.empty()) { - const auto& profiles = Aws::Config::GetCachedConfigProfiles(); - if (auto it = profiles.find(Aws::Auth::GetConfigProfileName()); - it != profiles.end()) { - region = it->second.GetRegion(); - } - } - return std::string(region.c_str()); - }(); - if (kResolvedRegion.empty()) { - return InvalidArgument( - "SigV4: could not resolve a signing region; set the '{}' property or the " - "AWS_REGION environment variable", - AuthProperties::kSigV4SigningRegion); - } - return kResolvedRegion; -} - -std::string SigV4AuthManager::ResolveSigningName( - const std::unordered_map& properties) { - if (auto it = properties.find(AuthProperties::kSigV4SigningName); - it != properties.end() && !it->second.empty()) { - return it->second; - } - return AuthProperties::kSigV4SigningNameDefault; -} - -namespace { - -// RAII guard so any throw between RegisterSession() and the successful -// SigV4AuthSession construction unwinds the session count. -class SessionSlot { - public: - static Result Reserve() { - ICEBERG_RETURN_UNEXPECTED(AwsSdkLifecycle::Instance().RegisterSession()); - return SessionSlot{}; - } - SessionSlot(SessionSlot&& other) noexcept : armed_(other.armed_) { - other.armed_ = false; - } - SessionSlot& operator=(SessionSlot&&) = delete; - ~SessionSlot() { - if (armed_) AwsSdkLifecycle::Instance().UnregisterSession(); - } - void Release() noexcept { armed_ = false; } - - private: - SessionSlot() = default; - bool armed_ = true; -}; - -} // namespace - Result> SigV4AuthSession::Make( std::shared_ptr delegate, std::string signing_region, std::string signing_name, std::shared_ptr credentials_provider) { - ICEBERG_ASSIGN_OR_RAISE(auto slot, SessionSlot::Reserve()); + ICEBERG_RETURN_UNEXPECTED(AwsSdkLifecycle::Instance().RegisterSession()); + ScopeExit unregister_on_failure( + [] { AwsSdkLifecycle::Instance().UnregisterSession(); }); auto session = std::shared_ptr( new SigV4AuthSession(std::move(delegate), std::move(signing_region), std::move(signing_name), std::move(credentials_provider))); // The session's destructor now owns the unregister. - slot.Release(); + unregister_on_failure.Cancel(); return session; } Result> SigV4AuthManager::WrapSession( std::shared_ptr delegate_session, const std::unordered_map& properties, - std::shared_ptr reuse_credentials) { + std::shared_ptr credentials) { ICEBERG_ASSIGN_OR_RAISE(auto region, ResolveSigningRegion(properties)); auto service = ResolveSigningName(properties); - // Reuse the parent's provider unless properties override keys, avoiding a - // fresh DefaultAWSCredentialsProviderChain (can hit IMDS) per derivation. - auto explicit_keys = properties.find(AuthProperties::kSigV4AccessKeyId); - bool has_explicit_keys = - explicit_keys != properties.end() && !explicit_keys->second.empty(); - std::shared_ptr credentials; - if (reuse_credentials && !has_explicit_keys) { - credentials = std::move(reuse_credentials); - } else { - ICEBERG_ASSIGN_OR_RAISE(credentials, MakeCredentialsProvider(properties)); - } // Fail fast when the provider cannot resolve credentials (e.g. an empty // default chain) instead of sending an effectively unsigned request later. if (credentials->GetAWSCredentials().IsEmpty()) { - return std::unexpected( - Error{.kind = ErrorKind::kAuthenticationFailed, - .message = "SigV4: AWS credentials provider returned empty credentials; " - "set '" + - AuthProperties::kSigV4AccessKeyId + "' and '" + - AuthProperties::kSigV4SecretAccessKey + - "' or configure the AWS credentials chain"}); + return AuthenticationFailed( + "SigV4: AWS credentials provider returned empty credentials; set '{}' and '{}' " + "or configure the AWS credentials chain", + AuthProperties::kSigV4AccessKeyId, AuthProperties::kSigV4SecretAccessKey); } ICEBERG_ASSIGN_OR_RAISE( auto session, SigV4AuthSession::Make(std::move(delegate_session), std::move(region), @@ -493,7 +493,7 @@ bool IsAwsSdkFinalized() { return AwsSdkLifecycle::Instance().IsFinalized(); } } // namespace iceberg::rest::auth -#else // !ICEBERG_SIGV4 +#else // !ICEBERG_SIGV4_ENABLED namespace iceberg::rest::auth { @@ -517,4 +517,4 @@ bool IsAwsSdkFinalized() { return false; } } // namespace iceberg::rest::auth -#endif // ICEBERG_SIGV4 +#endif // ICEBERG_SIGV4_ENABLED diff --git a/src/iceberg/catalog/rest/http_client.cc b/src/iceberg/catalog/rest/http_client.cc index 2fb29e82f..609116eb8 100644 --- a/src/iceberg/catalog/rest/http_client.cc +++ b/src/iceberg/catalog/rest/http_client.cc @@ -75,7 +75,6 @@ HttpHeaders MergeHeaders( const std::unordered_map& default_headers, const std::unordered_map& request_headers) { HttpHeaders merged; - merged.reserve(default_headers.size() + request_headers.size()); for (const auto& [key, val] : default_headers) { merged.try_emplace(key, val); } @@ -90,11 +89,18 @@ cpr::Header ToCprHeader(const HttpRequest& request) { } /// \brief Append URL-encoded query parameters to a URL, sorted by key. -/// \param base_url must not already contain a query string ('?' or '&'). +/// \param base_url must not already contain a query string. Callers pass query +/// parameters separately so authentication signs one unambiguous final URL. Result AppendQueryString( const std::string& base_url, const std::unordered_map& params) { if (params.empty()) return base_url; + if (base_url.find('?') != std::string::npos) { + return InvalidArgument( + "HttpClient base URL must not contain a query string when query parameters " + "are passed separately: {}", + base_url); + } std::map sorted(params.begin(), params.end()); std::string url = base_url + "?"; bool first = true; @@ -108,6 +114,15 @@ Result AppendQueryString( return url; } +Result AuthenticateRequest(auth::AuthSession& session, HttpMethod method, + std::string url, HttpHeaders headers, + std::string body = "") { + return session.Authenticate({.method = method, + .url = std::move(url), + .headers = std::move(headers), + .body = std::move(body)}); +} + /// \brief Checks if the HTTP status code indicates a successful response. bool IsSuccessful(int32_t status_code) { return status_code == 200 // OK @@ -167,12 +182,9 @@ Result HttpClient::Get( const std::unordered_map& headers, const ErrorHandler& error_handler, auth::AuthSession& session) { ICEBERG_ASSIGN_OR_RAISE(auto url, AppendQueryString(path, params)); - ICEBERG_ASSIGN_OR_RAISE( - auto authenticated, - session.Authenticate({.method = HttpMethod::kGet, - .url = std::move(url), - .headers = MergeHeaders(default_headers_, headers), - .body = ""})); + ICEBERG_ASSIGN_OR_RAISE(auto authenticated, + AuthenticateRequest(session, HttpMethod::kGet, std::move(url), + MergeHeaders(default_headers_, headers))); cpr::Response response = cpr::Get(cpr::Url{authenticated.url}, ToCprHeader(authenticated), *connection_pool_); @@ -188,10 +200,8 @@ Result HttpClient::Post( const ErrorHandler& error_handler, auth::AuthSession& session) { ICEBERG_ASSIGN_OR_RAISE( auto authenticated, - session.Authenticate({.method = HttpMethod::kPost, - .url = path, - .headers = MergeHeaders(default_headers_, headers), - .body = body})); + AuthenticateRequest(session, HttpMethod::kPost, path, + MergeHeaders(default_headers_, headers), body)); cpr::Response response = cpr::Post(cpr::Url{authenticated.url}, cpr::Body{authenticated.body}, ToCprHeader(authenticated), *connection_pool_); @@ -219,10 +229,9 @@ Result HttpClient::PostForm( cpr::Payload(pair_list.begin(), pair_list.end()).GetContent(); ICEBERG_ASSIGN_OR_RAISE( auto authenticated, - session.Authenticate({.method = HttpMethod::kPost, - .url = path, - .headers = MergeHeaders(default_headers_, form_headers), - .body = std::move(encoded_body)})); + AuthenticateRequest(session, HttpMethod::kPost, path, + MergeHeaders(default_headers_, form_headers), + std::move(encoded_body))); cpr::Response response = cpr::Post(cpr::Url{authenticated.url}, cpr::Body{authenticated.body}, ToCprHeader(authenticated), *connection_pool_); @@ -236,12 +245,9 @@ Result HttpClient::PostForm( Result HttpClient::Head( const std::string& path, const std::unordered_map& headers, const ErrorHandler& error_handler, auth::AuthSession& session) { - ICEBERG_ASSIGN_OR_RAISE( - auto authenticated, - session.Authenticate({.method = HttpMethod::kHead, - .url = path, - .headers = MergeHeaders(default_headers_, headers), - .body = ""})); + ICEBERG_ASSIGN_OR_RAISE(auto authenticated, + AuthenticateRequest(session, HttpMethod::kHead, path, + MergeHeaders(default_headers_, headers))); cpr::Response response = cpr::Head(cpr::Url{authenticated.url}, ToCprHeader(authenticated), *connection_pool_); @@ -258,10 +264,8 @@ Result HttpClient::Delete( ICEBERG_ASSIGN_OR_RAISE(auto url, AppendQueryString(path, params)); ICEBERG_ASSIGN_OR_RAISE( auto authenticated, - session.Authenticate({.method = HttpMethod::kDelete, - .url = std::move(url), - .headers = MergeHeaders(default_headers_, headers), - .body = ""})); + AuthenticateRequest(session, HttpMethod::kDelete, std::move(url), + MergeHeaders(default_headers_, headers))); cpr::Response response = cpr::Delete(cpr::Url{authenticated.url}, ToCprHeader(authenticated), *connection_pool_); diff --git a/src/iceberg/catalog/rest/http_request.cc b/src/iceberg/catalog/rest/http_request.cc deleted file mode 100644 index 4b4880e4c..000000000 --- a/src/iceberg/catalog/rest/http_request.cc +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include "iceberg/catalog/rest/http_request.h" - -#include -#include - -#include "iceberg/util/string_util.h" - -namespace iceberg::rest { - -HttpHeaders::iterator HttpHeaders::find(std::string_view name) { - return std::ranges::find_if(entries_, [name](const Entry& e) { - return StringUtils::EqualsIgnoreCase(e.first, name); - }); -} - -HttpHeaders::const_iterator HttpHeaders::find(std::string_view name) const { - return std::ranges::find_if(entries_, [name](const Entry& e) { - return StringUtils::EqualsIgnoreCase(e.first, name); - }); -} - -std::string& HttpHeaders::at(std::string_view name) { - auto it = find(name); - if (it == entries_.end()) { - throw std::out_of_range("HttpHeaders::at: no header named '" + std::string(name) + - "'"); - } - return it->second; -} - -const std::string& HttpHeaders::at(std::string_view name) const { - auto it = find(name); - if (it == entries_.end()) { - throw std::out_of_range("HttpHeaders::at: no header named '" + std::string(name) + - "'"); - } - return it->second; -} - -std::string& HttpHeaders::operator[](std::string_view name) { - auto it = find(name); - if (it == entries_.end()) { - entries_.emplace_back(std::string(name), std::string{}); - return entries_.back().second; - } - return it->second; -} - -void HttpHeaders::try_emplace(std::string name, std::string value) { - if (find(name) == entries_.end()) { - entries_.emplace_back(std::move(name), std::move(value)); - } -} - -std::size_t HttpHeaders::erase(std::string_view name) { - auto removed = std::ranges::remove_if(entries_, [name](const Entry& e) { - return StringUtils::EqualsIgnoreCase(e.first, name); - }); - std::size_t count = removed.size(); - entries_.erase(removed.begin(), removed.end()); - return count; -} - -} // namespace iceberg::rest diff --git a/src/iceberg/catalog/rest/http_request.h b/src/iceberg/catalog/rest/http_request.h index 1238d34d5..47419c361 100644 --- a/src/iceberg/catalog/rest/http_request.h +++ b/src/iceberg/catalog/rest/http_request.h @@ -19,13 +19,13 @@ #pragma once +#include +#include +#include #include -#include -#include +#include #include #include -#include -#include #include "iceberg/catalog/rest/iceberg_rest_export.h" @@ -51,70 +51,36 @@ constexpr std::string_view ToString(HttpMethod method) { return "UNKNOWN"; } -/// \brief Ordered collection of HTTP headers preserving repeated values. +/// \brief Case-insensitive ordering for HTTP header names. /// -/// Name comparison is case-insensitive (RFC 7230), insertion order is -/// preserved, and multiple entries with the same name coexist. The map-style -/// methods (`operator[]`, `at`, `try_emplace`, `find`) act on the *first* -/// matching entry; `add` appends a new entry even when the name already -/// exists. Not thread-safe. `add`, `try_emplace`, `operator[]` (when -/// inserting) and `erase` invalidate iterators. -class ICEBERG_REST_EXPORT HttpHeaders { - public: - using Entry = std::pair; - using container_type = std::vector; - using iterator = container_type::iterator; - using const_iterator = container_type::const_iterator; - - HttpHeaders() = default; - HttpHeaders(std::initializer_list init) : entries_(init) {} - - iterator begin() noexcept { return entries_.begin(); } - iterator end() noexcept { return entries_.end(); } - const_iterator begin() const noexcept { return entries_.begin(); } - const_iterator end() const noexcept { return entries_.end(); } - const_iterator cbegin() const noexcept { return entries_.cbegin(); } - const_iterator cend() const noexcept { return entries_.cend(); } - - bool empty() const noexcept { return entries_.empty(); } - std::size_t size() const noexcept { return entries_.size(); } - void clear() noexcept { entries_.clear(); } - void reserve(std::size_t n) { entries_.reserve(n); } - - /// \brief Case-insensitive lookup. Returns iterator to the first entry whose - /// name matches, or end() if none. - iterator find(std::string_view name); - const_iterator find(std::string_view name) const; - - bool contains(std::string_view name) const { return find(name) != end(); } - - /// \brief Returns the value of the first entry with the given name. - /// Throws std::out_of_range if none. - std::string& at(std::string_view name); - const std::string& at(std::string_view name) const; - - /// \brief Map-like upsert: returns reference to the first matching entry's - /// value, inserting a new entry with empty value if none exists. - std::string& operator[](std::string_view name); - - /// \brief Insert only if no entry with the same name exists. - void try_emplace(std::string name, std::string value); - - /// \brief Append an entry, preserving any existing entries with the same - /// name. Use this when repeated headers must survive (e.g. multiple - /// Set-Cookie values). - void add(std::string name, std::string value) { - entries_.emplace_back(std::move(name), std::move(value)); +/// HTTP header names are case-insensitive. This comparator also matches +/// cpr::Header's single-value map model. +struct CaseInsensitiveHeaderLess { + using is_transparent = void; + + bool operator()(std::string_view lhs, std::string_view rhs) const noexcept { + const auto min_size = lhs.size() < rhs.size() ? lhs.size() : rhs.size(); + for (std::size_t i = 0; i < min_size; ++i) { + auto left = static_cast(lhs[i]); + auto right = static_cast(rhs[i]); + const int lower_left = std::tolower(left); + const int lower_right = std::tolower(right); + if (lower_left < lower_right) return true; + if (lower_left > lower_right) return false; + } + return lhs.size() < rhs.size(); } - - /// \brief Remove all entries with the given name (case-insensitive). Returns - /// the number of entries removed. - std::size_t erase(std::string_view name); - - private: - container_type entries_; }; +/// \brief Single-value HTTP headers with case-insensitive names. +/// +/// Repeated outgoing headers are intentionally not represented here. The +/// SigV4 path signs headers through the AWS SDK request model, and the final +/// transport uses cpr::Header; both are single-value, map-like containers that +/// fold duplicate names. Keeping the REST request model single-value avoids +/// exposing repeated-header behavior that cannot survive signing or transport. +using HttpHeaders = std::map; + /// \brief An outgoing HTTP request. Mirrors Java's HttpRequest so signing /// implementations like SigV4 see method, url, headers, and body together. struct ICEBERG_REST_EXPORT HttpRequest { diff --git a/src/iceberg/catalog/rest/meson.build b/src/iceberg/catalog/rest/meson.build index dad220013..48254614f 100644 --- a/src/iceberg/catalog/rest/meson.build +++ b/src/iceberg/catalog/rest/meson.build @@ -26,7 +26,6 @@ iceberg_rest_sources = files( 'endpoint.cc', 'error_handlers.cc', 'http_client.cc', - 'http_request.cc', 'json_serde.cc', 'resource_paths.cc', 'rest_catalog.cc', @@ -42,7 +41,7 @@ cpr_needs_static = ( ) cpr_dep = dependency('cpr', static: cpr_needs_static) -iceberg_rest_sources += files('auth/sigv4_auth_manager.cc') +iceberg_rest_sources += files('auth/sigv4_manager.cc') iceberg_rest_build_deps = [iceberg_dep, cpr_dep] iceberg_rest_compile_defs = [] @@ -57,7 +56,9 @@ aws_sdk_core_dep = dependency( ) if aws_sdk_core_dep.found() iceberg_rest_build_deps += aws_sdk_core_dep - iceberg_rest_compile_defs += '-DICEBERG_SIGV4' + iceberg_rest_compile_defs += '-DICEBERG_SIGV4_ENABLED=1' +else + iceberg_rest_compile_defs += '-DICEBERG_SIGV4_ENABLED=0' endif iceberg_rest_lib = library( diff --git a/src/iceberg/catalog/rest/rest_catalog.cc b/src/iceberg/catalog/rest/rest_catalog.cc index b43ea4983..6472adc4b 100644 --- a/src/iceberg/catalog/rest/rest_catalog.cc +++ b/src/iceberg/catalog/rest/rest_catalog.cc @@ -118,56 +118,14 @@ Result CaptureNoSuchNamespace(const auto& status) { return CaptureNoSuchObject(status, ErrorKind::kNoSuchNamespace); } -std::string TableSessionKey(const TableIdentifier& identifier) { - std::string key; - for (const auto& level : identifier.ns.levels) { - key += level; - key += '\x1f'; - } - key += identifier.name; - return key; -} - } // namespace RestCatalog::~RestCatalog() { - for (auto& [key, session] : table_sessions_) { - if (session) { - std::ignore = session->Close(); - } - } if (catalog_session_) { std::ignore = catalog_session_->Close(); } } -Status RestCatalog::RememberTableSession( - const TableIdentifier& identifier, - const std::unordered_map& config) { - ICEBERG_ASSIGN_OR_RAISE( - auto session, auth_manager_->TableSession(identifier, config, catalog_session_)); - if (session == catalog_session_) { - return {}; - } - std::shared_ptr replaced; - { - std::lock_guard lock(table_sessions_mutex_); - auto& slot = table_sessions_[TableSessionKey(identifier)]; - replaced = std::exchange(slot, std::move(session)); - } - if (replaced) { - std::ignore = replaced->Close(); - } - return {}; -} - -std::shared_ptr RestCatalog::SessionFor( - const TableIdentifier& identifier) const { - std::lock_guard lock(table_sessions_mutex_); - auto it = table_sessions_.find(TableSessionKey(identifier)); - return it != table_sessions_.end() ? it->second : catalog_session_; -} - Result> RestCatalog::Make( const RestCatalogProperties& config) { ICEBERG_ASSIGN_OR_RAISE(auto uri, config.Uri()); @@ -398,7 +356,9 @@ Result RestCatalog::CreateTableInternal( ICEBERG_ASSIGN_OR_RAISE(auto json, FromJsonString(response.body())); ICEBERG_ASSIGN_OR_RAISE(auto load_result, LoadTableResultFromJson(json)); - ICEBERG_RETURN_UNEXPECTED(RememberTableSession(identifier, load_result.config)); + // TODO: Wire table-specific auth config from LoadTableResponse once C++ has + // table-scoped REST operations or a table-scoped catalog wrapper. The current + // Table implementation routes refresh and commit back through Catalog. return load_result; } @@ -435,7 +395,7 @@ Result> RestCatalog::UpdateTable( ICEBERG_ASSIGN_OR_RAISE( const auto response, client_->Post(path, json_request, /*headers=*/{}, *TableErrorHandler::Instance(), - *SessionFor(identifier))); + *catalog_session_)); ICEBERG_ASSIGN_OR_RAISE(auto json, FromJsonString(response.body())); ICEBERG_ASSIGN_OR_RAISE(auto commit_response, CommitTableResponseFromJson(json)); @@ -512,12 +472,10 @@ Result RestCatalog::LoadTableInternal( params["snapshots"] = "all"; } - // Refresh uses the cached per-table session; the initial load falls back to - // the catalog session (no table session is cached yet). ICEBERG_ASSIGN_OR_RAISE( const auto response, client_->Get(path, params, /*headers=*/{}, *TableErrorHandler::Instance(), - *SessionFor(identifier))); + *catalog_session_)); return response.body(); } @@ -525,8 +483,8 @@ Result> RestCatalog::LoadTable(const TableIdentifier& ide ICEBERG_ASSIGN_OR_RAISE(const auto body, LoadTableInternal(identifier)); ICEBERG_ASSIGN_OR_RAISE(auto json, FromJsonString(body)); ICEBERG_ASSIGN_OR_RAISE(auto load_result, LoadTableResultFromJson(json)); - ICEBERG_RETURN_UNEXPECTED(RememberTableSession(identifier, load_result.config)); - /// FIXME: support per-table FileIO creation + // TODO: Support table-specific auth config and per-table FileIO from the REST + // load response when table-scoped REST operations are introduced. return Table::Make(identifier, std::move(load_result.metadata), std::move(load_result.metadata_location), file_io_, shared_from_this()); @@ -550,7 +508,8 @@ Result> RestCatalog::RegisterTable( ICEBERG_ASSIGN_OR_RAISE(auto json, FromJsonString(response.body())); ICEBERG_ASSIGN_OR_RAISE(auto load_result, LoadTableResultFromJson(json)); - ICEBERG_RETURN_UNEXPECTED(RememberTableSession(identifier, load_result.config)); + // TODO: Support table-specific auth config and per-table FileIO from the REST + // register response when table-scoped REST operations are introduced. return Table::Make(identifier, std::move(load_result.metadata), std::move(load_result.metadata_location), file_io_, shared_from_this()); diff --git a/src/iceberg/catalog/rest/rest_catalog.h b/src/iceberg/catalog/rest/rest_catalog.h index f876c818d..4fd4db5b8 100644 --- a/src/iceberg/catalog/rest/rest_catalog.h +++ b/src/iceberg/catalog/rest/rest_catalog.h @@ -20,9 +20,7 @@ #pragma once #include -#include #include -#include #include #include "iceberg/catalog.h" @@ -111,14 +109,6 @@ class ICEBERG_REST_EXPORT RestCatalog : public Catalog, Result LoadTableInternal(const TableIdentifier& identifier) const; - /// \brief Derives and caches a per-table auth session from a table response - /// `config`, per the REST spec (`config.token` must be used for table requests). - Status RememberTableSession(const TableIdentifier& identifier, - const std::unordered_map& config); - - /// \brief Returns the cached per-table session, or the catalog session. - std::shared_ptr SessionFor(const TableIdentifier& identifier) const; - Result CreateTableInternal( const TableIdentifier& identifier, const std::shared_ptr& schema, const std::shared_ptr& spec, const std::shared_ptr& order, @@ -134,8 +124,6 @@ class ICEBERG_REST_EXPORT RestCatalog : public Catalog, std::unique_ptr auth_manager_; std::shared_ptr catalog_session_; SnapshotMode snapshot_mode_; - mutable std::mutex table_sessions_mutex_; - std::unordered_map> table_sessions_; }; } // namespace iceberg::rest diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt index 88a373c88..0e8f03150 100644 --- a/src/iceberg/test/CMakeLists.txt +++ b/src/iceberg/test/CMakeLists.txt @@ -286,7 +286,6 @@ if(ICEBERG_BUILD_REST) SOURCES auth_manager_test.cc endpoint_test.cc - rest_catalog_session_test.cc rest_file_io_test.cc rest_json_serde_test.cc rest_util_test.cc) diff --git a/src/iceberg/test/auth_manager_test.cc b/src/iceberg/test/auth_manager_test.cc index 184362904..22ecef864 100644 --- a/src/iceberg/test/auth_manager_test.cc +++ b/src/iceberg/test/auth_manager_test.cc @@ -37,6 +37,7 @@ #include "iceberg/catalog/rest/auth/auth_session.h" #include "iceberg/catalog/rest/auth/oauth2_util.h" #include "iceberg/catalog/rest/auth/token_refresh_scheduler.h" +#include "iceberg/catalog/rest/error_handlers.h" #include "iceberg/catalog/rest/http_client.h" #include "iceberg/catalog/rest/json_serde_internal.h" #include "iceberg/json_serde_internal.h" @@ -89,6 +90,25 @@ TEST_F(AuthManagerTest, LoadNoopAuthManagerInferred) { ASSERT_THAT(manager_result, IsOk()); } +TEST_F(AuthManagerTest, HttpHeadersAreCaseInsensitiveSingleValueMap) { + HttpHeaders headers; + headers.emplace("Authorization", "Bearer first"); + headers.emplace("authorization", "Bearer second"); + + EXPECT_EQ(headers.size(), 1); + EXPECT_EQ(headers.at("AUTHORIZATION"), "Bearer first"); +} + +TEST_F(AuthManagerTest, HttpClientRejectsParamsWhenUrlAlreadyHasQuery) { + auto session = AuthSession::MakeDefault({}); + auto result = + client_.Get("http://127.0.0.1/v1/config?existing=true", {{"warehouse", "prod"}}, + /*headers=*/{}, *rest::DefaultErrorHandler::Instance(), *session); + + EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument)); + EXPECT_THAT(result, HasErrorMessage("must not contain a query string")); +} + // Verifies that auth type is case-insensitive TEST_F(AuthManagerTest, AuthTypeCaseInsensitive) { for (const auto& auth_type : {"NONE", "None", "NoNe"}) { diff --git a/src/iceberg/test/meson.build b/src/iceberg/test/meson.build index b46317f73..03d9e1f6c 100644 --- a/src/iceberg/test/meson.build +++ b/src/iceberg/test/meson.build @@ -128,7 +128,6 @@ if get_option('rest').enabled() 'sources': files( 'auth_manager_test.cc', 'endpoint_test.cc', - 'rest_catalog_session_test.cc', 'rest_file_io_test.cc', 'rest_json_serde_test.cc', 'rest_util_test.cc', diff --git a/src/iceberg/test/rest_catalog_session_test.cc b/src/iceberg/test/rest_catalog_session_test.cc deleted file mode 100644 index cddfe0d79..000000000 --- a/src/iceberg/test/rest_catalog_session_test.cc +++ /dev/null @@ -1,301 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include - -#ifndef _WIN32 - -# include - -# include -# include -# include -# include -# include -# include -# include -# include - -# include -# include - -# include "iceberg/catalog/rest/auth/auth_manager.h" -# include "iceberg/catalog/rest/auth/auth_managers.h" -# include "iceberg/catalog/rest/auth/auth_properties.h" -# include "iceberg/catalog/rest/auth/auth_session.h" -# include "iceberg/catalog/rest/catalog_properties.h" -# include "iceberg/catalog/rest/rest_catalog.h" -# include "iceberg/file_io.h" -# include "iceberg/file_io_registry.h" -# include "iceberg/table_identifier.h" -# include "iceberg/table_requirement.h" -# include "iceberg/table_update.h" -# include "iceberg/test/matchers.h" - -namespace iceberg::rest { - -namespace { - -constexpr std::string_view kMetadataJson = - R"({"format-version":2,"table-uuid":"test-uuid-1234","location":"s3://bucket/test",)" - R"("last-sequence-number":0,"last-updated-ms":0,"last-column-id":1,)" - R"("schemas":[{"type":"struct","schema-id":1,"fields":[{"id":1,"name":"id","type":"int","required":true}]}],)" - R"("current-schema-id":1,"partition-specs":[{"spec-id":0,"fields":[]}],"default-spec-id":0,)" - R"("last-partition-id":0,"sort-orders":[{"order-id":0,"fields":[]}],"default-sort-order-id":0})"; - -struct RecordedRequest { - std::string method; - std::string path; - std::string auth_marker; -}; - -class MiniRestServer { - public: - bool Start() { - listen_fd_ = ::socket(AF_INET, SOCK_STREAM, 0); - if (listen_fd_ < 0) return false; - int reuse = 1; - ::setsockopt(listen_fd_, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse)); - sockaddr_in addr{}; - addr.sin_family = AF_INET; - addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); - addr.sin_port = 0; - if (::bind(listen_fd_, reinterpret_cast(&addr), sizeof(addr)) < 0) { - return false; - } - socklen_t len = sizeof(addr); - ::getsockname(listen_fd_, reinterpret_cast(&addr), &len); - port_ = ntohs(addr.sin_port); - if (::listen(listen_fd_, 8) < 0) return false; - server_thread_ = std::thread([this, fd = listen_fd_] { Loop(fd); }); - return true; - } - - void Stop() { - stopping_ = true; - if (listen_fd_ >= 0) { - ::shutdown(listen_fd_, SHUT_RDWR); - ::close(listen_fd_); - listen_fd_ = -1; - } - if (server_thread_.joinable()) server_thread_.join(); - } - - int port() const { return port_; } - - std::vector requests() { - std::lock_guard lock(mutex_); - return requests_; - } - - private: - void Loop(int listen_fd) { - while (!stopping_) { - int fd = ::accept(listen_fd, nullptr, nullptr); - if (fd < 0) break; - HandleConnection(fd); - ::close(fd); - } - } - - void HandleConnection(int fd) { - std::string raw; - std::array buf{}; - size_t header_end = std::string::npos; - while (header_end == std::string::npos) { - ssize_t n = ::read(fd, buf.data(), buf.size()); - if (n <= 0) return; - raw.append(buf.data(), static_cast(n)); - header_end = raw.find("\r\n\r\n"); - } - size_t content_length = 0; - { - std::string lower; - lower.reserve(header_end); - for (size_t i = 0; i < header_end; ++i) { - lower.push_back( - static_cast(std::tolower(static_cast(raw[i])))); - } - auto pos = lower.find("content-length:"); - if (pos != std::string::npos) { - content_length = std::stoul(lower.substr(pos + 15)); - } - } - while (raw.size() < header_end + 4 + content_length) { - ssize_t n = ::read(fd, buf.data(), buf.size()); - if (n <= 0) break; - raw.append(buf.data(), static_cast(n)); - } - - auto line_end = raw.find("\r\n"); - auto request_line = raw.substr(0, line_end); - auto sp1 = request_line.find(' '); - auto sp2 = request_line.find(' ', sp1 + 1); - RecordedRequest req; - req.method = request_line.substr(0, sp1); - req.path = request_line.substr(sp1 + 1, sp2 - sp1 - 1); - req.auth_marker = HeaderValue(raw.substr(0, header_end), "x-test-auth"); - { - std::lock_guard lock(mutex_); - requests_.push_back(req); - } - - Respond(fd, BodyFor(req)); - } - - static std::string HeaderValue(const std::string& headers, std::string_view name) { - std::string lower; - lower.reserve(headers.size()); - for (char c : headers) { - lower.push_back(static_cast(std::tolower(static_cast(c)))); - } - auto pos = lower.find(std::string(name) + ":"); - if (pos == std::string::npos) return ""; - auto value_start = pos + name.size() + 1; - auto value_end = headers.find("\r\n", value_start); - auto value = headers.substr(value_start, value_end - value_start); - auto first = value.find_first_not_of(' '); - return first == std::string::npos ? "" : value.substr(first); - } - - std::string BodyFor(const RecordedRequest& req) { - if (req.path.find("/v1/config") != std::string::npos) { - return R"({"defaults":{},"overrides":{}})"; - } - if (req.method == "GET" && req.path.find("/tables/") != std::string::npos) { - return std::string(R"({"metadata-location":"s3://bucket/meta/v1.json",)") + - R"("metadata":)" + std::string(kMetadataJson) + - R"(,"config":{"token":"tbl-token-1"}})"; - } - if (req.method == "POST" && req.path.find("/tables/") != std::string::npos) { - return std::string(R"({"metadata-location":"s3://bucket/meta/v2.json",)") + - R"("metadata":)" + std::string(kMetadataJson) + "}"; - } - return "{}"; - } - - static void Respond(int fd, const std::string& body) { - std::string response = "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\n"; - response += "Content-Length: " + std::to_string(body.size()) + "\r\n"; - response += "Connection: close\r\n\r\n"; - response += body; - size_t sent = 0; - while (sent < response.size()) { - ssize_t n = ::write(fd, response.data() + sent, response.size() - sent); - if (n <= 0) break; - sent += static_cast(n); - } - } - - int listen_fd_ = -1; - int port_ = 0; - std::atomic stopping_{false}; - std::thread server_thread_; - std::mutex mutex_; - std::vector requests_; -}; - -class RecordingAuthManager : public auth::AuthManager { - public: - Result> InitSession( - HttpClient& /*init_client*/, - const std::unordered_map& /*properties*/) override { - return auth::AuthSession::MakeDefault({{"x-test-auth", "init"}}); - } - - Result> CatalogSession( - HttpClient& /*shared_client*/, - const std::unordered_map& /*properties*/) override { - return auth::AuthSession::MakeDefault({{"x-test-auth", "catalog"}}); - } - - Result> TableSession( - const TableIdentifier& /*table*/, - const std::unordered_map& properties, - std::shared_ptr parent) override { - auto token = properties.find("token"); - if (token == properties.end()) { - return parent; - } - return auth::AuthSession::MakeDefault({{"x-test-auth", "table:" + token->second}}); - } -}; - -class MockFileIO : public FileIO {}; - -} // namespace - -TEST(RestCatalogSessionTest, RefreshAndCommitUseTableSessionFromResponseConfig) { - MiniRestServer server; - ASSERT_TRUE(server.Start()); - - auth::AuthManagers::Register( - "test-session-recorder", - [](std::string_view /*name*/, - const std::unordered_map& /*properties*/) - -> Result> { - return std::make_unique(); - }); - FileIORegistry::Register( - "test.SessionMockFileIO", - [](const std::unordered_map& /*properties*/) - -> Result> { return std::make_unique(); }); - - auto config = RestCatalogProperties::FromMap( - {{"uri", "http://127.0.0.1:" + std::to_string(server.port())}, - {auth::AuthProperties::kAuthType, "test-session-recorder"}, - {"io-impl", "test.SessionMockFileIO"}}); - - { - auto catalog_result = RestCatalog::Make(config); - ASSERT_THAT(catalog_result, IsOk()); - auto catalog = catalog_result.value(); - - TableIdentifier identifier{.ns = Namespace{{"ns1"}}, .name = "tbl1"}; - ASSERT_THAT(catalog->LoadTable(identifier), IsOk()); - ASSERT_THAT(catalog->LoadTable(identifier), IsOk()); - ASSERT_THAT(catalog->UpdateTable(identifier, {}, {}), IsOk()); - } - - server.Stop(); - - auto requests = server.requests(); - ASSERT_EQ(requests.size(), 4); - EXPECT_TRUE(requests[0].path.find("/v1/config") != std::string::npos); - EXPECT_EQ(requests[0].auth_marker, "init"); - EXPECT_EQ(requests[1].method, "GET"); - EXPECT_TRUE(requests[1].path.find("/tables/tbl1") != std::string::npos); - EXPECT_EQ(requests[1].auth_marker, "catalog"); - EXPECT_EQ(requests[2].method, "GET"); - EXPECT_TRUE(requests[2].path.find("/tables/tbl1") != std::string::npos); - EXPECT_EQ(requests[2].auth_marker, "table:tbl-token-1"); - EXPECT_EQ(requests[3].method, "POST"); - EXPECT_TRUE(requests[3].path.find("/tables/tbl1") != std::string::npos); - EXPECT_EQ(requests[3].auth_marker, "table:tbl-token-1"); -} - -} // namespace iceberg::rest - -#else - -TEST(RestCatalogSessionTest, RefreshAndCommitUseTableSessionFromResponseConfig) { - GTEST_SKIP() << "POSIX-socket test server is not available on Windows"; -} - -#endif // _WIN32 diff --git a/src/iceberg/test/sigv4_auth_test.cc b/src/iceberg/test/sigv4_auth_test.cc index 512bb0d95..ac8c36b63 100644 --- a/src/iceberg/test/sigv4_auth_test.cc +++ b/src/iceberg/test/sigv4_auth_test.cc @@ -17,19 +17,25 @@ * under the License. */ -#ifdef ICEBERG_SIGV4 +#if ICEBERG_SIGV4_ENABLED +# include +# include # include +# include # include +# include +# include # include +# include +# include +# include # include # include "iceberg/catalog/rest/auth/auth_managers.h" # include "iceberg/catalog/rest/auth/auth_properties.h" # include "iceberg/catalog/rest/auth/auth_session.h" -# include "iceberg/catalog/rest/auth/aws_sdk.h" -# include "iceberg/catalog/rest/auth/session_context.h" # include "iceberg/catalog/rest/auth/sigv4_auth_manager_internal.h" # include "iceberg/catalog/rest/http_client.h" # include "iceberg/table_identifier.h" @@ -37,22 +43,164 @@ namespace iceberg::rest::auth { +namespace { + +using ::testing::HasSubstr; +using ::testing::StartsWith; + +constexpr std::string_view kAccessKey = "AKIAIOSFODNN7EXAMPLE"; +constexpr std::string_view kSecretKey = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"; +constexpr std::string_view kAmzContentSha256Header = "x-amz-content-sha256"; + +std::string HexEncode(const Aws::Utils::ByteBuffer& buffer) { + static constexpr char kHex[] = "0123456789abcdef"; + std::string hex; + hex.reserve(buffer.GetLength() * 2); + for (size_t i = 0; i < buffer.GetLength(); ++i) { + auto byte = buffer.GetUnderlyingData()[i]; + hex.push_back(kHex[byte >> 4]); + hex.push_back(kHex[byte & 0x0F]); + } + return hex; +} + +Aws::Utils::ByteBuffer BufferFromString(std::string_view value) { + return Aws::Utils::ByteBuffer(reinterpret_cast(value.data()), + value.size()); +} + +Aws::Utils::ByteBuffer HmacSha256(const Aws::Utils::ByteBuffer& key, + std::string_view value) { + Aws::Utils::Crypto::Sha256HMAC hmac; + auto result = hmac.Calculate(BufferFromString(value), key); + EXPECT_TRUE(result.IsSuccess()); + return result.GetResult(); +} + +std::string Sha256Hex(std::string_view value) { + auto digest = + Aws::Utils::HashingUtils::CalculateSHA256(Aws::String(value.data(), value.size())); + return Aws::Utils::HashingUtils::HexEncode(digest).c_str(); +} + +std::string ExtractAuthField(std::string_view authorization, std::string_view prefix) { + auto pos = authorization.find(prefix); + EXPECT_NE(pos, std::string_view::npos) << authorization; + if (pos == std::string_view::npos) return {}; + pos += prefix.size(); + auto end = authorization.find(',', pos); + return std::string(authorization.substr(pos, end - pos)); +} + +std::string HeaderValue(const HttpHeaders& headers, std::string_view name) { + auto it = headers.find(name); + EXPECT_NE(it, headers.end()) << "Missing header: " << name; + if (it == headers.end()) return {}; + return it->second; +} + +std::string PathFromUrl(const std::string& url) { + auto scheme = url.find("://"); + auto path_start = + scheme == std::string::npos ? url.find('/') : url.find('/', scheme + 3); + if (path_start == std::string::npos) return "/"; + auto query_start = url.find('?', path_start); + return url.substr(path_start, query_start - path_start); +} + +std::string CanonicalQueryFromUrl(const std::string& url) { + auto query_start = url.find('?'); + if (query_start == std::string::npos) return {}; + std::vector params; + size_t start = query_start + 1; + while (start <= url.size()) { + auto end = url.find('&', start); + params.emplace_back(url.substr(start, end - start)); + if (end == std::string::npos) break; + start = end + 1; + } + std::sort(params.begin(), params.end()); + + std::string canonical; + for (const auto& param : params) { + if (!canonical.empty()) canonical += '&'; + canonical += param; + } + return canonical; +} + +std::string ExpectedSigV4Signature(const HttpRequest& request, + std::string_view signing_region, + std::string_view signing_name) { + const auto authorization = HeaderValue(request.headers, "authorization"); + const auto x_amz_date = HeaderValue(request.headers, "x-amz-date"); + const auto credential_scope = + ExtractAuthField(authorization, std::string(kAccessKey) + "/"); + const auto signed_headers = ExtractAuthField(authorization, "SignedHeaders="); + const auto date = x_amz_date.substr(0, 8); + + std::string canonical_headers; + size_t start = 0; + while (start <= signed_headers.size()) { + auto end = signed_headers.find(';', start); + auto header_name = signed_headers.substr(start, end - start); + canonical_headers += header_name; + canonical_headers += ':'; + canonical_headers += HeaderValue(request.headers, header_name); + canonical_headers += '\n'; + if (end == std::string::npos) break; + start = end + 1; + } + + auto payload_hash = request.body.empty() + ? std::string(SigV4AuthSession::kEmptyBodySha256) + : Sha256Hex(request.body); + const auto canonical_request = + std::string(ToString(request.method)) + "\n" + PathFromUrl(request.url) + "\n" + + CanonicalQueryFromUrl(request.url) + "\n" + canonical_headers + "\n" + + signed_headers + "\n" + payload_hash; + const auto string_to_sign = "AWS4-HMAC-SHA256\n" + x_amz_date + "\n" + + credential_scope + "\n" + Sha256Hex(canonical_request); + + auto date_key = HmacSha256(BufferFromString("AWS4" + std::string(kSecretKey)), date); + auto region_key = HmacSha256(date_key, signing_region); + auto service_key = HmacSha256(region_key, signing_name); + auto signing_key = HmacSha256(service_key, "aws4_request"); + return HexEncode(HmacSha256(signing_key, string_to_sign)); +} + +} // namespace + class SigV4AuthTest : public ::testing::Test { protected: static void SetUpTestSuite() { ASSERT_THAT(InitializeAwsSdk(), IsOk()); } - HttpClient client_{{}}; + static void TearDownTestSuite() { EXPECT_THAT(FinalizeAwsSdk(), IsOk()); } std::unordered_map MakeSigV4Properties() { return { {AuthProperties::kAuthType, "sigv4"}, {AuthProperties::kSigV4SigningRegion, "us-east-1"}, {AuthProperties::kSigV4SigningName, "execute-api"}, - {AuthProperties::kSigV4AccessKeyId, "AKIAIOSFODNN7EXAMPLE"}, - {AuthProperties::kSigV4SecretAccessKey, - "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"}, + {AuthProperties::kSigV4AccessKeyId, std::string(kAccessKey)}, + {AuthProperties::kSigV4SecretAccessKey, std::string(kSecretKey)}, }; } + + Result> MakeCatalogSession( + const std::unordered_map& properties) { + ICEBERG_ASSIGN_OR_RAISE(auto manager, AuthManagers::Load("test-catalog", properties)); + return manager->CatalogSession(client_, properties); + } + + Result SignRequest( + const std::unordered_map& properties, + HttpRequest request) { + ICEBERG_ASSIGN_OR_RAISE(auto session, MakeCatalogSession(properties)); + return session->Authenticate(std::move(request)); + } + + HttpClient client_{{}}; }; TEST_F(SigV4AuthTest, LifecycleInitializeIsIdempotent) { @@ -62,130 +210,127 @@ TEST_F(SigV4AuthTest, LifecycleInitializeIsIdempotent) { } TEST_F(SigV4AuthTest, LifecycleFinalizeRefusesWhileSessionsAlive) { - auto properties = MakeSigV4Properties(); - auto manager_result = AuthManagers::Load("test-catalog", properties); - ASSERT_THAT(manager_result, IsOk()); - auto session_result = manager_result.value()->CatalogSession(client_, properties); - ASSERT_THAT(session_result, IsOk()); - - EXPECT_THAT(FinalizeAwsSdk(), IsError(ErrorKind::kInvalid)); - EXPECT_TRUE(IsAwsSdkInitialized()); -} - -TEST_F(SigV4AuthTest, SessionRegistrationBalancesLifecycleCount) { - { - auto delegate = AuthSession::MakeDefault({}); - auto credentials = std::make_shared( - Aws::Auth::AWSCredentials("id", "secret")); - auto session_result = - SigV4AuthSession::Make(delegate, "us-east-1", "execute-api", credentials); - ASSERT_THAT(session_result, IsOk()); - EXPECT_THAT(FinalizeAwsSdk(), IsError(ErrorKind::kInvalid)); - } - - auto properties = MakeSigV4Properties(); - auto manager_result = AuthManagers::Load("test-catalog", properties); - ASSERT_THAT(manager_result, IsOk()); - auto session_result = manager_result.value()->CatalogSession(client_, properties); - ASSERT_THAT(session_result, IsOk()); + ICEBERG_UNWRAP_OR_FAIL(auto session, MakeCatalogSession(MakeSigV4Properties())); EXPECT_THAT(FinalizeAwsSdk(), IsError(ErrorKind::kInvalid)); EXPECT_TRUE(IsAwsSdkInitialized()); } -TEST_F(SigV4AuthTest, LoadSigV4AuthManager) { - auto properties = MakeSigV4Properties(); - auto manager_result = AuthManagers::Load("test-catalog", properties); - ASSERT_THAT(manager_result, IsOk()); -} - -TEST_F(SigV4AuthTest, CatalogSessionProducesSession) { - auto properties = MakeSigV4Properties(); - auto manager_result = AuthManagers::Load("test-catalog", properties); - ASSERT_THAT(manager_result, IsOk()); - - auto session_result = manager_result.value()->CatalogSession(client_, properties); - ASSERT_THAT(session_result, IsOk()); +TEST_F(SigV4AuthTest, AuthenticateWithoutBodyDetailedHeaders) { + ICEBERG_UNWRAP_OR_FAIL(auto signed_request, + SignRequest(MakeSigV4Properties(), + {.method = HttpMethod::kGet, + .url = "http://localhost:8080/path", + .headers = {{"Content-Type", "application/json"}, + {"Content-Encoding", "gzip"}}})); + + EXPECT_EQ(HeaderValue(signed_request.headers, "content-type"), "application/json"); + EXPECT_EQ(HeaderValue(signed_request.headers, "content-encoding"), "gzip"); + EXPECT_EQ(HeaderValue(signed_request.headers, "host"), "localhost:8080"); + EXPECT_EQ(HeaderValue(signed_request.headers, kAmzContentSha256Header), + SigV4AuthSession::kEmptyBodySha256); + EXPECT_NE(signed_request.headers.find("x-amz-date"), signed_request.headers.end()); + + auto authorization = HeaderValue(signed_request.headers, "authorization"); + EXPECT_THAT(authorization, StartsWith("AWS4-HMAC-SHA256 Credential=")); + EXPECT_THAT(authorization, HasSubstr("SignedHeaders=content-encoding;content-type;host;" + "x-amz-content-sha256;x-amz-date")); + EXPECT_EQ(ExtractAuthField(authorization, "Signature="), + ExpectedSigV4Signature(signed_request, "us-east-1", "execute-api")); } -TEST_F(SigV4AuthTest, AuthenticateAddsAuthorizationHeader) { - auto properties = MakeSigV4Properties(); - auto manager_result = AuthManagers::Load("test-catalog", properties); - ASSERT_THAT(manager_result, IsOk()); - - auto session_result = manager_result.value()->CatalogSession(client_, properties); - ASSERT_THAT(session_result, IsOk()); - - HttpRequest request{.method = HttpMethod::kGet, .url = "https://example.com/v1/config"}; - auto auth_result = session_result.value()->Authenticate(request); - ASSERT_THAT(auth_result, IsOk()); - const auto& headers = auth_result.value().headers; - - EXPECT_NE(headers.find("authorization"), headers.end()); - EXPECT_TRUE(headers.at("authorization").starts_with("AWS4-HMAC-SHA256")); - EXPECT_NE(headers.find("x-amz-date"), headers.end()); +TEST_F(SigV4AuthTest, AuthenticateWithBodyDetailedHeaders) { + ICEBERG_UNWRAP_OR_FAIL( + auto signed_request, + SignRequest(MakeSigV4Properties(), + {.method = HttpMethod::kPost, + .url = "http://localhost:8080/path", + .headers = {{"Content-Type", "application/x-www-form-urlencoded"}, + {"Content-Encoding", "gzip"}}, + .body = R"({"namespace":["ns"]})"})); + + auto authorization = HeaderValue(signed_request.headers, "authorization"); + EXPECT_THAT(authorization, StartsWith("AWS4-HMAC-SHA256 Credential=")); + EXPECT_THAT(authorization, HasSubstr("SignedHeaders=content-encoding;content-type;host;" + "x-amz-content-sha256;x-amz-date")); + EXPECT_EQ(HeaderValue(signed_request.headers, kAmzContentSha256Header), + "LL0/LbCIE/WzVCHsfA3ASGOx9vJNPeTL0jBro8scPfA="); + EXPECT_EQ(ExtractAuthField(authorization, "Signature="), + ExpectedSigV4Signature(signed_request, "us-east-1", "execute-api")); } -TEST_F(SigV4AuthTest, AuthenticateWithPostBody) { - auto properties = MakeSigV4Properties(); - auto manager_result = AuthManagers::Load("test-catalog", properties); - ASSERT_THAT(manager_result, IsOk()); - - auto session_result = manager_result.value()->CatalogSession(client_, properties); - ASSERT_THAT(session_result, IsOk()); - - HttpRequest request{.method = HttpMethod::kPost, - .url = "https://example.com/v1/namespaces", - .headers = {{"Content-Type", "application/json"}}, - .body = R"({"namespace":["ns1"]})"}; - auto auth_result = session_result.value()->Authenticate(request); - ASSERT_THAT(auth_result, IsOk()); - const auto& headers = auth_result.value().headers; - - EXPECT_NE(headers.find("authorization"), headers.end()); - EXPECT_TRUE(headers.at("authorization").starts_with("AWS4-HMAC-SHA256")); +TEST_F(SigV4AuthTest, QueryStringIsIncludedInSignature) { + ICEBERG_UNWRAP_OR_FAIL( + auto signed_request, + SignRequest(MakeSigV4Properties(), + {.method = HttpMethod::kGet, + .url = "http://localhost:8080/path?warehouse=prod&prefix=a"})); + + auto authorization = HeaderValue(signed_request.headers, "authorization"); + EXPECT_THAT(authorization, StartsWith("AWS4-HMAC-SHA256 Credential=")); + EXPECT_EQ(ExtractAuthField(authorization, "Signature="), + ExpectedSigV4Signature(signed_request, "us-east-1", "execute-api")); } -TEST_F(SigV4AuthTest, DelegateAuthorizationHeaderRelocated) { +TEST_F(SigV4AuthTest, DelegateAuthorizationHeaderIsRelocatedAndSigned) { auto properties = MakeSigV4Properties(); properties[AuthProperties::kToken.key()] = "my-oauth-token"; - properties[AuthProperties::kSigV4DelegateAuthType] = "oauth2"; - auto manager_result = AuthManagers::Load("test-catalog", properties); - ASSERT_THAT(manager_result, IsOk()); - - auto session_result = manager_result.value()->CatalogSession(client_, properties); - ASSERT_THAT(session_result, IsOk()); - - HttpRequest request{.method = HttpMethod::kGet, .url = "https://example.com/v1/config"}; - auto auth_result = session_result.value()->Authenticate(request); - ASSERT_THAT(auth_result, IsOk()); - const auto& headers = auth_result.value().headers; + ICEBERG_UNWRAP_OR_FAIL( + auto signed_request, + SignRequest(properties, {.method = HttpMethod::kGet, + .url = "http://localhost:8080/path", + .headers = {{"Content-Type", "application/json"}}})); + + EXPECT_EQ(HeaderValue(signed_request.headers, "original-authorization"), + "Bearer my-oauth-token"); + auto authorization = HeaderValue(signed_request.headers, "authorization"); + EXPECT_THAT(authorization, + HasSubstr("SignedHeaders=content-type;host;original-authorization;" + "x-amz-content-sha256;x-amz-date")); + EXPECT_EQ(ExtractAuthField(authorization, "Signature="), + ExpectedSigV4Signature(signed_request, "us-east-1", "execute-api")); +} - EXPECT_NE(headers.find("authorization"), headers.end()); - EXPECT_TRUE(headers.at("authorization").starts_with("AWS4-HMAC-SHA256")); - EXPECT_NE(headers.find("original-authorization"), headers.end()); - EXPECT_EQ(headers.at("original-authorization"), "Bearer my-oauth-token"); +TEST_F(SigV4AuthTest, ConflictingSigV4HeadersRelocated) { + auto delegate = AuthSession::MakeDefault({ + {"x-amz-content-sha256", "fake-sha256"}, + {"X-Amz-Date", "fake-date"}, + {"Content-Type", "application/json"}, + }); + auto credentials = + std::make_shared(Aws::Auth::AWSCredentials( + std::string(kAccessKey).c_str(), std::string(kSecretKey).c_str())); + ICEBERG_UNWRAP_OR_FAIL( + auto session, + SigV4AuthSession::Make(delegate, "us-east-1", "execute-api", credentials)); + + ICEBERG_UNWRAP_OR_FAIL(auto signed_request, + session->Authenticate({.method = HttpMethod::kGet, + .url = "http://localhost:8080/path"})); + + EXPECT_EQ(HeaderValue(signed_request.headers, kAmzContentSha256Header), + SigV4AuthSession::kEmptyBodySha256); + EXPECT_EQ(HeaderValue(signed_request.headers, "Original-x-amz-content-sha256"), + "fake-sha256"); + EXPECT_EQ(HeaderValue(signed_request.headers, "Original-X-Amz-Date"), "fake-date"); + EXPECT_NE(signed_request.headers.find("authorization"), signed_request.headers.end()); } TEST_F(SigV4AuthTest, AuthenticateWithSessionToken) { auto properties = MakeSigV4Properties(); properties[AuthProperties::kSigV4SessionToken] = "FwoGZXIvYXdzEBYaDHqa0"; - auto manager_result = AuthManagers::Load("test-catalog", properties); - ASSERT_THAT(manager_result, IsOk()); + ICEBERG_UNWRAP_OR_FAIL( + auto signed_request, + SignRequest(properties, + {.method = HttpMethod::kGet, .url = "https://example.com/v1/config"})); - auto session_result = manager_result.value()->CatalogSession(client_, properties); - ASSERT_THAT(session_result, IsOk()); - - HttpRequest request{.method = HttpMethod::kGet, .url = "https://example.com/v1/config"}; - auto auth_result = session_result.value()->Authenticate(request); - ASSERT_THAT(auth_result, IsOk()); - const auto& headers = auth_result.value().headers; - - EXPECT_NE(headers.find("authorization"), headers.end()); - EXPECT_NE(headers.find("x-amz-security-token"), headers.end()); - EXPECT_EQ(headers.at("x-amz-security-token"), "FwoGZXIvYXdzEBYaDHqa0"); + EXPECT_EQ(HeaderValue(signed_request.headers, "x-amz-security-token"), + "FwoGZXIvYXdzEBYaDHqa0"); + EXPECT_THAT(HeaderValue(signed_request.headers, "authorization"), + HasSubstr("SignedHeaders=host;x-amz-content-sha256;x-amz-date;" + "x-amz-security-token")); } TEST_F(SigV4AuthTest, CustomSigningNameAndRegion) { @@ -193,38 +338,28 @@ TEST_F(SigV4AuthTest, CustomSigningNameAndRegion) { properties[AuthProperties::kSigV4SigningRegion] = "eu-west-1"; properties[AuthProperties::kSigV4SigningName] = "custom-service"; - auto manager_result = AuthManagers::Load("test-catalog", properties); - ASSERT_THAT(manager_result, IsOk()); - - auto session_result = manager_result.value()->CatalogSession(client_, properties); - ASSERT_THAT(session_result, IsOk()); - - HttpRequest request{.method = HttpMethod::kGet, .url = "https://example.com/v1/config"}; - auto auth_result = session_result.value()->Authenticate(request); - ASSERT_THAT(auth_result, IsOk()); - const auto& headers = auth_result.value().headers; + ICEBERG_UNWRAP_OR_FAIL( + auto signed_request, + SignRequest(properties, + {.method = HttpMethod::kGet, .url = "https://example.com/v1/config"})); - auto auth_it = headers.find("authorization"); - ASSERT_NE(auth_it, headers.end()); - EXPECT_TRUE(auth_it->second.find("eu-west-1") != std::string::npos); - EXPECT_TRUE(auth_it->second.find("custom-service") != std::string::npos); + auto authorization = HeaderValue(signed_request.headers, "authorization"); + EXPECT_THAT(authorization, HasSubstr("eu-west-1")); + EXPECT_THAT(authorization, HasSubstr("custom-service")); } TEST_F(SigV4AuthTest, LegacySigV4EnabledFlagSelectsSigV4) { auto properties = MakeSigV4Properties(); properties.erase(AuthProperties::kAuthType); properties[AuthProperties::kSigV4Enabled] = "true"; - auto manager_result = AuthManagers::Load("test-catalog", properties); - ASSERT_THAT(manager_result, IsOk()); - auto session_result = manager_result.value()->CatalogSession(client_, properties); - ASSERT_THAT(session_result, IsOk()); + ICEBERG_UNWRAP_OR_FAIL( + auto signed_request, + SignRequest(properties, + {.method = HttpMethod::kGet, .url = "https://example.com/v1/config"})); - HttpRequest request{.method = HttpMethod::kGet, .url = "https://example.com/v1/config"}; - auto auth_result = session_result.value()->Authenticate(request); - ASSERT_THAT(auth_result, IsOk()); - EXPECT_TRUE( - auth_result.value().headers.at("authorization").starts_with("AWS4-HMAC-SHA256")); + EXPECT_THAT(HeaderValue(signed_request.headers, "authorization"), + StartsWith("AWS4-HMAC-SHA256")); } TEST_F(SigV4AuthTest, AuthTypeCaseInsensitive) { @@ -236,226 +371,66 @@ TEST_F(SigV4AuthTest, AuthTypeCaseInsensitive) { } } -TEST_F(SigV4AuthTest, DelegateDefaultsToOAuth2NoAuth) { - auto properties = MakeSigV4Properties(); - auto manager_result = AuthManagers::Load("test-catalog", properties); - ASSERT_THAT(manager_result, IsOk()); - - auto session_result = manager_result.value()->CatalogSession(client_, properties); - ASSERT_THAT(session_result, IsOk()); +TEST_F(SigV4AuthTest, MissingStaticCredentialsAreRejected) { + for (auto missing_property : + {AuthProperties::kSigV4AccessKeyId, AuthProperties::kSigV4SecretAccessKey}) { + auto properties = MakeSigV4Properties(); + properties.erase(missing_property); + ICEBERG_UNWRAP_OR_FAIL(auto manager, AuthManagers::Load("test-catalog", properties)); - HttpRequest request{.method = HttpMethod::kGet, .url = "https://example.com/v1/config"}; - auto auth_result = session_result.value()->Authenticate(request); - ASSERT_THAT(auth_result, IsOk()); - const auto& headers = auth_result.value().headers; + auto session_result = manager->CatalogSession(client_, properties); + EXPECT_THAT(session_result, IsError(ErrorKind::kInvalidArgument)) + << "Missing property: " << missing_property; + EXPECT_THAT(session_result, HasErrorMessage("must be set together")); + } - EXPECT_EQ(headers.find("original-authorization"), headers.end()); -} + auto session_token_only = MakeSigV4Properties(); + session_token_only.erase(AuthProperties::kSigV4AccessKeyId); + session_token_only.erase(AuthProperties::kSigV4SecretAccessKey); + session_token_only[AuthProperties::kSigV4SessionToken] = "token"; + ICEBERG_UNWRAP_OR_FAIL(auto manager, + AuthManagers::Load("test-catalog", session_token_only)); -TEST_F(SigV4AuthTest, TableSessionInheritsProperties) { - auto properties = MakeSigV4Properties(); - auto manager_result = AuthManagers::Load("test-catalog", properties); - ASSERT_THAT(manager_result, IsOk()); - - auto catalog_session = manager_result.value()->CatalogSession(client_, properties); - ASSERT_THAT(catalog_session, IsOk()); - - iceberg::TableIdentifier table_id{.ns = iceberg::Namespace{{"ns1"}}, .name = "table1"}; - std::unordered_map table_props; - auto table_session = manager_result.value()->TableSession(table_id, table_props, - catalog_session.value()); - ASSERT_THAT(table_session, IsOk()); - - HttpRequest request{.method = HttpMethod::kGet, - .url = "https://example.com/v1/ns1/tables/table1"}; - auto auth_result = table_session.value()->Authenticate(request); - ASSERT_THAT(auth_result, IsOk()); - EXPECT_NE(auth_result.value().headers.find("authorization"), - auth_result.value().headers.end()); + auto session_result = manager->CatalogSession(client_, session_token_only); + EXPECT_THAT(session_result, IsError(ErrorKind::kInvalidArgument)); + EXPECT_THAT(session_result, HasErrorMessage("requires")); } -TEST_F(SigV4AuthTest, AuthenticateWithoutBodyDetailedHeaders) { +TEST_F(SigV4AuthTest, DerivedCredentialOverridesMustBeComplete) { auto properties = MakeSigV4Properties(); - auto manager_result = AuthManagers::Load("test-catalog", properties); - ASSERT_THAT(manager_result, IsOk()); - - auto session_result = manager_result.value()->CatalogSession(client_, properties); - ASSERT_THAT(session_result, IsOk()); + ICEBERG_UNWRAP_OR_FAIL(auto manager, AuthManagers::Load("test-catalog", properties)); + ICEBERG_UNWRAP_OR_FAIL(auto catalog_session, + manager->CatalogSession(client_, properties)); - HttpRequest request{.method = HttpMethod::kGet, - .url = "http://localhost:8080/path", - .headers = {{"Content-Type", "application/json"}}}; - auto auth_result = session_result.value()->Authenticate(request); - ASSERT_THAT(auth_result, IsOk()); - const auto& headers = auth_result.value().headers; + auto context_result = manager->ContextualSession( + {{AuthProperties::kSigV4SecretAccessKey, "context-secret"}}, catalog_session); + EXPECT_THAT(context_result, IsError(ErrorKind::kInvalidArgument)); + EXPECT_THAT(context_result, HasErrorMessage("must be set together")); - // Original header preserved - EXPECT_EQ(headers.at("content-type"), "application/json"); - - // Host header generated by the signer - EXPECT_NE(headers.find("host"), headers.end()); - - // SigV4 headers - auto auth_it = headers.find("authorization"); - ASSERT_NE(auth_it, headers.end()); - EXPECT_TRUE(auth_it->second.starts_with("AWS4-HMAC-SHA256 Credential=")); - - EXPECT_TRUE(auth_it->second.find("content-type") != std::string::npos); - EXPECT_TRUE(auth_it->second.find("host") != std::string::npos); - EXPECT_TRUE(auth_it->second.find("x-amz-content-sha256") != std::string::npos); - EXPECT_TRUE(auth_it->second.find("x-amz-date") != std::string::npos); - - // Empty body SHA256 hash - EXPECT_EQ(headers.at("x-amz-content-sha256"), SigV4AuthSession::kEmptyBodySha256); - - // X-Amz-Date present - EXPECT_NE(headers.find("x-amz-date"), headers.end()); + iceberg::TableIdentifier table_id{.ns = iceberg::Namespace{{"db1"}}, .name = "table1"}; + auto table_result = manager->TableSession( + table_id, {{AuthProperties::kSigV4SessionToken, "table-token"}}, catalog_session); + EXPECT_THAT(table_result, IsError(ErrorKind::kInvalidArgument)); + EXPECT_THAT(table_result, HasErrorMessage("requires")); } -TEST_F(SigV4AuthTest, AuthenticateWithBodyDetailedHeaders) { +TEST_F(SigV4AuthTest, CreateCustomDelegateNone) { auto properties = MakeSigV4Properties(); - auto manager_result = AuthManagers::Load("test-catalog", properties); - ASSERT_THAT(manager_result, IsOk()); - - auto session_result = manager_result.value()->CatalogSession(client_, properties); - ASSERT_THAT(session_result, IsOk()); - - HttpRequest request{.method = HttpMethod::kPost, - .url = "http://localhost:8080/path", - .headers = {{"Content-Type", "application/json"}}, - .body = R"({"namespace":["ns1"]})"}; - auto auth_result = session_result.value()->Authenticate(request); - ASSERT_THAT(auth_result, IsOk()); - const auto& headers = auth_result.value().headers; - - // SigV4 Authorization header - auto auth_it = headers.find("authorization"); - ASSERT_NE(auth_it, headers.end()); - EXPECT_TRUE(auth_it->second.starts_with("AWS4-HMAC-SHA256 Credential=")); - - // Java parity: the x-amz-content-sha256 header is Base64(SHA256(body)) for - // non-empty bodies; the canonical request payload hash stays lowercase hex. - auto sha_it = headers.find("x-amz-content-sha256"); - ASSERT_NE(sha_it, headers.end()); - EXPECT_NE(sha_it->second, SigV4AuthSession::kEmptyBodySha256); - - EXPECT_EQ(sha_it->second.size(), 44) - << "Expected Base64 SHA256, got: " << sha_it->second; -} + properties[AuthProperties::kSigV4DelegateAuthType] = "none"; -TEST_F(SigV4AuthTest, ConflictingAuthorizationHeaderIncludedInSignedHeaders) { - auto properties = MakeSigV4Properties(); - properties[AuthProperties::kToken.key()] = "my-oauth-token"; - properties[AuthProperties::kSigV4DelegateAuthType] = "oauth2"; - - auto manager_result = AuthManagers::Load("test-catalog", properties); - ASSERT_THAT(manager_result, IsOk()); - - auto session_result = manager_result.value()->CatalogSession(client_, properties); - ASSERT_THAT(session_result, IsOk()); - - HttpRequest request{.method = HttpMethod::kGet, - .url = "http://localhost:8080/path", - .headers = {{"Content-Type", "application/json"}}}; - auto auth_result = session_result.value()->Authenticate(request); - ASSERT_THAT(auth_result, IsOk()); - const auto& headers = auth_result.value().headers; - - // SigV4 Authorization header - auto auth_it = headers.find("authorization"); - ASSERT_NE(auth_it, headers.end()); - EXPECT_TRUE(auth_it->second.starts_with("AWS4-HMAC-SHA256 Credential=")); - - // Relocated delegate header should be in SignedHeaders - EXPECT_TRUE(auth_it->second.find("original-authorization") != std::string::npos) - << "SignedHeaders should include 'original-authorization', got: " - << auth_it->second; - - // Relocated Authorization present - auto orig_it = headers.find("original-authorization"); - ASSERT_NE(orig_it, headers.end()); - EXPECT_EQ(orig_it->second, "Bearer my-oauth-token"); -} + ICEBERG_UNWRAP_OR_FAIL( + auto signed_request, + SignRequest(properties, + {.method = HttpMethod::kGet, .url = "https://example.com/v1/config"})); -TEST_F(SigV4AuthTest, ConflictingSigV4HeadersRelocated) { - auto delegate = AuthSession::MakeDefault({ - {"x-amz-content-sha256", "fake-sha256"}, - {"X-Amz-Date", "fake-date"}, - {"Content-Type", "application/json"}, - }); - auto credentials = - std::make_shared(Aws::Auth::AWSCredentials( - "AKIAIOSFODNN7EXAMPLE", "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY")); - auto session_result = - SigV4AuthSession::Make(delegate, "us-east-1", "execute-api", credentials); - ASSERT_THAT(session_result, IsOk()); - auto session = session_result.value(); - - HttpRequest request{.method = HttpMethod::kGet, .url = "http://localhost:8080/path"}; - auto auth_result = session->Authenticate(request); - ASSERT_THAT(auth_result, IsOk()); - const auto& headers = auth_result.value().headers; - - // The real x-amz-content-sha256 should be the empty body hash (signer overwrites fake) - EXPECT_EQ(headers.at("x-amz-content-sha256"), SigV4AuthSession::kEmptyBodySha256); - - // The fake values should be relocated since the signer produced different values - auto orig_sha_it = headers.find("Original-x-amz-content-sha256"); - ASSERT_NE(orig_sha_it, headers.end()); - EXPECT_EQ(orig_sha_it->second, "fake-sha256"); - - auto orig_date_it = headers.find("Original-X-Amz-Date"); - ASSERT_NE(orig_date_it, headers.end()); - EXPECT_EQ(orig_date_it->second, "fake-date"); - - // SigV4 Authorization present - EXPECT_NE(headers.find("authorization"), headers.end()); -} - -TEST_F(SigV4AuthTest, SessionCloseDelegatesToInner) { - auto delegate = AuthSession::MakeDefault({}); - auto credentials = std::make_shared( - Aws::Auth::AWSCredentials("id", "secret")); - auto session_result = - SigV4AuthSession::Make(delegate, "us-east-1", "execute-api", credentials); - ASSERT_THAT(session_result, IsOk()); - - EXPECT_THAT(session_result.value()->Close(), IsOk()); -} - -TEST_F(SigV4AuthTest, CreateCustomDelegateNone) { - std::unordered_map properties = { - {AuthProperties::kAuthType, "sigv4"}, - {AuthProperties::kSigV4DelegateAuthType, "none"}, - {AuthProperties::kSigV4SigningRegion, "us-west-2"}, - {AuthProperties::kSigV4AccessKeyId, "id"}, - {AuthProperties::kSigV4SecretAccessKey, "secret"}, - }; - - auto manager_result = AuthManagers::Load("test-catalog", properties); - ASSERT_THAT(manager_result, IsOk()); - - auto session_result = manager_result.value()->CatalogSession(client_, properties); - ASSERT_THAT(session_result, IsOk()); - - // Authenticate should work with noop delegate - HttpRequest request{.method = HttpMethod::kGet, .url = "https://example.com/v1/config"}; - auto auth_result = session_result.value()->Authenticate(request); - ASSERT_THAT(auth_result, IsOk()); - const auto& headers = auth_result.value().headers; - - EXPECT_NE(headers.find("authorization"), headers.end()); - EXPECT_EQ(headers.find("original-authorization"), headers.end()); + EXPECT_NE(signed_request.headers.find("authorization"), signed_request.headers.end()); + EXPECT_EQ(signed_request.headers.find("original-authorization"), + signed_request.headers.end()); } TEST_F(SigV4AuthTest, CreateInvalidCustomDelegateSigV4Circular) { - std::unordered_map properties = { - {AuthProperties::kAuthType, "sigv4"}, - {AuthProperties::kSigV4DelegateAuthType, "sigv4"}, - {AuthProperties::kSigV4SigningRegion, "us-east-1"}, - {AuthProperties::kSigV4AccessKeyId, "id"}, - {AuthProperties::kSigV4SecretAccessKey, "secret"}, - }; + auto properties = MakeSigV4Properties(); + properties[AuthProperties::kSigV4DelegateAuthType] = "sigv4"; auto result = AuthManagers::Load("test-catalog", properties); EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument)); @@ -466,114 +441,72 @@ TEST_F(SigV4AuthTest, CreateInvalidCustomDelegateSigV4Circular) { TEST_F(SigV4AuthTest, ContextualSessionOverridesProperties) { auto properties = MakeSigV4Properties(); properties[AuthProperties::kSigV4SigningRegion] = "us-west-2"; - - auto manager_result = AuthManagers::Load("test-catalog", properties); - ASSERT_THAT(manager_result, IsOk()); - - auto catalog_session = manager_result.value()->CatalogSession(client_, properties); - ASSERT_THAT(catalog_session, IsOk()); - - SessionContext context{ - .properties = {{AuthProperties::kSigV4SigningRegion, "eu-west-1"}}, - .credentials = {{AuthProperties::kSigV4AccessKeyId, "id2"}, - {AuthProperties::kSigV4SecretAccessKey, "secret2"}}, - }; - - auto ctx_session = - manager_result.value()->ContextualSession(context, catalog_session.value()); - ASSERT_THAT(ctx_session, IsOk()); - - HttpRequest request{.method = HttpMethod::kGet, .url = "https://example.com/v1/config"}; - auto auth_result = ctx_session.value()->Authenticate(request); - ASSERT_THAT(auth_result, IsOk()); - const auto& headers = auth_result.value().headers; - - auto auth_it = headers.find("authorization"); - ASSERT_NE(auth_it, headers.end()); - - EXPECT_TRUE(auth_it->second.find("eu-west-1") != std::string::npos) - << "Expected eu-west-1 in Authorization, got: " << auth_it->second; + ICEBERG_UNWRAP_OR_FAIL(auto manager, AuthManagers::Load("test-catalog", properties)); + ICEBERG_UNWRAP_OR_FAIL(auto catalog_session, + manager->CatalogSession(client_, properties)); + + ICEBERG_UNWRAP_OR_FAIL( + auto ctx_session, + manager->ContextualSession({{AuthProperties::kSigV4AccessKeyId, "id2"}, + {AuthProperties::kSigV4SecretAccessKey, "secret2"}, + {AuthProperties::kSigV4SigningRegion, "eu-west-1"}}, + catalog_session)); + + ICEBERG_UNWRAP_OR_FAIL( + auto signed_request, + ctx_session->Authenticate( + {.method = HttpMethod::kGet, .url = "https://example.com/v1/config"})); + EXPECT_THAT(HeaderValue(signed_request.headers, "authorization"), + HasSubstr("eu-west-1")); } TEST_F(SigV4AuthTest, TableSessionOverridesProperties) { auto properties = MakeSigV4Properties(); properties[AuthProperties::kSigV4SigningRegion] = "us-west-2"; - - auto manager_result = AuthManagers::Load("test-catalog", properties); - ASSERT_THAT(manager_result, IsOk()); - - auto catalog_session = manager_result.value()->CatalogSession(client_, properties); - ASSERT_THAT(catalog_session, IsOk()); - - // Table properties override region and credentials - std::unordered_map table_props = { - {AuthProperties::kSigV4AccessKeyId, "table-key-id"}, - {AuthProperties::kSigV4SecretAccessKey, "table-secret"}, - {AuthProperties::kSigV4SigningRegion, "ap-southeast-1"}, - }; + ICEBERG_UNWRAP_OR_FAIL(auto manager, AuthManagers::Load("test-catalog", properties)); + ICEBERG_UNWRAP_OR_FAIL(auto catalog_session, + manager->CatalogSession(client_, properties)); iceberg::TableIdentifier table_id{.ns = iceberg::Namespace{{"db1"}}, .name = "table1"}; - auto table_session = manager_result.value()->TableSession(table_id, table_props, - catalog_session.value()); - ASSERT_THAT(table_session, IsOk()); - - HttpRequest request{.method = HttpMethod::kGet, - .url = "https://example.com/v1/db1/tables/table1"}; - auto auth_result = table_session.value()->Authenticate(request); - ASSERT_THAT(auth_result, IsOk()); - const auto& headers = auth_result.value().headers; - - auto auth_it = headers.find("authorization"); - ASSERT_NE(auth_it, headers.end()); - - EXPECT_TRUE(auth_it->second.find("ap-southeast-1") != std::string::npos) - << "Expected ap-southeast-1 in Authorization, got: " << auth_it->second; + ICEBERG_UNWRAP_OR_FAIL( + auto table_session, + manager->TableSession(table_id, + {{AuthProperties::kSigV4AccessKeyId, "table-key-id"}, + {AuthProperties::kSigV4SecretAccessKey, "table-secret"}, + {AuthProperties::kSigV4SigningRegion, "ap-southeast-1"}}, + catalog_session)); + + ICEBERG_UNWRAP_OR_FAIL( + auto signed_request, + table_session->Authenticate({.method = HttpMethod::kGet, + .url = "https://example.com/v1/db1/tables/table1"})); + EXPECT_THAT(HeaderValue(signed_request.headers, "authorization"), + HasSubstr("ap-southeast-1")); } -// Matches Java RESTSigV4AuthManager: a table session derived from a contextual -// parent does NOT inherit the contextual overrides; it merges catalog props -// with table props directly. Contextual and table are independent dimensions. TEST_F(SigV4AuthTest, TableSessionIgnoresContextualOverrides) { auto properties = MakeSigV4Properties(); properties[AuthProperties::kSigV4SigningRegion] = "us-west-2"; - - auto manager_result = AuthManagers::Load("test-catalog", properties); - ASSERT_THAT(manager_result, IsOk()); - - auto catalog_session = manager_result.value()->CatalogSession(client_, properties); - ASSERT_THAT(catalog_session, IsOk()); - - auto ctx_session = manager_result.value()->ContextualSession( - SessionContext{.properties = {{AuthProperties::kSigV4SigningRegion, "eu-west-1"}}}, - catalog_session.value()); - ASSERT_THAT(ctx_session, IsOk()); + ICEBERG_UNWRAP_OR_FAIL(auto manager, AuthManagers::Load("test-catalog", properties)); + ICEBERG_UNWRAP_OR_FAIL(auto catalog_session, + manager->CatalogSession(client_, properties)); + ICEBERG_UNWRAP_OR_FAIL( + auto ctx_session, + manager->ContextualSession({{AuthProperties::kSigV4SigningRegion, "eu-west-1"}}, + catalog_session)); iceberg::TableIdentifier table_id{.ns = iceberg::Namespace{{"db1"}}, .name = "table1"}; - auto table_session = manager_result.value()->TableSession(table_id, /*properties=*/{}, - ctx_session.value()); - ASSERT_THAT(table_session, IsOk()); - - HttpRequest request{.method = HttpMethod::kGet, - .url = "https://example.com/v1/db1/tables/table1"}; - auto auth_result = table_session.value()->Authenticate(request); - ASSERT_THAT(auth_result, IsOk()); - - auto auth_it = auth_result.value().headers.find("authorization"); - ASSERT_NE(auth_it, auth_result.value().headers.end()); - EXPECT_TRUE(auth_it->second.find("us-west-2") != std::string::npos) - << "Table session should use the catalog region, not the contextual override, got: " - << auth_it->second; -} - -TEST_F(SigV4AuthTest, ManagerCloseDelegatesToInner) { - auto properties = MakeSigV4Properties(); - auto manager_result = AuthManagers::Load("test-catalog", properties); - ASSERT_THAT(manager_result, IsOk()); - - // Close should succeed without error - EXPECT_THAT(manager_result.value()->Close(), IsOk()); + ICEBERG_UNWRAP_OR_FAIL(auto table_session, + manager->TableSession(table_id, {}, ctx_session)); + + ICEBERG_UNWRAP_OR_FAIL( + auto signed_request, + table_session->Authenticate({.method = HttpMethod::kGet, + .url = "https://example.com/v1/db1/tables/table1"})); + EXPECT_THAT(HeaderValue(signed_request.headers, "authorization"), + HasSubstr("us-west-2")); } } // namespace iceberg::rest::auth -#endif // ICEBERG_SIGV4 +#endif // ICEBERG_SIGV4_ENABLED