From de92b216c714e5513689e11de53812e1b2ce702e Mon Sep 17 00:00:00 2001 From: Damien Mehala Date: Wed, 22 May 2024 20:10:45 +0200 Subject: [PATCH 01/11] fix: remote config polling parsing According to the Remote Configuration specification, `DD_REMOTE_CONFIG_POLL_INTERVAL_SECONDS` support floating point input and can be set to `0`. --- src/datadog/datadog_agent_config.cpp | 11 ++++++----- src/datadog/datadog_agent_config.h | 2 +- test/test_tracer_config.cpp | 10 +--------- 3 files changed, 8 insertions(+), 15 deletions(-) diff --git a/src/datadog/datadog_agent_config.cpp b/src/datadog/datadog_agent_config.cpp index aef6074c..6049a823 100644 --- a/src/datadog/datadog_agent_config.cpp +++ b/src/datadog/datadog_agent_config.cpp @@ -21,7 +21,7 @@ Expected load_datadog_agent_env_config() { if (auto raw_rc_poll_interval_value = lookup(environment::DD_REMOTE_CONFIG_POLL_INTERVAL_SECONDS)) { - auto res = parse_int(*raw_rc_poll_interval_value, 10); + auto res = parse_double(*raw_rc_poll_interval_value); if (auto error = res.if_error()) { return error->with_prefix( "DatadogAgent: Remote Configuration poll interval error "); @@ -114,12 +114,13 @@ Expected finalize_config( "milliseconds."}; } - if (int rc_poll_interval_seconds = + if (double rc_poll_interval_seconds = value_or(env_config->remote_configuration_poll_interval_seconds, - user_config.remote_configuration_poll_interval_seconds, 5); - rc_poll_interval_seconds > 0) { + user_config.remote_configuration_poll_interval_seconds, 5.0); + rc_poll_interval_seconds >= 0.0) { result.remote_configuration_poll_interval = - std::chrono::seconds(rc_poll_interval_seconds); + std::chrono::duration_cast( + std::chrono::duration(rc_poll_interval_seconds)); } else { return Error{Error::DATADOG_AGENT_INVALID_REMOTE_CONFIG_POLL_INTERVAL, "DatadogAgent: Remote Configuration poll interval must be a " diff --git a/src/datadog/datadog_agent_config.h b/src/datadog/datadog_agent_config.h index 432b11b7..67559aee 100644 --- a/src/datadog/datadog_agent_config.h +++ b/src/datadog/datadog_agent_config.h @@ -61,7 +61,7 @@ struct DatadogAgentConfig { Optional remote_configuration_enabled; // How often, in seconds, to query the Datadog Agent for remote configuration // updates. - Optional remote_configuration_poll_interval_seconds; + Optional remote_configuration_poll_interval_seconds; static Expected parse(StringView); }; diff --git a/test/test_tracer_config.cpp b/test/test_tracer_config.cpp index 42c93bc1..a94129c8 100644 --- a/test/test_tracer_config.cpp +++ b/test/test_tracer_config.cpp @@ -446,14 +446,6 @@ TEST_CASE("TracerConfig::agent") { } SECTION("remote configuration poll interval") { - SECTION("cannot be zero") { - config.agent.remote_configuration_poll_interval_seconds = 0; - auto finalized = finalize_config(config); - REQUIRE(!finalized); - REQUIRE(finalized.error().code == - Error::DATADOG_AGENT_INVALID_REMOTE_CONFIG_POLL_INTERVAL); - } - SECTION("cannot be negative") { config.agent.remote_configuration_poll_interval_seconds = -1337; auto finalized = finalize_config(config); @@ -491,7 +483,7 @@ TEST_CASE("TracerConfig::agent") { "ddog"}; auto finalized = finalize_config(config); REQUIRE(!finalized); - REQUIRE(finalized.error().code == Error::INVALID_INTEGER); + REQUIRE(finalized.error().code == Error::INVALID_DOUBLE); } } } From 4b87ef78580ee92541eaa89f13bc119c3a62afdc Mon Sep 17 00:00:00 2001 From: Damien Mehala Date: Tue, 23 Apr 2024 11:37:17 +0200 Subject: [PATCH 02/11] Use a map for trace sampler rules --- src/datadog/span_matcher.h | 16 +++++++++ src/datadog/trace_sampler.cpp | 18 +++++----- src/datadog/trace_sampler.h | 3 +- src/datadog/trace_sampler_config.cpp | 17 ++------- src/datadog/trace_sampler_config.h | 9 +---- test/test_tracer_config.cpp | 53 +++++++++++++++------------- 6 files changed, 59 insertions(+), 57 deletions(-) diff --git a/src/datadog/span_matcher.h b/src/datadog/span_matcher.h index 0de2fbf2..2ff0f4c8 100644 --- a/src/datadog/span_matcher.h +++ b/src/datadog/span_matcher.h @@ -32,7 +32,23 @@ struct SpanMatcher { nlohmann::json to_json() const; static Expected from_json(const nlohmann::json&); + + bool operator==(const SpanMatcher& other) const { + return (service == other.service && name == other.name && + resource == other.resource && tags == other.tags); + } + + // TODO: add tags + struct Hash { + size_t operator()(const SpanMatcher& rule) const { + return std::hash()(rule.service) ^ + (std::hash()(rule.name) << 1) ^ + (std::hash()(rule.resource) << 2); + } + }; }; +static const SpanMatcher catch_all; + } // namespace tracing } // namespace datadog diff --git a/src/datadog/trace_sampler.cpp b/src/datadog/trace_sampler.cpp index ae74865b..0e9d5169 100644 --- a/src/datadog/trace_sampler.cpp +++ b/src/datadog/trace_sampler.cpp @@ -26,20 +26,20 @@ SamplingDecision TraceSampler::decide(const SpanData& span) { decision.origin = SamplingDecision::Origin::LOCAL; // First check sampling rules. - auto found_rule = - std::find_if(rules_.begin(), rules_.end(), - [&](const auto& rule) { return rule.match(span); }); + const auto found_rule = + std::find_if(rules_.cbegin(), rules_.cend(), + [&](const auto& it) { return it.first.match(span); }); // `mutex_` protects `limiter_`, `collector_sample_rates_`, and // `collector_default_sample_rate_`, so let's lock it here. std::lock_guard lock(mutex_); if (found_rule != rules_.end()) { - const auto& rule = *found_rule; + const auto& [rule, rate] = *found_rule; decision.mechanism = int(SamplingMechanism::RULE); decision.limiter_max_per_second = limiter_max_per_second_; - decision.configured_rate = rule.sample_rate; - const std::uint64_t threshold = max_id_from_rate(rule.sample_rate); + decision.configured_rate = rate; + const std::uint64_t threshold = max_id_from_rate(rate); if (knuth_hash(span.trace_id.low) < threshold) { const auto result = limiter_.allow(); if (result.allowed) { @@ -99,8 +99,10 @@ void TraceSampler::handle_collector_response( nlohmann::json TraceSampler::config_json() const { std::vector rules; - for (const auto& rule : rules_) { - rules.push_back(to_json(rule)); + for (const auto& [rule, rate] : rules_) { + nlohmann::json j = rule.to_json(); + j["sampling_rate"] = rate.value(); + rules.push_back(std::move(j)); } return nlohmann::json::object({ diff --git a/src/datadog/trace_sampler.h b/src/datadog/trace_sampler.h index ddaec47d..0efb7a19 100644 --- a/src/datadog/trace_sampler.h +++ b/src/datadog/trace_sampler.h @@ -106,8 +106,7 @@ class TraceSampler { Optional collector_default_sample_rate_; std::unordered_map collector_sample_rates_; - - std::vector rules_; + std::unordered_map rules_; Limiter limiter_; double limiter_max_per_second_; diff --git a/src/datadog/trace_sampler_config.cpp b/src/datadog/trace_sampler_config.cpp index 4a2cc229..af79c8b7 100644 --- a/src/datadog/trace_sampler_config.cpp +++ b/src/datadog/trace_sampler_config.cpp @@ -179,10 +179,8 @@ Expected finalize_config( return error->with_prefix(prefix); } - FinalizedTraceSamplerConfig::Rule finalized; - static_cast(finalized) = rule; - finalized.sample_rate = *maybe_rate; - result.rules.push_back(std::move(finalized)); + SpanMatcher matcher = rule; + result.rules.emplace(matcher, *maybe_rate); } Optional sample_rate; @@ -208,9 +206,7 @@ Expected finalize_config( "Unable to parse overall sample_rate for trace sampling: "); } - FinalizedTraceSamplerConfig::Rule catch_all; - catch_all.sample_rate = *maybe_rate; - result.rules.push_back(std::move(catch_all)); + result.rules.emplace(catch_all, *maybe_rate); } const auto [origin, max_per_second] = @@ -234,12 +230,5 @@ Expected finalize_config( return result; } -nlohmann::json to_json(const FinalizedTraceSamplerConfig::Rule &rule) { - // Get the base class's fields, then add our own. - auto result = static_cast(rule).to_json(); - result["sample_rate"] = double(rule.sample_rate); - return result; -} - } // namespace tracing } // namespace datadog diff --git a/src/datadog/trace_sampler_config.h b/src/datadog/trace_sampler_config.h index bfe93a50..8bbc98bc 100644 --- a/src/datadog/trace_sampler_config.h +++ b/src/datadog/trace_sampler_config.h @@ -41,20 +41,13 @@ class FinalizedTraceSamplerConfig { FinalizedTraceSamplerConfig() = default; public: - struct Rule : public SpanMatcher { - Rate sample_rate; - }; - - std::vector rules; double max_per_second; - std::unordered_map metadata; + std::unordered_map rules; }; Expected finalize_config( const TraceSamplerConfig& config); -nlohmann::json to_json(const FinalizedTraceSamplerConfig::Rule&); - } // namespace tracing } // namespace datadog diff --git a/test/test_tracer_config.cpp b/test/test_tracer_config.cpp index a94129c8..23b1787e 100644 --- a/test/test_tracer_config.cpp +++ b/test/test_tracer_config.cpp @@ -608,7 +608,7 @@ TEST_CASE("TracerConfig::trace_sampler") { REQUIRE(finalized); REQUIRE(finalized->trace_sampler.rules.size() == 1); // and the default sample_rate is 100% - REQUIRE(finalized->trace_sampler.rules.front().sample_rate == 1.0); + REQUIRE(finalized->trace_sampler.rules[catch_all] == 1.0); } SECTION("has to have a valid sample_rate") { @@ -629,22 +629,17 @@ TEST_CASE("TracerConfig::trace_sampler") { rules[1].sample_rate = 0.6; auto finalized = finalize_config(config); REQUIRE(finalized); - REQUIRE(finalized->trace_sampler.rules.size() == 2); - REQUIRE(finalized->trace_sampler.rules[0].sample_rate == 0.5); - REQUIRE(finalized->trace_sampler.rules[1].sample_rate == 0.6); + REQUIRE(finalized->trace_sampler.rules.size() == 1); + REQUIRE(finalized->trace_sampler.rules[catch_all] == 0.5); } SECTION("global sample_rate creates a catch-all rule") { config.trace_sampler.sample_rate = 0.25; auto finalized = finalize_config(config); REQUIRE(finalized); - REQUIRE(finalized->trace_sampler.rules.size() == 1); - const auto& rule = finalized->trace_sampler.rules.front(); - REQUIRE(rule.sample_rate == 0.25); - REQUIRE(rule.service == "*"); - REQUIRE(rule.name == "*"); - REQUIRE(rule.resource == "*"); - REQUIRE(rule.tags.empty()); + REQUIRE(finalized->trace_sampler.rules.count(catch_all)); + const auto& rate = finalized->trace_sampler.rules[catch_all]; + REQUIRE(rate == 0.25); } SECTION("DD_TRACE_SAMPLE_RATE") { @@ -652,8 +647,8 @@ TEST_CASE("TracerConfig::trace_sampler") { const EnvGuard guard{"DD_TRACE_SAMPLE_RATE", "0.5"}; auto finalized = finalize_config(config); REQUIRE(finalized); - REQUIRE(finalized->trace_sampler.rules.size() == 1); - REQUIRE(finalized->trace_sampler.rules.front().sample_rate == 0.5); + REQUIRE(finalized->trace_sampler.rules.count(catch_all)); + REQUIRE(finalized->trace_sampler.rules[catch_all] == 0.5); } SECTION("overrides TraceSamplerConfig::sample_rate") { @@ -661,8 +656,8 @@ TEST_CASE("TracerConfig::trace_sampler") { const EnvGuard guard{"DD_TRACE_SAMPLE_RATE", "0.5"}; auto finalized = finalize_config(config); REQUIRE(finalized); - REQUIRE(finalized->trace_sampler.rules.size() == 1); - REQUIRE(finalized->trace_sampler.rules.front().sample_rate == 0.5); + REQUIRE(finalized->trace_sampler.rules.count(catch_all)); + REQUIRE(finalized->trace_sampler.rules[catch_all] == 0.5); } SECTION("has to have a valid value") { @@ -794,16 +789,24 @@ TEST_CASE("TracerConfig::trace_sampler") { CAPTURE(rules_json); CAPTURE(rules); REQUIRE(rules.size() == 2); - REQUIRE(rules[0].service == "poohbear"); - REQUIRE(rules[0].name == "get.honey"); - REQUIRE(rules[0].sample_rate == 0); - REQUIRE(rules[0].tags.size() == 0); - REQUIRE(rules[1].service == "*"); - REQUIRE(rules[1].name == "*"); - REQUIRE(rules[1].sample_rate == 1); - REQUIRE(rules[1].tags.size() == 1); - REQUIRE(rules[1].tags.at("error") == "*"); - REQUIRE(rules[1].resource == "/admin/*"); + + SpanMatcher matcher; + matcher.service = "poohbear"; + matcher.name = "get.honey"; + + auto found = rules.find(matcher); + REQUIRE(found != rules.cend()); + CHECK(found->second == 0); + + SpanMatcher matcher2; + matcher2.service = "*"; + matcher2.name = "*"; + matcher2.tags.emplace("error", "*"); + matcher2.resource = "/admin/*"; + + found = rules.find(matcher2); + REQUIRE(found != rules.cend()); + CHECK(found->second == 1); } SECTION("must be valid") { From fbf61ef48fa90af9e6063bfa9c7e4d405be65cad Mon Sep 17 00:00:00 2001 From: Damien Mehala Date: Tue, 23 Apr 2024 13:27:58 +0200 Subject: [PATCH 03/11] pair rate and decision maker --- src/datadog/sampling_mechanism.h | 7 +++++++ src/datadog/trace_sampler.cpp | 8 +++---- src/datadog/trace_sampler.h | 3 ++- src/datadog/trace_sampler_config.cpp | 6 ++++-- src/datadog/trace_sampler_config.h | 8 ++++++- test/test_tracer_config.cpp | 31 ++++++++++++++++++++-------- 6 files changed, 46 insertions(+), 17 deletions(-) diff --git a/src/datadog/sampling_mechanism.h b/src/datadog/sampling_mechanism.h index 26a9ecef..70e990c4 100644 --- a/src/datadog/sampling_mechanism.h +++ b/src/datadog/sampling_mechanism.h @@ -58,6 +58,13 @@ enum class SamplingMechanism { // Individual span kept by a matching span sampling rule when the enclosing // trace was dropped. SPAN_RULE = 8, + // Reserved for future use. + OTLP_RULE = 9, + // Sampling rule configured by user via remote configuration. + REMOTE_RULE = 10, + // Adaptive sampling rule automatically computed by Datadog backend and sent + // via remote configuration. + REMOTE_ADAPTIVE_RULE = 11, }; } // namespace tracing diff --git a/src/datadog/trace_sampler.cpp b/src/datadog/trace_sampler.cpp index 0e9d5169..773ff691 100644 --- a/src/datadog/trace_sampler.cpp +++ b/src/datadog/trace_sampler.cpp @@ -36,10 +36,10 @@ SamplingDecision TraceSampler::decide(const SpanData& span) { if (found_rule != rules_.end()) { const auto& [rule, rate] = *found_rule; - decision.mechanism = int(SamplingMechanism::RULE); + decision.mechanism = int(rate.mechanism); decision.limiter_max_per_second = limiter_max_per_second_; - decision.configured_rate = rate; - const std::uint64_t threshold = max_id_from_rate(rate); + decision.configured_rate = rate.value; + const std::uint64_t threshold = max_id_from_rate(rate.value); if (knuth_hash(span.trace_id.low) < threshold) { const auto result = limiter_.allow(); if (result.allowed) { @@ -101,7 +101,7 @@ nlohmann::json TraceSampler::config_json() const { std::vector rules; for (const auto& [rule, rate] : rules_) { nlohmann::json j = rule.to_json(); - j["sampling_rate"] = rate.value(); + j["sampling_rate"] = rate.value.value(); rules.push_back(std::move(j)); } diff --git a/src/datadog/trace_sampler.h b/src/datadog/trace_sampler.h index 0efb7a19..063d0809 100644 --- a/src/datadog/trace_sampler.h +++ b/src/datadog/trace_sampler.h @@ -102,11 +102,12 @@ struct SamplingDecision; struct SpanData; class TraceSampler { + private: std::mutex mutex_; Optional collector_default_sample_rate_; std::unordered_map collector_sample_rates_; - std::unordered_map rules_; + std::unordered_map rules_; Limiter limiter_; double limiter_max_per_second_; diff --git a/src/datadog/trace_sampler_config.cpp b/src/datadog/trace_sampler_config.cpp index af79c8b7..2588f5f0 100644 --- a/src/datadog/trace_sampler_config.cpp +++ b/src/datadog/trace_sampler_config.cpp @@ -180,7 +180,8 @@ Expected finalize_config( } SpanMatcher matcher = rule; - result.rules.emplace(matcher, *maybe_rate); + result.rules.emplace( + matcher, TraceSamplerRate{*maybe_rate, SamplingMechanism::RULE}); } Optional sample_rate; @@ -206,7 +207,8 @@ Expected finalize_config( "Unable to parse overall sample_rate for trace sampling: "); } - result.rules.emplace(catch_all, *maybe_rate); + result.rules.emplace( + catch_all, TraceSamplerRate{*maybe_rate, SamplingMechanism::RULE}); } const auto [origin, max_per_second] = diff --git a/src/datadog/trace_sampler_config.h b/src/datadog/trace_sampler_config.h index 8bbc98bc..2e0f65b7 100644 --- a/src/datadog/trace_sampler_config.h +++ b/src/datadog/trace_sampler_config.h @@ -15,11 +15,17 @@ #include "json_fwd.hpp" #include "optional.h" #include "rate.h" +#include "sampling_mechanism.h" #include "span_matcher.h" namespace datadog { namespace tracing { +struct TraceSamplerRate final { + Rate value; + SamplingMechanism mechanism; +}; + struct TraceSamplerConfig { struct Rule : public SpanMatcher { double sample_rate = 1.0; @@ -43,7 +49,7 @@ class FinalizedTraceSamplerConfig { public: double max_per_second; std::unordered_map metadata; - std::unordered_map rules; + std::unordered_map rules; }; Expected finalize_config( diff --git a/test/test_tracer_config.cpp b/test/test_tracer_config.cpp index 23b1787e..ed5a2df1 100644 --- a/test/test_tracer_config.cpp +++ b/test/test_tracer_config.cpp @@ -606,9 +606,11 @@ TEST_CASE("TracerConfig::trace_sampler") { SECTION("yields one sampling rule") { auto finalized = finalize_config(config); REQUIRE(finalized); - REQUIRE(finalized->trace_sampler.rules.size() == 1); + REQUIRE(finalized->trace_sampler.rules.count(catch_all)); // and the default sample_rate is 100% - REQUIRE(finalized->trace_sampler.rules[catch_all] == 1.0); + const auto& rate = finalized->trace_sampler.rules[catch_all]; + CHECK(rate.value == 1.0); + CHECK(rate.mechanism == SamplingMechanism::RULE); } SECTION("has to have a valid sample_rate") { @@ -629,8 +631,11 @@ TEST_CASE("TracerConfig::trace_sampler") { rules[1].sample_rate = 0.6; auto finalized = finalize_config(config); REQUIRE(finalized); - REQUIRE(finalized->trace_sampler.rules.size() == 1); - REQUIRE(finalized->trace_sampler.rules[catch_all] == 0.5); + REQUIRE(finalized->trace_sampler.rules.count(catch_all)); + + const auto& rate = finalized->trace_sampler.rules[catch_all]; + CHECK(rate.value == 0.5); + CHECK(rate.mechanism == SamplingMechanism::RULE); } SECTION("global sample_rate creates a catch-all rule") { @@ -639,7 +644,8 @@ TEST_CASE("TracerConfig::trace_sampler") { REQUIRE(finalized); REQUIRE(finalized->trace_sampler.rules.count(catch_all)); const auto& rate = finalized->trace_sampler.rules[catch_all]; - REQUIRE(rate == 0.25); + CHECK(rate.value == 0.25); + CHECK(rate.mechanism == SamplingMechanism::RULE); } SECTION("DD_TRACE_SAMPLE_RATE") { @@ -648,7 +654,9 @@ TEST_CASE("TracerConfig::trace_sampler") { auto finalized = finalize_config(config); REQUIRE(finalized); REQUIRE(finalized->trace_sampler.rules.count(catch_all)); - REQUIRE(finalized->trace_sampler.rules[catch_all] == 0.5); + const auto& rate = finalized->trace_sampler.rules[catch_all]; + CHECK(rate.value == 0.5); + CHECK(rate.mechanism == SamplingMechanism::RULE); } SECTION("overrides TraceSamplerConfig::sample_rate") { @@ -657,7 +665,9 @@ TEST_CASE("TracerConfig::trace_sampler") { auto finalized = finalize_config(config); REQUIRE(finalized); REQUIRE(finalized->trace_sampler.rules.count(catch_all)); - REQUIRE(finalized->trace_sampler.rules[catch_all] == 0.5); + const auto& rate = finalized->trace_sampler.rules[catch_all]; + CHECK(rate.value == 0.5); + CHECK(rate.mechanism == SamplingMechanism::RULE); } SECTION("has to have a valid value") { @@ -796,7 +806,9 @@ TEST_CASE("TracerConfig::trace_sampler") { auto found = rules.find(matcher); REQUIRE(found != rules.cend()); - CHECK(found->second == 0); + + CHECK(found->second.value == 0); + CHECK(found->second.mechanism == SamplingMechanism::RULE); SpanMatcher matcher2; matcher2.service = "*"; @@ -806,7 +818,8 @@ TEST_CASE("TracerConfig::trace_sampler") { found = rules.find(matcher2); REQUIRE(found != rules.cend()); - CHECK(found->second == 1); + CHECK(found->second.value == 1); + CHECK(found->second.mechanism == SamplingMechanism::RULE); } SECTION("must be valid") { From 0f0027d511798c8c8a2803061a91a5dd9424ebd1 Mon Sep 17 00:00:00 2001 From: Damien Mehala Date: Tue, 23 Apr 2024 14:04:05 +0200 Subject: [PATCH 04/11] Fix remote configuration Now, sampling remote configuration do not create a new trace sampler which has the side effect to reset the rate limiter. --- src/datadog/config_manager.cpp | 35 +++++++++++++++------------------- src/datadog/config_manager.h | 4 +++- src/datadog/trace_sampler.cpp | 6 ++++++ src/datadog/trace_sampler.h | 4 ++++ test/test_remote_config.cpp | 19 +++++++++++------- 5 files changed, 40 insertions(+), 28 deletions(-) diff --git a/src/datadog/config_manager.cpp b/src/datadog/config_manager.cpp index d2badc83..200b0bc0 100644 --- a/src/datadog/config_manager.cpp +++ b/src/datadog/config_manager.cpp @@ -12,12 +12,13 @@ ConfigManager::ConfigManager(const FinalizedTracerConfig& config) default_metadata_(config.metadata), trace_sampler_( std::make_shared(config.trace_sampler, clock_)), + rules_(config.trace_sampler.rules), span_defaults_(std::make_shared(config.defaults)), report_traces_(config.report_traces) {} std::shared_ptr ConfigManager::trace_sampler() { std::lock_guard lock(mutex_); - return trace_sampler_.value(); + return trace_sampler_; } std::shared_ptr ConfigManager::span_defaults() { @@ -35,32 +36,27 @@ std::vector ConfigManager::update(const ConfigUpdate& conf) { std::lock_guard lock(mutex_); + auto rules = rules_; + if (!conf.trace_sampling_rate) { - reset_config(ConfigName::TRACE_SAMPLING_RATE, trace_sampler_, metadata); + auto found = default_metadata_.find(ConfigName::TRACE_SAMPLING_RATE); + if (found != default_metadata_.cend()) { + metadata.push_back(found->second); + } } else { ConfigMetadata trace_sampling_metadata( ConfigName::TRACE_SAMPLING_RATE, to_string(*conf.trace_sampling_rate, 1), ConfigMetadata::Origin::REMOTE_CONFIG); - TraceSamplerConfig trace_sampler_cfg; - trace_sampler_cfg.sample_rate = *conf.trace_sampling_rate; - - auto finalized_trace_sampler_cfg = finalize_config(trace_sampler_cfg); - if (auto error = finalized_trace_sampler_cfg.if_error()) { - trace_sampling_metadata.error = *error; - } - - auto trace_sampler = - std::make_shared(*finalized_trace_sampler_cfg, clock_); + auto rate = Rate::from(*conf.trace_sampling_rate); + rules[catch_all] = TraceSamplerRate{*rate, SamplingMechanism::REMOTE_RULE}; - // This reset rate limiting and `TraceSampler` has no `operator==`. - // TODO: Instead of creating another `TraceSampler`, we should - // update the default sampling rate. - trace_sampler_ = std::move(trace_sampler); metadata.emplace_back(std::move(trace_sampling_metadata)); } + trace_sampler_->set_rules(rules); + if (!conf.tags) { reset_config(ConfigName::TAGS, span_defaults_, metadata); } else { @@ -109,10 +105,9 @@ std::vector ConfigManager::reset() { return update({}); } nlohmann::json ConfigManager::config_json() const { std::lock_guard lock(mutex_); - return nlohmann::json{ - {"defaults", to_json(*span_defaults_.value())}, - {"trace_sampler", trace_sampler_.value()->config_json()}, - {"report_traces", report_traces_.value()}}; + return nlohmann::json{{"defaults", to_json(*span_defaults_.value())}, + {"trace_sampler", trace_sampler_->config_json()}, + {"report_traces", report_traces_.value()}}; } } // namespace tracing diff --git a/src/datadog/config_manager.h b/src/datadog/config_manager.h index 88698616..5c0536bc 100644 --- a/src/datadog/config_manager.h +++ b/src/datadog/config_manager.h @@ -54,7 +54,9 @@ class ConfigManager { Clock clock_; std::unordered_map default_metadata_; - DynamicConfig> trace_sampler_; + std::shared_ptr trace_sampler_; + std::unordered_map rules_; + DynamicConfig> span_defaults_; DynamicConfig report_traces_; diff --git a/src/datadog/trace_sampler.cpp b/src/datadog/trace_sampler.cpp index 773ff691..a5b40089 100644 --- a/src/datadog/trace_sampler.cpp +++ b/src/datadog/trace_sampler.cpp @@ -21,6 +21,12 @@ TraceSampler::TraceSampler(const FinalizedTraceSamplerConfig& config, limiter_(clock, config.max_per_second), limiter_max_per_second_(config.max_per_second) {} +void TraceSampler::set_rules( + std::unordered_map + rules) { + rules_ = std::move(rules); +} + SamplingDecision TraceSampler::decide(const SpanData& span) { SamplingDecision decision; decision.origin = SamplingDecision::Origin::LOCAL; diff --git a/src/datadog/trace_sampler.h b/src/datadog/trace_sampler.h index 063d0809..03d19fa5 100644 --- a/src/datadog/trace_sampler.h +++ b/src/datadog/trace_sampler.h @@ -114,6 +114,10 @@ class TraceSampler { public: TraceSampler(const FinalizedTraceSamplerConfig& config, const Clock& clock); + void set_rules( + std::unordered_map + rules); + // Return a sampling decision for the specified root span. SamplingDecision decide(const SpanData&); diff --git a/test/test_remote_config.cpp b/test/test_remote_config.cpp index 2800a1c7..e3bd9d98 100644 --- a/test/test_remote_config.cpp +++ b/test/test_remote_config.cpp @@ -3,6 +3,7 @@ #include "catch.hpp" #include "datadog/json_fwd.hpp" #include "datadog/remote_config.h" +#include "datadog/trace_sampler.h" #include "mocks/loggers.h" #include "test.h" @@ -199,16 +200,18 @@ REMOTE_CONFIG_TEST("response processing") { REQUIRE(!response_json.is_discarded()); - const auto old_trace_sampler = config_manager->trace_sampler(); + const auto old_trace_sampler_config = + config_manager->trace_sampler()->config_json(); const auto old_span_defaults = config_manager->span_defaults(); const auto old_report_traces = config_manager->report_traces(); const auto config_updated = rc.process_response(response_json); REQUIRE(config_updated.size() == 3); - const auto new_trace_sampler = config_manager->trace_sampler(); + const auto new_trace_sampler_config = + config_manager->trace_sampler()->config_json(); const auto new_span_defaults = config_manager->span_defaults(); const auto new_report_traces = config_manager->report_traces(); - CHECK(new_trace_sampler != old_trace_sampler); + CHECK(new_trace_sampler_config != old_trace_sampler_config); CHECK(new_span_defaults != old_span_defaults); CHECK(new_report_traces != old_report_traces); @@ -245,11 +248,12 @@ REMOTE_CONFIG_TEST("response processing") { const auto config_updated = rc.process_response(response_json); REQUIRE(config_updated.size() == 3); - const auto current_trace_sampler = config_manager->trace_sampler(); + const auto current_trace_sampler_config = + config_manager->trace_sampler()->config_json(); const auto current_span_defaults = config_manager->span_defaults(); const auto current_report_traces = config_manager->report_traces(); - CHECK(old_trace_sampler == current_trace_sampler); + CHECK(old_trace_sampler_config == current_trace_sampler_config); CHECK(old_span_defaults == current_span_defaults); CHECK(old_report_traces == current_report_traces); } @@ -279,8 +283,9 @@ REMOTE_CONFIG_TEST("response processing") { const auto config_updated = rc.process_response(response_json); REQUIRE(config_updated.size() == 1); - const auto current_trace_sampler = config_manager->trace_sampler(); - CHECK(old_trace_sampler == current_trace_sampler); + const auto current_trace_sampler_config = + config_manager->trace_sampler()->config_json(); + CHECK(old_trace_sampler_config == current_trace_sampler_config); } } } From 908a852ec62831f303f572d208b8dc57596264a4 Mon Sep 17 00:00:00 2001 From: Damien Mehala Date: Tue, 23 Apr 2024 14:52:37 +0200 Subject: [PATCH 05/11] add sampling rules RC support --- src/datadog/config_manager.cpp | 82 ++++++++++++++++++++++++++++++++++ src/datadog/config_update.h | 1 + src/datadog/remote_config.cpp | 11 ++++- src/datadog/trace_sampler.cpp | 1 + 4 files changed, 93 insertions(+), 2 deletions(-) diff --git a/src/datadog/config_manager.cpp b/src/datadog/config_manager.cpp index 200b0bc0..d35955bd 100644 --- a/src/datadog/config_manager.cpp +++ b/src/datadog/config_manager.cpp @@ -6,6 +6,68 @@ namespace datadog { namespace tracing { +namespace { + +using Rules = + std::unordered_map; + +Expected parse_trace_sampling_rules(const nlohmann::json& json_rules) { + Rules parsed_rules; + + std::string type = json_rules.type_name(); + if (type != "array") { + std::string message; + return Error{Error::TRACE_SAMPLING_RULES_WRONG_TYPE, std::move(message)}; + } + + for (const auto& json_rule : json_rules) { + auto matcher = SpanMatcher::from_json(json_rule); + if (auto* error = matcher.if_error()) { + std::string prefix; + return error->with_prefix(prefix); + } + + TraceSamplerRate rate; + if (auto sample_rate = json_rule.find("sample_rate"); + sample_rate != json_rule.end()) { + type = sample_rate->type_name(); + if (type != "number") { + std::string message; + return Error{Error::TRACE_SAMPLING_RULES_SAMPLE_RATE_WRONG_TYPE, + std::move(message)}; + } + + auto maybe_rate = Rate::from(*sample_rate); + if (auto error = maybe_rate.if_error()) { + return *error; + } + + rate.value = *maybe_rate; + } + + if (auto provenance_it = json_rule.find("provenance"); + provenance_it != json_rule.cend()) { + if (!provenance_it->is_string()) { + std::string message; + return Error{Error::TRACE_SAMPLING_RULES_SAMPLE_RATE_WRONG_TYPE, + std::move(message)}; + } + + auto provenance = provenance_it->get(); + if (provenance == "customer") { + rate.mechanism = SamplingMechanism::REMOTE_RULE; + } else if (provenance == "dynamic") { + rate.mechanism = SamplingMechanism::REMOTE_ADAPTIVE_RULE; + } + } + + parsed_rules.emplace(std::move(*matcher), std::move(rate)); + } + + return parsed_rules; +} + +} // namespace ConfigManager::ConfigManager(const FinalizedTracerConfig& config) : clock_(config.clock), @@ -55,6 +117,26 @@ std::vector ConfigManager::update(const ConfigUpdate& conf) { metadata.emplace_back(std::move(trace_sampling_metadata)); } + if (!conf.trace_sampling_rules) { + auto found = default_metadata_.find(ConfigName::TRACE_SAMPLING_RULES); + if (found != default_metadata_.cend()) { + metadata.emplace_back(found->second); + } + } else { + ConfigMetadata trace_sampling_rules_metadata( + ConfigName::TRACE_SAMPLING_RULES, + conf.trace_sampling_rules->get(), + ConfigMetadata::Origin::REMOTE_CONFIG); + + auto maybe_rules = parse_trace_sampling_rules(*conf.trace_sampling_rules); + if (auto error = maybe_rules.if_error()) { + trace_sampling_rules_metadata.error = std::move(*error); + } else { + rules.merge(*maybe_rules); + metadata.emplace_back(std::move(trace_sampling_rules_metadata)); + } + } + trace_sampler_->set_rules(rules); if (!conf.tags) { diff --git a/src/datadog/config_update.h b/src/datadog/config_update.h index cd7e07f4..d16fe92b 100644 --- a/src/datadog/config_update.h +++ b/src/datadog/config_update.h @@ -17,6 +17,7 @@ struct ConfigUpdate { Optional report_traces; Optional trace_sampling_rate; Optional> tags; + const nlohmann::json* trace_sampling_rules = nullptr; }; } // namespace tracing diff --git a/src/datadog/remote_config.cpp b/src/datadog/remote_config.cpp index a7be6d82..a2a4489f 100644 --- a/src/datadog/remote_config.cpp +++ b/src/datadog/remote_config.cpp @@ -30,7 +30,8 @@ namespace { enum CapabilitiesFlag : uint64_t { APM_TRACING_SAMPLE_RATE = 1 << 12, APM_TRACING_TAGS = 1 << 15, - APM_TRACING_ENABLED = 1 << 19 + APM_TRACING_ENABLED = 1 << 19, + APM_TRACING_SAMPLE_RULES = 1 << 29, }; constexpr std::array capabilities_byte_array( @@ -46,7 +47,7 @@ constexpr std::array capabilities_byte_array( constexpr std::array k_apm_capabilities = capabilities_byte_array(APM_TRACING_SAMPLE_RATE | APM_TRACING_TAGS | - APM_TRACING_ENABLED); + APM_TRACING_ENABLED | APM_TRACING_SAMPLE_RULES); constexpr StringView k_apm_product = "APM_TRACING"; constexpr StringView k_apm_product_path_substring = "/APM_TRACING/"; @@ -69,6 +70,12 @@ ConfigUpdate parse_dynamic_config(const nlohmann::json& j) { config_update.report_traces = tracing_enabled_it->get(); } + if (auto tracing_sampling_rules_it = j.find("tracing_sampling_rules"); + tracing_sampling_rules_it != j.cend() && + tracing_sampling_rules_it->is_array()) { + config_update.trace_sampling_rules = &(*tracing_sampling_rules_it); + } + return config_update; } diff --git a/src/datadog/trace_sampler.cpp b/src/datadog/trace_sampler.cpp index a5b40089..356381a5 100644 --- a/src/datadog/trace_sampler.cpp +++ b/src/datadog/trace_sampler.cpp @@ -24,6 +24,7 @@ TraceSampler::TraceSampler(const FinalizedTraceSamplerConfig& config, void TraceSampler::set_rules( std::unordered_map rules) { + std::lock_guard lock(mutex_); rules_ = std::move(rules); } From 9b2aadb0c99691ad25db6a9c7adfeb31ccfd8aa4 Mon Sep 17 00:00:00 2001 From: Damien Mehala Date: Tue, 23 Apr 2024 15:27:13 +0200 Subject: [PATCH 06/11] fix: reported telemetry sampling rules --- src/datadog/config_manager.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/datadog/config_manager.cpp b/src/datadog/config_manager.cpp index d35955bd..827bbd82 100644 --- a/src/datadog/config_manager.cpp +++ b/src/datadog/config_manager.cpp @@ -124,8 +124,7 @@ std::vector ConfigManager::update(const ConfigUpdate& conf) { } } else { ConfigMetadata trace_sampling_rules_metadata( - ConfigName::TRACE_SAMPLING_RULES, - conf.trace_sampling_rules->get(), + ConfigName::TRACE_SAMPLING_RULES, conf.trace_sampling_rules->dump(), ConfigMetadata::Origin::REMOTE_CONFIG); auto maybe_rules = parse_trace_sampling_rules(*conf.trace_sampling_rules); From 9545d64206d236f121ae311856d7398a146e1c82 Mon Sep 17 00:00:00 2001 From: Damien Mehala Date: Wed, 22 May 2024 16:28:37 +0200 Subject: [PATCH 07/11] code review --- src/datadog/config_manager.cpp | 6 ++++-- src/datadog/trace_sampler_config.cpp | 4 +++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/datadog/config_manager.cpp b/src/datadog/config_manager.cpp index 827bbd82..f163f34c 100644 --- a/src/datadog/config_manager.cpp +++ b/src/datadog/config_manager.cpp @@ -98,7 +98,7 @@ std::vector ConfigManager::update(const ConfigUpdate& conf) { std::lock_guard lock(mutex_); - auto rules = rules_; + decltype(rules_) rules; if (!conf.trace_sampling_rate) { auto found = default_metadata_.find(ConfigName::TRACE_SAMPLING_RATE); @@ -132,10 +132,12 @@ std::vector ConfigManager::update(const ConfigUpdate& conf) { trace_sampling_rules_metadata.error = std::move(*error); } else { rules.merge(*maybe_rules); - metadata.emplace_back(std::move(trace_sampling_rules_metadata)); } + + metadata.emplace_back(std::move(trace_sampling_rules_metadata)); } + rules.merge(rules_); trace_sampler_->set_rules(rules); if (!conf.tags) { diff --git a/src/datadog/trace_sampler_config.cpp b/src/datadog/trace_sampler_config.cpp index 2588f5f0..bc8c638c 100644 --- a/src/datadog/trace_sampler_config.cpp +++ b/src/datadog/trace_sampler_config.cpp @@ -134,7 +134,9 @@ Expected load_trace_sampler_env_config() { std::string to_string(const std::vector &rules) { nlohmann::json res; for (const auto &r : rules) { - res.emplace_back(r.to_json()); + auto j = r.to_json(); + j["sample_rate"] = r.sample_rate; + res.emplace_back(std::move(j)); } return res.dump(); From be97f1986e25e4fde626dde64fa5d9d4c29b139f Mon Sep 17 00:00:00 2001 From: Damien Mehala Date: Wed, 22 May 2024 19:45:31 +0200 Subject: [PATCH 08/11] fix: report telemetry rps and sample rate for span --- src/datadog/span_sampler_config.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/datadog/span_sampler_config.cpp b/src/datadog/span_sampler_config.cpp index bde23eb8..38a33c11 100644 --- a/src/datadog/span_sampler_config.cpp +++ b/src/datadog/span_sampler_config.cpp @@ -16,7 +16,12 @@ namespace { std::string to_string(const std::vector &rules) { nlohmann::json res; for (const auto &r : rules) { - res.emplace_back(r.to_json()); + auto j = r.to_json(); + j["sample_rate"] = r.sample_rate; + if (r.max_per_second) { + j["max_per_second"] = *r.max_per_second; + } + res.emplace_back(std::move(j)); } return res.dump(); From 23bb27cc653b4cb64da4b31b497cc7b6c1c4e941 Mon Sep 17 00:00:00 2001 From: Damien Mehala Date: Thu, 23 May 2024 11:24:15 +0200 Subject: [PATCH 09/11] overall improvements - report remote trace sample rate as RULE instead of REMOTE_RULE for legacy reasons - update REMOTE_RULES and REMOTE_ADAPTIVE_RULE values to match the spec - report default sample rate for telemetry - add _dd.psr for new remote rules --- src/datadog/config_manager.cpp | 4 ++-- src/datadog/sampling_mechanism.h | 4 ++-- src/datadog/trace_sampler_config.cpp | 3 +++ src/datadog/trace_segment.cpp | 5 ++++- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/datadog/config_manager.cpp b/src/datadog/config_manager.cpp index f163f34c..d8c78128 100644 --- a/src/datadog/config_manager.cpp +++ b/src/datadog/config_manager.cpp @@ -112,7 +112,7 @@ std::vector ConfigManager::update(const ConfigUpdate& conf) { ConfigMetadata::Origin::REMOTE_CONFIG); auto rate = Rate::from(*conf.trace_sampling_rate); - rules[catch_all] = TraceSamplerRate{*rate, SamplingMechanism::REMOTE_RULE}; + rules[catch_all] = TraceSamplerRate{*rate, SamplingMechanism::RULE}; metadata.emplace_back(std::move(trace_sampling_metadata)); } @@ -137,7 +137,7 @@ std::vector ConfigManager::update(const ConfigUpdate& conf) { metadata.emplace_back(std::move(trace_sampling_rules_metadata)); } - rules.merge(rules_); + rules.insert(rules_.cbegin(), rules_.cend()); trace_sampler_->set_rules(rules); if (!conf.tags) { diff --git a/src/datadog/sampling_mechanism.h b/src/datadog/sampling_mechanism.h index 70e990c4..71c99a99 100644 --- a/src/datadog/sampling_mechanism.h +++ b/src/datadog/sampling_mechanism.h @@ -61,10 +61,10 @@ enum class SamplingMechanism { // Reserved for future use. OTLP_RULE = 9, // Sampling rule configured by user via remote configuration. - REMOTE_RULE = 10, + REMOTE_RULE = 11, // Adaptive sampling rule automatically computed by Datadog backend and sent // via remote configuration. - REMOTE_ADAPTIVE_RULE = 11, + REMOTE_ADAPTIVE_RULE = 12, }; } // namespace tracing diff --git a/src/datadog/trace_sampler_config.cpp b/src/datadog/trace_sampler_config.cpp index bc8c638c..81e67b6b 100644 --- a/src/datadog/trace_sampler_config.cpp +++ b/src/datadog/trace_sampler_config.cpp @@ -197,6 +197,9 @@ Expected finalize_config( result.metadata[ConfigName::TRACE_SAMPLING_RATE] = ConfigMetadata( ConfigName::TRACE_SAMPLING_RATE, to_string(*sample_rate, 1), ConfigMetadata::Origin::CODE); + } else { + result.metadata[ConfigName::TRACE_SAMPLING_RATE] = ConfigMetadata( + ConfigName::TRACE_SAMPLING_RATE, "1.0", ConfigMetadata::Origin::CODE); } // If `sample_rate` was specified, then it translates to a "catch-all" rule diff --git a/src/datadog/trace_segment.cpp b/src/datadog/trace_segment.cpp index e2506f7d..a91c4da2 100644 --- a/src/datadog/trace_segment.cpp +++ b/src/datadog/trace_segment.cpp @@ -230,7 +230,10 @@ void TraceSegment::span_finished() { decision.mechanism == int(SamplingMechanism::DEFAULT)) { local_root.numeric_tags[tags::internal::agent_sample_rate] = *decision.configured_rate; - } else if (decision.mechanism == int(SamplingMechanism::RULE)) { + } else if (decision.mechanism == int(SamplingMechanism::RULE) || + decision.mechanism == int(SamplingMechanism::REMOTE_RULE) || + decision.mechanism == + int(SamplingMechanism::REMOTE_ADAPTIVE_RULE)) { local_root.numeric_tags[tags::internal::rule_sample_rate] = *decision.configured_rate; if (decision.limiter_effective_rate) { From 0a61f4b10f34018fcaa176d3cc9cebb7bcb77f2d Mon Sep 17 00:00:00 2001 From: Damien Mehala Date: Thu, 23 May 2024 16:55:06 +0200 Subject: [PATCH 10/11] Apply suggestions from code review --- src/datadog/trace_sampler_config.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/datadog/trace_sampler_config.cpp b/src/datadog/trace_sampler_config.cpp index 81e67b6b..ac77c07d 100644 --- a/src/datadog/trace_sampler_config.cpp +++ b/src/datadog/trace_sampler_config.cpp @@ -199,7 +199,7 @@ Expected finalize_config( ConfigMetadata::Origin::CODE); } else { result.metadata[ConfigName::TRACE_SAMPLING_RATE] = ConfigMetadata( - ConfigName::TRACE_SAMPLING_RATE, "1.0", ConfigMetadata::Origin::CODE); + ConfigName::TRACE_SAMPLING_RATE, "1.0", ConfigMetadata::Origin::DEFAULT); } // If `sample_rate` was specified, then it translates to a "catch-all" rule From 3df7c9f0751369b65c57cf8c97461a45172c8c09 Mon Sep 17 00:00:00 2001 From: Damien Mehala Date: Thu, 23 May 2024 17:26:06 +0200 Subject: [PATCH 11/11] fix: lint --- src/datadog/trace_sampler_config.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/datadog/trace_sampler_config.cpp b/src/datadog/trace_sampler_config.cpp index ac77c07d..4e39914e 100644 --- a/src/datadog/trace_sampler_config.cpp +++ b/src/datadog/trace_sampler_config.cpp @@ -198,8 +198,9 @@ Expected finalize_config( ConfigName::TRACE_SAMPLING_RATE, to_string(*sample_rate, 1), ConfigMetadata::Origin::CODE); } else { - result.metadata[ConfigName::TRACE_SAMPLING_RATE] = ConfigMetadata( - ConfigName::TRACE_SAMPLING_RATE, "1.0", ConfigMetadata::Origin::DEFAULT); + result.metadata[ConfigName::TRACE_SAMPLING_RATE] = + ConfigMetadata(ConfigName::TRACE_SAMPLING_RATE, "1.0", + ConfigMetadata::Origin::DEFAULT); } // If `sample_rate` was specified, then it translates to a "catch-all" rule