-
-
Notifications
You must be signed in to change notification settings - Fork 6
feat: Add datadog metrics backend #703
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,9 +1,15 @@ | ||
| from __future__ import annotations | ||
|
|
||
| import atexit | ||
| import resource | ||
| import time | ||
| from abc import abstractmethod | ||
| from collections.abc import Mapping | ||
| from contextlib import contextmanager | ||
| from typing import Generator, Protocol, runtime_checkable | ||
| from typing import TYPE_CHECKING, Generator, Protocol, runtime_checkable | ||
|
|
||
| if TYPE_CHECKING: | ||
| from datadog.dogstatsd.base import DogStatsd | ||
|
|
||
| Tags = Mapping[str, str | int | float] | ||
|
|
||
|
|
@@ -21,7 +27,7 @@ def gauge( | |
| value: float, | ||
| instance: str | None = None, | ||
| tags: Tags | None = None, | ||
| sample_rate: float = 1, | ||
| sample_rate: float | None = None, | ||
| unit: str | None = None, | ||
| stacklevel: int = 0, | ||
| ) -> None: | ||
|
|
@@ -93,7 +99,7 @@ def gauge( | |
| value: float, | ||
| instance: str | None = None, | ||
| tags: Tags | None = None, | ||
| sample_rate: float = 1, | ||
| sample_rate: float | None = None, | ||
| unit: str | None = None, | ||
| stacklevel: int = 0, | ||
| ) -> None: | ||
|
|
@@ -139,3 +145,162 @@ def track_memory_usage( | |
| of rss_usage between the context manager opening and closing. | ||
| """ | ||
| yield None | ||
|
|
||
|
|
||
| class DatadogMetrics(MetricsBackend): | ||
| """ | ||
| An opinionated metrics backend that emits to Datadog via DogStatsD. | ||
|
|
||
| All metrics are tagged with ``application`` and ``processing_pool`` so that | ||
| dashboards and alerts can be built once and shared across every application | ||
| using taskbroker-client, without depending on a host-application metrics | ||
| prefix. | ||
|
sentry[bot] marked this conversation as resolved.
|
||
|
|
||
| When ``enable_prefixed_metrics`` is enabled each metric is emitted twice: once | ||
| prefix-free with ``application`` as a tag, and once with ``application`` | ||
| as a metric prefix (and not included in tags). This eases migrating existing | ||
| alerts and dashboards from the prefixed form to the prefix-free form. | ||
|
|
||
| The ``datadog`` package is an optional dependency. Install it with | ||
| ``pip install taskbroker-client[datadog]``. | ||
| """ | ||
|
|
||
def __init__(
    self,
    application: str,
    processing_pool: str | None = None,
    statsd_host: str | None = None,
    statsd_port: str | int | None = None,
    sample_rate: float = 1.0,
    enable_prefixed_metrics: bool = False,
    client: DogStatsd | None = None,
) -> None:
    """
    Store the backend configuration and obtain a DogStatsD client.

    ``application`` and ``processing_pool`` are kept for tagging; a falsy
    ``processing_pool`` is stored as ``"unknown"``. ``sample_rate`` is the
    rate used when a call does not supply its own. When ``client`` is not
    given, one is built from ``statsd_host`` (default ``"localhost"``) and
    ``statsd_port`` (default ``8125``).
    """
    self.enable_prefixed_metrics = enable_prefixed_metrics
    self.sample_rate = sample_rate
    self.processing_pool = processing_pool or "unknown"
    self.application = application

    # NOTE(review): the nesting of the client setup below was reconstructed
    # from a rendering that lost indentation -- confirm against the repo.
    if client is None:
        # Imported lazily because ``datadog`` is an optional dependency.
        from datadog.dogstatsd.base import DogStatsd

        host = statsd_host or "localhost"
        port = 8125 if statsd_port is None else int(statsd_port)
        client = DogStatsd(
            host=host,
            port=port,
            disable_telemetry=True,
            # Metrics are sent from a background thread ...
            disable_background_sender=False,
            # ... and may be buffered before delivery.
            disable_buffering=False,
        )
        # Origin detection is enabled after 0.45 by default.
        # Disable it since it silently fails.
        # Ref: https://github.com/DataDog/datadogpy/issues/764
        client._container_id = None

        # Wait for pending metrics at interpreter exit so none are dropped.
        atexit.register(client.wait_for_pending)

    self.client = client
|
|
||
def _build_tag_list(self, tags: Tags | None, *, with_application: bool) -> list[str]:
    """
    Render the structural tags plus any per-call tags as ``key:value`` strings.

    ``processing_pool`` is always present; ``application`` is added only when
    ``with_application`` is true.
    """
    structural: dict[str, str | int | float] = {"processing_pool": self.processing_pool}
    if with_application:
        structural["application"] = self.application
    # Per-call tags come last so call sites can override the structural defaults.
    combined = {**structural, **(tags or {})}
    return [f"{tag_key}:{tag_value}" for tag_key, tag_value in combined.items()]
|
|
||
def _emit(
    self,
    method: str,
    name: str,
    value: float,
    tags: Tags | None,
    sample_rate: float | None,
) -> None:
    """
    Send one metric through the client method named ``method``.

    The metric is always sent prefix-free with ``application`` as a tag.
    When ``enable_prefixed_metrics`` is set it is sent a second time with
    ``application`` as a name prefix and left out of the tags. A
    ``sample_rate`` of ``None`` falls back to the backend's default rate.
    """
    if sample_rate is None:
        sample_rate = self.sample_rate
    send = getattr(self.client, method)

    # (metric name, whether application is carried as a tag)
    variants = [(name, True)]
    if self.enable_prefixed_metrics:
        variants.append((f"{self.application}.{name}", False))

    for metric_name, tag_application in variants:
        send(
            metric_name,
            value,
            tags=self._build_tag_list(tags, with_application=tag_application),
            sample_rate=sample_rate,
        )
|
|
||
def gauge(
    self,
    key: str,
    value: float,
    instance: str | None = None,
    tags: Tags | None = None,
    sample_rate: float | None = None,
    unit: str | None = None,
    stacklevel: int = 0,
) -> None:
    """
    Emit a gauge metric.

    ``instance``, ``unit`` and ``stacklevel`` are accepted but not forwarded.
    """
    self._emit("gauge", key, value, tags=tags, sample_rate=sample_rate)
|
cursor[bot] marked this conversation as resolved.
|
||
|
|
||
def incr(
    self,
    name: str,
    value: int | float = 1,
    tags: Tags | None = None,
    sample_rate: float | None = None,
) -> None:
    """
    Emit a counter increment of ``value`` (1 by default) via the client's
    ``increment`` method.
    """
    self._emit("increment", name, value, tags=tags, sample_rate=sample_rate)
|
|
||
def distribution(
    self,
    name: str,
    value: int | float,
    tags: Tags | None = None,
    unit: str | None = None,
    sample_rate: float | None = None,
) -> None:
    """
    Emit a distribution sample.

    ``unit`` is accepted but not forwarded.
    """
    self._emit("distribution", name, value, tags=tags, sample_rate=sample_rate)
|
|
||
@contextmanager
def timer(
    self,
    key: str,
    tags: Tags | None = None,
    sample_rate: float | None = None,
    stacklevel: int = 0,
) -> Generator[None]:
    """
    Time the wrapped block and emit the elapsed time as a ``timing`` metric.

    The metric is emitted even if the block raises. ``stacklevel`` is
    accepted but not used.
    """
    began = time.monotonic()
    try:
        yield
    finally:
        # time.monotonic() counts fractional seconds, so this is in seconds.
        # NOTE(review): a reviewer flagged that consumers of ``timing`` may
        # expect milliseconds -- confirm the intended unit.
        elapsed = time.monotonic() - began
        self._emit("timing", key, elapsed, tags, sample_rate)
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Timer sends seconds not milliseconds (High Severity)
Reviewed by Cursor Bugbot for commit fe820a1. Configure here.
sentry[bot] marked this conversation as resolved.
|
||
|
|
||
@contextmanager
def track_memory_usage(
    self,
    key: str,
    tags: Tags | None = None,
) -> Generator[None]:
    """
    Emit a distribution metric holding the change in ``_rss_bytes()``
    between entering and leaving the context.

    The metric is emitted even if the wrapped block raises, and always uses
    the backend's default sample rate.
    """
    baseline = _rss_bytes()
    try:
        yield
    finally:
        delta = _rss_bytes() - baseline
        self._emit("distribution", key, delta, tags, None)
|
|
||
|
|
||
def _rss_bytes() -> int:
    """
    Return this process's peak resident set size, in bytes.

    The value comes from ``ru_maxrss``, which is a high-water mark rather
    than the current RSS: it never decreases over the life of the process.
    """
    import sys

    peak = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    # macOS reports ru_maxrss in bytes; Linux and the BSDs report kilobytes.
    if sys.platform == "darwin":
        return peak
    return peak * 1024
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Memory metric uses peak RSS (Medium Severity)
Reviewed by Cursor Bugbot for commit fe820a1. Configure here.
sentry[bot] marked this conversation as resolved.
markstory marked this conversation as resolved.
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,150 @@ | ||
| from __future__ import annotations | ||
|
|
||
| from unittest.mock import Mock | ||
|
|
||
| import pytest | ||
|
|
||
| from taskbroker_client.metrics import DatadogMetrics | ||
|
|
||
|
|
||
def make_metrics(
    *,
    enable_prefixed_metrics: bool = False,
    processing_pool: str | None = "ingest-errors",
    sample_rate: float = 1.0,
    client: Mock | None = None,
) -> tuple[DatadogMetrics, Mock]:
    """
    Build a DatadogMetrics for application "sentry" around a mock client.

    Returns the backend together with the client it emits to, so tests can
    inspect the recorded calls. A fresh ``Mock`` is used when no client is
    supplied.
    """
    if not client:
        client = Mock()
    backend = DatadogMetrics(
        application="sentry",
        processing_pool=processing_pool,
        sample_rate=sample_rate,
        enable_prefixed_metrics=enable_prefixed_metrics,
        client=client,
    )
    return backend, client
|
|
||
|
|
||
def test_incr_prefix_off() -> None:
    """
    With prefixing off, incr() emits once, prefix-free, with structural and
    per-call tags and the backend's default sample rate.
    """
    metrics, client = make_metrics(sample_rate=0.5)
    metrics.incr("taskworker.x", tags={"namespace": "n"})

    client.increment.assert_called_once()
    call = client.increment.call_args
    assert call.args[0] == "taskworker.x"
    assert call.args[1] == 1
    expected_tags = {
        "application:sentry",
        "processing_pool:ingest-errors",
        "namespace:n",
    }
    assert set(call.kwargs["tags"]) == expected_tags
    assert call.kwargs["sample_rate"] == 0.5
|
|
||
|
|
||
def test_incr_prefix_on() -> None:
    """
    With prefixing on, incr() emits twice: prefix-free with the application
    tag, then application-prefixed without it.
    """
    metrics, client = make_metrics(enable_prefixed_metrics=True)
    metrics.incr("taskworker.x")

    assert client.increment.call_count == 2
    plain, prefixed = client.increment.call_args_list

    assert plain.args[0] == "taskworker.x"
    assert "application:sentry" in plain.kwargs["tags"]

    prefixed_tags = prefixed.kwargs["tags"]
    assert prefixed.args[0] == "sentry.taskworker.x"
    assert all(not tag.startswith("application:") for tag in prefixed_tags)
    assert "processing_pool:ingest-errors" in prefixed_tags
|
|
||
|
|
||
def test_gauge_ignores_unsupported_params() -> None:
    """
    gauge() forwards only name, value, tags and sample_rate to the client.
    """
    metrics, client = make_metrics()
    metrics.gauge("taskworker.size", 12.0, instance="i", unit="bytes", stacklevel=3)

    client.gauge.assert_called_once()
    call = client.gauge.call_args
    assert call.args[0] == "taskworker.size"
    assert call.args[1] == 12.0
    assert set(call.kwargs) == {"tags", "sample_rate"}
|
|
||
|
|
||
def test_distribution_ignores_unit() -> None:
    """
    distribution() does not pass ``unit`` through to the client.
    """
    metrics, client = make_metrics()
    metrics.distribution("taskworker.duration", 0.25, unit="seconds")

    client.distribution.assert_called_once()
    call = client.distribution.call_args
    assert call.args[0] == "taskworker.duration"
    assert call.args[1] == 0.25
    assert "unit" not in call.kwargs
|
|
||
|
|
||
def test_tag_precedence() -> None:
    """
    A per-call ``processing_pool`` tag replaces the structural default.
    """
    metrics, client = make_metrics()
    metrics.incr("taskworker.x", tags={"processing_pool": "override", "namespace": "n"})

    emitted_tags = client.increment.call_args.kwargs["tags"]
    assert "processing_pool:override" in emitted_tags
    assert "processing_pool:ingest-errors" not in emitted_tags
|
|
||
|
|
||
def test_none_tags_still_emit_structural_tags() -> None:
    """
    Without per-call tags, the structural tags are still emitted.
    """
    metrics, client = make_metrics()
    metrics.incr("taskworker.x")

    emitted_tags = client.increment.call_args.kwargs["tags"]
    assert set(emitted_tags) == {"application:sentry", "processing_pool:ingest-errors"}
|
|
||
|
|
||
def test_sample_rate_defaulting() -> None:
    """
    The backend's sample rate is used unless the call supplies its own.
    """
    metrics, client = make_metrics(sample_rate=0.1)

    def last_rate() -> float:
        # Sample rate passed on the most recent increment() call.
        return client.increment.call_args.kwargs["sample_rate"]

    metrics.incr("taskworker.x")
    assert last_rate() == 0.1

    metrics.incr("taskworker.x", sample_rate=0.5)
    assert last_rate() == 0.5
|
|
||
|
|
||
def test_timer_happy_path() -> None:
    """
    timer() emits a float timing under both names when prefixing is on.
    """
    metrics, client = make_metrics(enable_prefixed_metrics=True)
    with metrics.timer("taskworker.duration", tags={"host": "h"}):
        pass

    assert client.timing.call_count == 2
    first = client.timing.call_args_list[0]
    assert first.args[0] == "taskworker.duration"
    assert isinstance(first.args[1], float)
    assert "host:h" in first.kwargs["tags"]
|
|
||
|
|
||
def test_timer_emits_on_exception() -> None:
    """
    timer() still emits its timing when the wrapped block raises.
    """
    metrics, client = make_metrics()
    with pytest.raises(ValueError), metrics.timer("taskworker.dur"):
        raise ValueError("boom")

    client.timing.assert_called_once()
    elapsed = client.timing.call_args.args[1]
    assert isinstance(elapsed, float)
|
|
||
|
|
||
def test_track_memory_usage() -> None:
    """
    track_memory_usage() emits one integer distribution sample under the
    given key.
    """
    metrics, client = make_metrics()
    with metrics.track_memory_usage("taskworker.mem"):
        var = "a" * 1000000
        var += "b"

    client.distribution.assert_called_once()
    args, _ = client.distribution.call_args
    assert args[0] == "taskworker.mem"
    assert isinstance(args[1], int)
    # The backend samples ru_maxrss, a high-water mark. If the process peaked
    # earlier in the run, this allocation does not raise it and the delta is
    # legitimately 0, so requiring a strictly positive value would be flaky.
    assert args[1] >= 0
|
|
||
|
|
||
def test_track_memory_usage_prefixed() -> None:
    """
    With prefixing on, track_memory_usage() emits twice, the second time
    under the application-prefixed name.
    """
    metrics, client = make_metrics(enable_prefixed_metrics=True)
    with metrics.track_memory_usage("taskworker.mem"):
        pass

    emitted = client.distribution.call_args_list
    assert client.distribution.call_count == 2
    assert emitted[1].args[0] == "sentry.taskworker.mem"


There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This default value has been silly for a while. Now is a good time to change this.