From 56d4a995184bc40b0b0b0a14c8cb703458d37d20 Mon Sep 17 00:00:00 2001 From: yeya24 Date: Sun, 26 Oct 2025 14:28:58 -0700 Subject: [PATCH 1/2] add head stale series metric Signed-off-by: yeya24 --- pkg/ingester/metrics.go | 7 +++++++ pkg/ingester/metrics_test.go | 15 +++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/pkg/ingester/metrics.go b/pkg/ingester/metrics.go index 8160216f2a1..f74d2295f8e 100644 --- a/pkg/ingester/metrics.go +++ b/pkg/ingester/metrics.go @@ -343,6 +343,7 @@ type tsdbMetrics struct { tsdbHeadTruncateFail *prometheus.Desc tsdbHeadTruncateTotal *prometheus.Desc tsdbHeadGcDuration *prometheus.Desc + tsdbHeadStaleSeries *prometheus.Desc tsdbActiveAppenders *prometheus.Desc tsdbSeriesNotFound *prometheus.Desc tsdbChunks *prometheus.Desc @@ -470,6 +471,10 @@ func newTSDBMetrics(r prometheus.Registerer) *tsdbMetrics { "cortex_ingester_tsdb_head_gc_duration_seconds", "Runtime of garbage collection in the TSDB head.", nil, nil), + tsdbHeadStaleSeries: prometheus.NewDesc( + "cortex_ingester_tsdb_head_stale_series", + "Total number of stale series in the head block.", + []string{"user"}, nil), tsdbActiveAppenders: prometheus.NewDesc( "cortex_ingester_tsdb_head_active_appenders", "Number of currently active TSDB appender transactions.", @@ -623,6 +628,7 @@ func (sm *tsdbMetrics) Describe(out chan<- *prometheus.Desc) { out <- sm.tsdbWALWritesFailed out <- sm.tsdbHeadTruncateFail out <- sm.tsdbHeadTruncateTotal + out <- sm.tsdbHeadStaleSeries out <- sm.tsdbHeadGcDuration out <- sm.tsdbActiveAppenders out <- sm.tsdbSeriesNotFound @@ -684,6 +690,7 @@ func (sm *tsdbMetrics) Collect(out chan<- prometheus.Metric) { data.SendSumOfCounters(out, sm.tsdbHeadTruncateFail, "prometheus_tsdb_head_truncations_failed_total") data.SendSumOfCounters(out, sm.tsdbHeadTruncateTotal, "prometheus_tsdb_head_truncations_total") data.SendSumOfSummaries(out, sm.tsdbHeadGcDuration, "prometheus_tsdb_head_gc_duration_seconds") + data.SendSumOfGaugesPerUser(out, 
sm.tsdbHeadStaleSeries, "prometheus_tsdb_head_stale_series") data.SendSumOfGauges(out, sm.tsdbActiveAppenders, "prometheus_tsdb_head_active_appenders") data.SendSumOfCounters(out, sm.tsdbSeriesNotFound, "prometheus_tsdb_head_series_not_found_total") data.SendSumOfGauges(out, sm.tsdbChunks, "prometheus_tsdb_head_chunks") diff --git a/pkg/ingester/metrics_test.go b/pkg/ingester/metrics_test.go index 9c7d316b964..40f84f3f97b 100644 --- a/pkg/ingester/metrics_test.go +++ b/pkg/ingester/metrics_test.go @@ -428,6 +428,11 @@ func TestTSDBMetrics(t *testing.T) { # HELP cortex_ingester_tsdb_exemplar_exemplars_in_storage Number of TSDB exemplars currently in storage. # TYPE cortex_ingester_tsdb_exemplar_exemplars_in_storage gauge cortex_ingester_tsdb_exemplar_exemplars_in_storage 30 + # HELP cortex_ingester_tsdb_head_stale_series Total number of stale series in the head block. + # TYPE cortex_ingester_tsdb_head_stale_series gauge + cortex_ingester_tsdb_head_stale_series{user="user1"} 382695 + cortex_ingester_tsdb_head_stale_series{user="user2"} 2659397 + cortex_ingester_tsdb_head_stale_series{user="user3"} 30969 `)) require.NoError(t, err) } @@ -691,6 +696,10 @@ func TestTSDBMetricsWithRemoval(t *testing.T) { # HELP cortex_ingester_tsdb_exemplar_exemplars_in_storage Number of TSDB exemplars currently in storage. # TYPE cortex_ingester_tsdb_exemplar_exemplars_in_storage gauge cortex_ingester_tsdb_exemplar_exemplars_in_storage 20 + # HELP cortex_ingester_tsdb_head_stale_series Total number of stale series in the head block. 
+ # TYPE cortex_ingester_tsdb_head_stale_series gauge + cortex_ingester_tsdb_head_stale_series{user="user1"} 382695 + cortex_ingester_tsdb_head_stale_series{user="user2"} 2659397 `)) require.NoError(t, err) } @@ -1018,5 +1027,11 @@ func populateTSDBMetrics(base float64) *prometheus.Registry { }) exemplarsOutOfOrderTotal.Add(3) + headStaleSeries := promauto.With(r).NewGauge(prometheus.GaugeOpts{ + Name: "prometheus_tsdb_head_stale_series", + Help: "Total number of stale series in the head block.", + }) + headStaleSeries.Set(31 * base) + return r } From b4924f7b9440f31185d8b12fb53184a7bb0c2c92 Mon Sep 17 00:00:00 2001 From: yeya24 Date: Sun, 26 Oct 2025 14:32:39 -0700 Subject: [PATCH 2/2] update changelog Signed-off-by: yeya24 --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a62898f89a0..96f226b3638 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -82,6 +82,7 @@ * [ENHANCEMENT] Implement versioned transactions for writes to DynamoDB ring. #6986 * [ENHANCEMENT] Add source metadata to requests(api vs ruler) #6947 * [ENHANCEMENT] Add new metric `cortex_discarded_series` and `cortex_discarded_series_per_labelset` to track number of series that have a discarded sample. #6995 +* [ENHANCEMENT] Ingester: Add `cortex_ingester_tsdb_head_stale_series` metric to keep track of the number of stale series in the head block. #7071 * [BUGFIX] Ingester: Avoid error or early throttling when READONLY ingesters are present in the ring #6517 * [BUGFIX] Ingester: Fix labelset data race condition. #6573 * [BUGFIX] Compactor: Cleaner should not put deletion marker for blocks with no-compact marker. #6576