From 49190e6271e371e0ec8e9d0f670b8a2aee98f7ab Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Wed, 13 May 2026 07:29:35 +0000 Subject: [PATCH] feat(ingester): Add cortex_ingester_active_metric_names gauge per user Expose the number of unique metric names (distinct __name__ values) per tenant in the ingester head as a new Prometheus gauge metric. The data is sourced from the existing seriesInMetric counter, which already tracks series counts per metric name via TSDB lifecycle callbacks. The metric is registered when -ingester.active-series-metrics-enabled is true (same gate as cortex_ingester_active_series) and updated in the same periodic loop alongside active series counts. This enables operators to monitor metric name cardinality per tenant without additional tracking overhead, as the underlying data structure already exists; the periodic update only briefly locks each counter shard to sum its map size. Signed-off-by: Ben Ye --- pkg/ingester/ingester.go | 2 ++ pkg/ingester/metrics.go | 9 +++++++++ pkg/ingester/user_state.go | 11 +++++++++++ pkg/ingester/user_state_test.go | 33 +++++++++++++++++++++++++++++++++ 4 files changed, 55 insertions(+) diff --git a/pkg/ingester/ingester.go b/pkg/ingester/ingester.go index 6116a318992..c68c6719deb 100644 --- a/pkg/ingester/ingester.go +++ b/pkg/ingester/ingester.go @@ -1152,6 +1152,7 @@ func (i *Ingester) updateActiveSeries(ctx context.Context) { userDB.activeSeries.Purge(purgeTime) i.metrics.activeSeriesPerUser.WithLabelValues(userID).Set(float64(userDB.activeSeries.Active())) i.metrics.activeNHSeriesPerUser.WithLabelValues(userID).Set(float64(userDB.activeSeries.ActiveNativeHistogram())) + i.metrics.activeMetricNamesPerUser.WithLabelValues(userID).Set(float64(userDB.seriesInMetric.ActiveMetricNames())) if err := userDB.labelSetCounter.UpdateMetric(ctx, userDB, i.metrics); err != nil { level.Warn(i.logger).Log("msg", "failed to update per labelSet metrics", "user", userID, "err", err) } @@ -3041,6 +3042,7 @@ func (i *Ingester) closeAllTSDB() { i.metrics.memUsers.Dec() 
i.metrics.activeSeriesPerUser.DeleteLabelValues(userID) i.metrics.activeNHSeriesPerUser.DeleteLabelValues(userID) + i.metrics.activeMetricNamesPerUser.DeleteLabelValues(userID) }(userDB) } diff --git a/pkg/ingester/metrics.go b/pkg/ingester/metrics.go index 238c578e656..a1289b84be8 100644 --- a/pkg/ingester/metrics.go +++ b/pkg/ingester/metrics.go @@ -57,6 +57,7 @@ type ingesterMetrics struct { activeSeriesPerUser *prometheus.GaugeVec activeNHSeriesPerUser *prometheus.GaugeVec + activeMetricNamesPerUser *prometheus.GaugeVec activeQueriedSeriesPerUser *prometheus.GaugeVec limitsPerLabelSet *prometheus.GaugeVec usagePerLabelSet *prometheus.GaugeVec @@ -298,6 +299,12 @@ func newIngesterMetrics(r prometheus.Registerer, Help: "Number of currently active native histogram series per user.", }, []string{"user"}), + // Not registered automatically, but only if activeSeriesEnabled is true. + activeMetricNamesPerUser: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "cortex_ingester_active_metric_names", + Help: "Number of unique metric names in the TSDB head per user.", + }, []string{"user"}), + // Not registered automatically, but only if activeSeriesEnabled is true. 
activeSeriesPerTracker: prometheus.NewGaugeVec(prometheus.GaugeOpts{ Name: "cortex_ingester_active_series_per_tracker", @@ -349,6 +356,7 @@ func newIngesterMetrics(r prometheus.Registerer, if activeSeriesEnabled && r != nil { r.MustRegister(m.activeSeriesPerUser) r.MustRegister(m.activeNHSeriesPerUser) + r.MustRegister(m.activeMetricNamesPerUser) r.MustRegister(m.activeSeriesPerTracker) } @@ -380,6 +388,7 @@ func (m *ingesterMetrics) deletePerUserMetrics(userID string) { m.memMetadataRemovedTotal.DeleteLabelValues(userID) m.activeSeriesPerUser.DeleteLabelValues(userID) m.activeNHSeriesPerUser.DeleteLabelValues(userID) + m.activeMetricNamesPerUser.DeleteLabelValues(userID) m.activeSeriesPerTracker.DeletePartialMatch(prometheus.Labels{"user": userID}) m.activeQueriedSeriesPerUser.DeletePartialMatch(prometheus.Labels{"user": userID}) m.usagePerLabelSet.DeletePartialMatch(prometheus.Labels{"user": userID}) diff --git a/pkg/ingester/user_state.go b/pkg/ingester/user_state.go index 2918c8993aa..f7531d64cbe 100644 --- a/pkg/ingester/user_state.go +++ b/pkg/ingester/user_state.go @@ -86,6 +86,17 @@ func (m *metricCounter) increaseSeriesForMetric(metric string) { shard.mtx.Unlock() } +// ActiveMetricNames returns the total number of unique metric names tracked across all shards. 
+func (m *metricCounter) ActiveMetricNames() int { + total := 0 + for i := range m.shards { + m.shards[i].mtx.Lock() + total += len(m.shards[i].m) + m.shards[i].mtx.Unlock() + } + return total +} + type labelSetCounterEntry struct { count int labels labels.Labels diff --git a/pkg/ingester/user_state_test.go b/pkg/ingester/user_state_test.go index 38be322854d..bccc00e8927 100644 --- a/pkg/ingester/user_state_test.go +++ b/pkg/ingester/user_state_test.go @@ -378,3 +378,36 @@ func (ir *mockIndexReader) LabelNamesFor(ctx context.Context, postings index.Pos } func (ir *mockIndexReader) Close() error { return nil } + +func TestMetricCounter_ActiveMetricNames(t *testing.T) { + limits := validation.Limits{MaxLocalSeriesPerMetric: 100} + overrides := validation.NewOverrides(limits, nil) + limiter := NewLimiter(overrides, nil, util.ShardingStrategyDefault, true, 3, false, "") + mc := newMetricCounter(limiter, nil) + + // Initially zero. + assert.Equal(t, 0, mc.ActiveMetricNames()) + + // Add series for 3 different metrics. + mc.increaseSeriesForMetric("metric_a") + mc.increaseSeriesForMetric("metric_a") + mc.increaseSeriesForMetric("metric_b") + mc.increaseSeriesForMetric("metric_c") + assert.Equal(t, 3, mc.ActiveMetricNames()) + + // Remove all series for metric_b. + mc.decreaseSeriesForMetric("metric_b") + assert.Equal(t, 2, mc.ActiveMetricNames()) + + // Remove one series for metric_a (still has one left). + mc.decreaseSeriesForMetric("metric_a") + assert.Equal(t, 2, mc.ActiveMetricNames()) + + // Remove last series for metric_a. + mc.decreaseSeriesForMetric("metric_a") + assert.Equal(t, 1, mc.ActiveMetricNames()) + + // Remove last series for metric_c. + mc.decreaseSeriesForMetric("metric_c") + assert.Equal(t, 0, mc.ActiveMetricNames()) +}