From 6dd6ddba3079d1a45670324db7388bed64dd7209 Mon Sep 17 00:00:00 2001 From: Sandy Chen Date: Wed, 20 May 2026 16:51:36 +0900 Subject: [PATCH 1/3] [BUGFIX] Ingester: release TSDB appender on early-return paths in Push Push acquired a TSDB appender at the start of the function and, on early returns (e.g. out-of-order label set, hard append errors, Commit failure), could return without calling Commit or Rollback. This leaked TSDB head series references, mmap'd chunks, and pending appender state, observable as monotonic growth of cortex_ingester_tsdb_head_active_appenders. Defer Rollback right after the appender is acquired and skip it on the success path. `committed` is flipped to true before app.Commit() because Prometheus closes the appender even on Commit failure (internal self-rollback on WAL error), so the deferred Rollback must not run afterwards. The two now-redundant explicit Rollback calls inside the sample / histogram failure paths are removed and rely on the same defer. Add a regression test asserting cortex_ingester_tsdb_head_active_appenders stays at 0 after repeated out-of-order-label-set pushes. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Sandy Chen --- CHANGELOG.md | 1 + pkg/ingester/ingester.go | 33 ++++++++++++++------ pkg/ingester/ingester_test.go | 58 +++++++++++++++++++++++++++++++++++ 3 files changed, 83 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f86f69e383..bea6302941a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,7 @@ * [BUGFIX] Compactor: Fix stale `cortex_bucket_index_last_successful_update_timestamp_seconds` metric not being cleaned up when tenant ownership changes due to ring rebalancing. This caused false alarms on bucket index update rate when a tenant moved between compactors. #7485 * [BUGFIX] Security: Fix stored XSS vulnerability in Alertmanager and Store Gateway status pages by replacing `text/template` with `html/template`. #7512 * [BUGFIX] Security: Limit decompressed gzip output in `ParseProtoReader` and OTLP ingestion path. The decompressed body is now capped by `-distributor.otlp-max-recv-msg-size`. #7515 +* [BUGFIX] Ingester: Release the TSDB appender on every early-return path in `Push` (e.g. out-of-order label set) by deferring `Rollback`. Previously such requests leaked TSDB head series references, mmap'd chunks and pending state per request, causing the `cortex_ingester_tsdb_head_active_appenders` gauge to grow unbounded. ## 1.21.0 2026-04-24 diff --git a/pkg/ingester/ingester.go b/pkg/ingester/ingester.go index 6116a318992..c6b8430d36b 100644 --- a/pkg/ingester/ingester.go +++ b/pkg/ingester/ingester.go @@ -1437,6 +1437,22 @@ func (i *Ingester) Push(ctx context.Context, req *cortexpb.WriteRequest) (*corte // Walk the samples, appending them to the users database app := db.Appender(ctx).(extendedAppender) + // Ensure the appender is always released so that we don't leak TSDB head + // series references, mmap'd chunks and pending state on early returns. + // `committed` is flipped to true immediately before app.Commit() because + // Prometheus closes the appender even on Commit failure (it self-rolls + // back internally on WAL error), so the deferred Rollback must not run + // afterwards. + committed := false + defer func() { + if committed { + return + } + if rollbackErr := app.Rollback(); rollbackErr != nil { + level.Warn(logutil.WithContext(ctx, i.logger)).Log("msg", "failed to rollback appender on early return", "user", userID, "err", rollbackErr) + } + }() + // Even when OOO is enabled globally, we want to reject OOO samples in some cases. // prometheus implementation: https://github.com/prometheus/prometheus/pull/14710 if req.DiscardOutOfOrder { @@ -1505,11 +1521,8 @@ func (i *Ingester) Push(ctx context.Context, req *cortexpb.WriteRequest) (*corte if rollback := handleAppendFailure(err, s.TimestampMs, ts.Labels, copiedLabels, matchedLabelSetLimits); !rollback { continue } - // The error looks an issue on our side, so we should rollback - if rollbackErr := app.Rollback(); rollbackErr != nil { - level.Warn(logutil.WithContext(ctx, i.logger)).Log("msg", "failed to rollback on error", "user", userID, "err", rollbackErr) - } - + // The error looks an issue on our side, so we should rollback. + // The deferred rollback above will close the appender; nothing to do here. return nil, wrapWithUser(err, userID) } @@ -1560,10 +1573,8 @@ func (i *Ingester) Push(ctx context.Context, req *cortexpb.WriteRequest) (*corte if rollback := handleAppendFailure(err, hp.TimestampMs, ts.Labels, copiedLabels, matchedLabelSetLimits); !rollback { continue } - // The error looks an issue on our side, so we should rollback - if rollbackErr := app.Rollback(); rollbackErr != nil { - level.Warn(logutil.WithContext(ctx, i.logger)).Log("msg", "failed to rollback on error", "user", userID, "err", rollbackErr) - } + // The error looks an issue on our side, so we should rollback. + // The deferred rollback above will close the appender; nothing to do here. return nil, wrapWithUser(err, userID) } } else { @@ -1626,6 +1637,10 @@ func (i *Ingester) Push(ctx context.Context, req *cortexpb.WriteRequest) (*corte } startCommit := time.Now() + // Mark committed before calling Commit: Prometheus closes the appender on + // both success and failure of Commit (it self-rolls-back on WAL error), so + // the deferred Rollback must not fire afterwards. + committed = true if err := app.Commit(); err != nil { return nil, wrapWithUser(err, userID) } diff --git a/pkg/ingester/ingester_test.go b/pkg/ingester/ingester_test.go index 5dd0d1ec36b..80f12edfcd7 100644 --- a/pkg/ingester/ingester_test.go +++ b/pkg/ingester/ingester_test.go @@ -2833,6 +2833,64 @@ func TestIngester_Push_OutOfOrderLabels(t *testing.T) { require.NoError(t, err) } +// TestIngester_Push_OutOfOrderLabels_AppenderNotLeaked verifies that when Push +// returns early because of an out-of-order label set (and on any other early +// return after the appender is acquired) the appender is released via +// Rollback. Otherwise the TSDB head leaks series references, mmap'd chunks and +// pending state on every such request; observable via the +// cortex_ingester_tsdb_head_active_appenders gauge. +func TestIngester_Push_OutOfOrderLabels_AppenderNotLeaked(t *testing.T) { + cfg := defaultIngesterTestConfig(t) + r := prometheus.NewRegistry() + i, err := prepareIngesterWithBlocksStorage(t, cfg, r) + require.NoError(t, err) + require.NoError(t, services.StartAndAwaitRunning(context.Background(), i)) + defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck + + // Wait until it's ACTIVE. + test.Poll(t, time.Second, ring.ACTIVE, func() any { + return i.lifecycler.GetState() + }) + + ctx := user.InjectOrgID(context.Background(), "test-user") + + // First push a valid sample to initialise the user TSDB head so that + // subsequent Push() calls take the headAppender path. + validLabels := labels.FromStrings(labels.MetricName, "test_metric", "a", "1", "b", "2") + validReq, _ := mockWriteRequest(t, validLabels, 1, 1) + _, err = i.Push(ctx, validReq) + require.NoError(t, err) + + // Sanity check: no appenders are currently active. + const activeAppendersMetric = "cortex_ingester_tsdb_head_active_appenders" + expectedZero := ` + # HELP cortex_ingester_tsdb_head_active_appenders Number of currently active TSDB appender transactions. + # TYPE cortex_ingester_tsdb_head_active_appenders gauge + cortex_ingester_tsdb_head_active_appenders 0 +` + require.NoError(t, testutil.GatherAndCompare(r, bytes.NewBufferString(expectedZero), activeAppendersMetric)) + + // Now push a series of requests with an out-of-order label set. Each + // such request creates an appender that, without the leak fix, is never + // released, leaving the active-appenders gauge growing. + outOfOrderLabels := []cortexpb.LabelAdapter{ + {Name: labels.MetricName, Value: "test_metric"}, + {Name: "c", Value: "3"}, + {Name: "a", Value: "1"}, + } + const numLeakyPushes = 5 + for n := 0; n < numLeakyPushes; n++ { + req, _ := mockWriteRequest(t, cortexpb.FromLabelAdaptersToLabels(outOfOrderLabels), 1, int64(2+n)) + _, err = i.Push(ctx, req) + require.Error(t, err) + require.Contains(t, err.Error(), "out-of-order label set found") + } + + // The active-appenders gauge must still be 0 — every appender created by + // the early-returning Push() must have been released. + require.NoError(t, testutil.GatherAndCompare(r, bytes.NewBufferString(expectedZero), activeAppendersMetric)) +} + func BenchmarkIngesterPush(b *testing.B) { limits := defaultLimitsTestConfig() benchmarkIngesterPush(b, limits, false) From 703fefdda831c45161fe74af19c0933995dc5c9b Mon Sep 17 00:00:00 2001 From: Sandy Chen Date: Wed, 20 May 2026 17:37:33 +0900 Subject: [PATCH 2/3] [BUGFIX] Ingester: modernize for-loop in regression test Address `make check-modernize` lint failure by using Go 1.22+ `for range N` syntax instead of `for n := 0; n < numLeakyPushes; n++`. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Sandy Chen --- pkg/ingester/ingester_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/ingester/ingester_test.go b/pkg/ingester/ingester_test.go index 80f12edfcd7..5c2b66270c7 100644 --- a/pkg/ingester/ingester_test.go +++ b/pkg/ingester/ingester_test.go @@ -2879,7 +2879,7 @@ func TestIngester_Push_OutOfOrderLabels_AppenderNotLeaked(t *testing.T) { {Name: "a", Value: "1"}, } const numLeakyPushes = 5 - for n := 0; n < numLeakyPushes; n++ { + for n := range numLeakyPushes { req, _ := mockWriteRequest(t, cortexpb.FromLabelAdaptersToLabels(outOfOrderLabels), 1, int64(2+n)) _, err = i.Push(ctx, req) require.Error(t, err) From f524db39adb2dc9e069a73bb63ae0c1e0b0bc67a Mon Sep 17 00:00:00 2001 From: Sandy Chen Date: Thu, 21 May 2026 22:27:50 +0900 Subject: [PATCH 3/3] Update CHANGELOG.md Co-authored-by: Friedrich Gonzalez <1517449+friedrichg@users.noreply.github.com> Signed-off-by: Sandy Chen --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bea6302941a..b0006eabd36 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,7 +37,7 @@ * [BUGFIX] Compactor: Fix stale `cortex_bucket_index_last_successful_update_timestamp_seconds` metric not being cleaned up when tenant ownership changes due to ring rebalancing. This caused false alarms on bucket index update rate when a tenant moved between compactors. #7485 * [BUGFIX] Security: Fix stored XSS vulnerability in Alertmanager and Store Gateway status pages by replacing `text/template` with `html/template`. #7512 * [BUGFIX] Security: Limit decompressed gzip output in `ParseProtoReader` and OTLP ingestion path. The decompressed body is now capped by `-distributor.otlp-max-recv-msg-size`. #7515 -* [BUGFIX] Ingester: Release the TSDB appender on every early-return path in `Push` (e.g. out-of-order label set) by deferring `Rollback`. Previously such requests leaked TSDB head series references, mmap'd chunks and pending state per request, causing the `cortex_ingester_tsdb_head_active_appenders` gauge to grow unbounded. +* [BUGFIX] Ingester: Release the TSDB appender on every early-return path in `Push` (e.g. out-of-order label set) by deferring `Rollback`. Previously such requests leaked TSDB head series references, mmap'd chunks and pending state per request, causing the `cortex_ingester_tsdb_head_active_appenders` gauge to grow unbounded. #7528 ## 1.21.0 2026-04-24