Add PercentileTDigest support for MergeAndRollup aggregation #18088
base: master
Changes from 2 commits
New file: PercentileTDigestAggregator.java (+63 lines)

```java
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.pinot.core.segment.processing.aggregator;

import com.tdunning.math.stats.TDigest;
import java.util.Map;
import org.apache.pinot.core.common.ObjectSerDeUtils;
import org.apache.pinot.core.query.aggregation.function.PercentileTDigestAggregationFunction;
import org.apache.pinot.segment.spi.Constants;


public class PercentileTDigestAggregator implements ValueAggregator {

  @Override
  public Object aggregate(Object value1, Object value2, Map<String, String> functionParameters) {
    byte[] bytes1 = (byte[]) value1;
    byte[] bytes2 = (byte[]) value2;

    // Empty byte arrays represent the default null value for BYTES columns.
    // Deserializing byte[0] would throw BufferUnderflowException, so handle it explicitly.
    if (bytes1.length == 0 && bytes2.length == 0) {
      int compression = getCompression(functionParameters);
      return ObjectSerDeUtils.TDIGEST_SER_DE.serialize(TDigest.createMergingDigest(compression));
    }
    if (bytes1.length == 0) {
      return bytes2;
    }
    if (bytes2.length == 0) {
      return bytes1;
    }

    int compression = getCompression(functionParameters);
    TDigest first = ObjectSerDeUtils.TDIGEST_SER_DE.deserialize(bytes1);
    TDigest second = ObjectSerDeUtils.TDIGEST_SER_DE.deserialize(bytes2);
    TDigest merged = TDigest.createMergingDigest(compression);
    merged.add(first);
    merged.add(second);
    return ObjectSerDeUtils.TDIGEST_SER_DE.serialize(merged);
  }

  private int getCompression(Map<String, String> functionParameters) {
    String compressionParam = functionParameters.get(Constants.PERCENTILETDIGEST_COMPRESSION_FACTOR_KEY);
    return compressionParam != null
        ? Integer.parseInt(compressionParam)
        : PercentileTDigestAggregationFunction.DEFAULT_TDIGEST_COMPRESSION;
  }
}
```
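The empty-as-null branching above can be illustrated with a self-contained toy (plain Java, no Pinot or t-digest dependency): the byte-array "sketch" here is just a serialized list of doubles standing in for a TDigest, and `EmptyAsNullMerge` is a hypothetical stand-in class, not Pinot code.

```java
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

// Toy stand-in for the aggregator's contract: empty byte[] means "missing";
// otherwise the payload is a serialized list of doubles (standing in for a TDigest).
public class EmptyAsNullMerge {
  static byte[] serialize(List<Double> values) {
    try (ByteArrayOutputStream bos = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(bos)) {
      out.writeInt(values.size());
      for (double v : values) {
        out.writeDouble(v);
      }
      out.flush();
      return bos.toByteArray();
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
  }

  static List<Double> deserialize(byte[] bytes) {
    // Like TDIGEST_SER_DE, this would fail on byte[0]: readInt() needs 4 bytes.
    try (DataInputStream in = new DataInputStream(new ByteArrayInputStream(bytes))) {
      int size = in.readInt();
      List<Double> values = new ArrayList<>(size);
      for (int i = 0; i < size; i++) {
        values.add(in.readDouble());
      }
      return values;
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
  }

  // Mirrors the aggregator's branches: both empty -> serialized empty sketch;
  // one empty -> return the other side as-is; otherwise deserialize, merge, re-serialize.
  static byte[] aggregate(byte[] bytes1, byte[] bytes2) {
    if (bytes1.length == 0 && bytes2.length == 0) {
      return serialize(Collections.emptyList());
    }
    if (bytes1.length == 0) {
      return bytes2;
    }
    if (bytes2.length == 0) {
      return bytes1;
    }
    List<Double> merged = new ArrayList<>(deserialize(bytes1));
    merged.addAll(deserialize(bytes2));
    return serialize(merged);
  }

  public static void main(String[] args) {
    byte[] a = serialize(Arrays.asList(1.0, 2.0));
    byte[] b = serialize(Arrays.asList(3.0));
    System.out.println(deserialize(aggregate(a, b)).size());                     // 3: merged
    System.out.println(aggregate(new byte[0], b) == b);                          // true: pass-through
    System.out.println(deserialize(aggregate(new byte[0], new byte[0])).size()); // 0: empty sketch
  }
}
```

The pass-through branches avoid a deserialize/re-serialize round trip when one side is missing, which is also why the real aggregator returns the non-empty `byte[]` untouched.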
New test file: PercentileTDigestAggregatorTest.java (+139 lines)

```java
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.pinot.core.segment.processing.aggregator;

import com.tdunning.math.stats.TDigest;
import java.util.HashMap;
import java.util.Map;
import org.apache.pinot.core.common.ObjectSerDeUtils;
import org.apache.pinot.segment.spi.Constants;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;

import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertNotNull;


public class PercentileTDigestAggregatorTest {

  private PercentileTDigestAggregator _aggregator;

  @BeforeMethod
  public void setUp() {
    _aggregator = new PercentileTDigestAggregator();
  }

  @Test
  public void testAggregateWithDefaultCompression() {
    TDigest first = TDigest.createMergingDigest(100);
    for (int i = 0; i < 100; i++) {
      first.add(i);
    }
    TDigest second = TDigest.createMergingDigest(100);
    for (int i = 100; i < 200; i++) {
      second.add(i);
    }

    byte[] value1 = ObjectSerDeUtils.TDIGEST_SER_DE.serialize(first);
    byte[] value2 = ObjectSerDeUtils.TDIGEST_SER_DE.serialize(second);

    Map<String, String> functionParameters = new HashMap<>();
    byte[] result = (byte[]) _aggregator.aggregate(value1, value2, functionParameters);

    TDigest resultDigest = ObjectSerDeUtils.TDIGEST_SER_DE.deserialize(result);
    assertNotNull(resultDigest);
    assertEquals(resultDigest.size(), 200);
    assertEquals(resultDigest.quantile(0.5), 99.5, 1);
  }

  @Test
  public void testAggregateWithCustomCompression() {
    TDigest first = TDigest.createMergingDigest(100);
    for (int i = 0; i < 50; i++) {
      first.add(i);
    }
    TDigest second = TDigest.createMergingDigest(100);
    for (int i = 50; i < 100; i++) {
      second.add(i);
    }

    byte[] value1 = ObjectSerDeUtils.TDIGEST_SER_DE.serialize(first);
    byte[] value2 = ObjectSerDeUtils.TDIGEST_SER_DE.serialize(second);

    Map<String, String> functionParameters = new HashMap<>();
    functionParameters.put(Constants.PERCENTILETDIGEST_COMPRESSION_FACTOR_KEY, "200");

    byte[] result = (byte[]) _aggregator.aggregate(value1, value2, functionParameters);

    TDigest resultDigest = ObjectSerDeUtils.TDIGEST_SER_DE.deserialize(result);
    assertNotNull(resultDigest);
    assertEquals(resultDigest.size(), 100);
    assertEquals(resultDigest.quantile(0.5), 49.5, 1);
  }

  @Test
  public void testAggregateWithBothEmptyBytes() {
    byte[] empty1 = new byte[0];
    byte[] empty2 = new byte[0];

    Map<String, String> functionParameters = new HashMap<>();
    byte[] result = (byte[]) _aggregator.aggregate(empty1, empty2, functionParameters);

    // Should return a valid serialized empty TDigest, not crash
    TDigest resultDigest = ObjectSerDeUtils.TDIGEST_SER_DE.deserialize(result);
    assertNotNull(resultDigest);
    assertEquals(resultDigest.size(), 0);
  }

  @Test
  public void testAggregateWithFirstEmptyBytes() {
    TDigest second = TDigest.createMergingDigest(100);
    for (int i = 0; i < 50; i++) {
      second.add(i);
    }
    byte[] empty = new byte[0];
    byte[] value2 = ObjectSerDeUtils.TDIGEST_SER_DE.serialize(second);

    Map<String, String> functionParameters = new HashMap<>();
    byte[] result = (byte[]) _aggregator.aggregate(empty, value2, functionParameters);

    // Should return the non-empty side as-is
    assertEquals(result, value2);
    TDigest resultDigest = ObjectSerDeUtils.TDIGEST_SER_DE.deserialize(result);
    assertEquals(resultDigest.size(), 50);
  }

  @Test
  public void testAggregateWithSecondEmptyBytes() {
    TDigest first = TDigest.createMergingDigest(100);
    for (int i = 0; i < 50; i++) {
      first.add(i);
    }
    byte[] value1 = ObjectSerDeUtils.TDIGEST_SER_DE.serialize(first);
    byte[] empty = new byte[0];

    Map<String, String> functionParameters = new HashMap<>();
    byte[] result = (byte[]) _aggregator.aggregate(value1, empty, functionParameters);

    // Should return the non-empty side as-is
    assertEquals(result, value1);
    TDigest resultDigest = ObjectSerDeUtils.TDIGEST_SER_DE.deserialize(result);
    assertEquals(resultDigest.size(), 50);
  }
}
```
Changes to validateTaskConfigs (existing file), registering the new compressionFactor parameter:

```diff
@@ -504,7 +504,8 @@ public void validateTaskConfigs(TableConfig tableConfig, Schema schema, Map<Stri
     // check no mis-configured aggregation function parameters
     Set<String> allowedFunctionParameterNames = ImmutableSet.of(Constants.CPCSKETCH_LGK_KEY.toLowerCase(),
         Constants.THETA_TUPLE_SKETCH_SAMPLING_PROBABILITY.toLowerCase(),
-        Constants.THETA_TUPLE_SKETCH_NOMINAL_ENTRIES.toLowerCase());
+        Constants.THETA_TUPLE_SKETCH_NOMINAL_ENTRIES.toLowerCase(),
+        Constants.PERCENTILETDIGEST_COMPRESSION_FACTOR_KEY.toLowerCase());
     Map<String, Map<String, String>> aggregationFunctionParameters =
         MergeRollupTaskUtils.getAggregationFunctionParameters(taskConfigs);
     for (String fieldName : aggregationFunctionParameters.keySet()) {
@@ -515,10 +516,12 @@ public void validateTaskConfigs(TableConfig tableConfig, Schema schema, Map<Stri
       for (String functionParameterName : functionParameters.keySet()) {
         // check that function parameter name is valid
         Preconditions.checkState(allowedFunctionParameterNames.contains(functionParameterName.toLowerCase()),
-            "Aggregation function parameter name must be one of [lgK, samplingProbability, nominalEntries]!");
+            "Aggregation function parameter name must be one of [lgK, samplingProbability, nominalEntries,"
+                + " compressionFactor]!");
         // check that function parameter value is valid for nominal entries
         if (functionParameterName.equalsIgnoreCase(Constants.CPCSKETCH_LGK_KEY)
-            || functionParameterName.equalsIgnoreCase(Constants.THETA_TUPLE_SKETCH_NOMINAL_ENTRIES)) {
+            || functionParameterName.equalsIgnoreCase(Constants.THETA_TUPLE_SKETCH_NOMINAL_ENTRIES)
+            || functionParameterName.equalsIgnoreCase(Constants.PERCENTILETDIGEST_COMPRESSION_FACTOR_KEY)) {
           String value = functionParameters.get(functionParameterName);
           String err = "Aggregation function parameter \"" + functionParameterName + "\" on column \"" + fieldName
               + "\" has invalid value: " + value;
```

Review thread on the error message:

Reviewer: Consider simply printing out the allowed set.

Author: Done. In case it matters, just want to call out that this results in a cosmetic difference (the set is lowercase).
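For context, the validated parameter would be supplied through the MergeRollupTask section of the table config. The fragment below is a hypothetical sketch: the table name, the `latencyMs` metric column, and the exact key layout are assumptions (the `aggregationFunctionParameters.<column>.<param>` prefix matches what `MergeRollupTaskUtils.getAggregationFunctionParameters` parses, but consult the Pinot docs for the authoritative task config schema):

```json
{
  "taskTypeConfigsMap": {
    "MergeRollupTask": {
      "1day.mergeType": "rollup",
      "1day.bucketTimePeriod": "1d",
      "latencyMs.aggregationType": "percentileTDigest",
      "aggregationFunctionParameters.latencyMs.compressionFactor": "200"
    }
  }
}
```

With this config, the validation above accepts `compressionFactor` (case-insensitively) and checks that its value parses as a number, the same path already taken for `lgK` and `nominalEntries`.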
Review thread on the empty-bytes handling:

Jackie-Jiang (reviewer): Does it work if you simply return empty bytes?

justahuman1 (author): Thanks for the quick review! I don't think returning empty bytes is safe. The query-time aggregation function and RollupReducer both deserialize without checking for empty bytes. At LinkedIn we recently hit this exact issue with HLL in production, where empty bytes caused a BufferUnderflowException during the query execution path (I also wanted to send a separate PR to add a guard for all aggregators, if that makes sense). The reason for the empty serialization was to ensure everything downstream can parse it properly. What do you think/recommend?

I tried to create a static variable for this (for perf reasons), but it turns out the serialized byte buffers differ for empty digests created with different compression values, per a quick test script.

Jackie-Jiang: If we allow empty bytes in the value aggregator (i.e. the raw value), we should also allow it in the aggregation function and roll-up reducer. They should follow the same contract for input values.

justahuman1: Good point. Given that I am still new to the Pinot codebase, I would like to rely on your judgement. What do you believe is the right call? Should I make this change in the aggregation function and roll-up reducer, or shall I return empty bytes as you initially suggested?

Jackie-Jiang: Basically we want to treat empty bytes as null (i.e. a missing value). We should handle empty bytes in all 3 places. It is okay to merge the current PR and fix this in a separate PR, where we modify all the different functions to follow this convention. Let me know how you want to proceed.

Jackie-Jiang: @justahuman1 ^^

justahuman1: Thanks for the patience @Jackie-Jiang, I was waiting for tests/builds to finish and also wanted to think this through. I updated this PR to treat empty bytes as missing in PercentileTDigestAggregator (it returns empty bytes now). I see a similar fix for CPC currently handles empty bytes (per #17925). I agree this should be a separate PR; I want to think through the tradeoff more (and probably add metrics so silent nulls aren't invisible) before picking it up.

Let me know if you're in favor of adding this null handling throughout T-Digest (and potentially other sketches) and I will send it out in a separate PR.
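The guard discussed above (treat empty bytes as null at every deserialization site instead of letting the buffer underflow) could look like the following minimal sketch. `SketchBytes` and `toBufferOrNull` are hypothetical names, not Pinot API:

```java
import java.nio.ByteBuffer;

// Hypothetical defensive helper: callers that deserialize sketch bytes check for
// the empty-bytes "null" sentinel first, instead of hitting BufferUnderflowException
// deep inside the ser/de when a default BYTES value flows in.
public class SketchBytes {
  /** Returns null for the empty-bytes sentinel, otherwise a ByteBuffer over the payload. */
  static ByteBuffer toBufferOrNull(byte[] bytes) {
    return (bytes == null || bytes.length == 0) ? null : ByteBuffer.wrap(bytes);
  }

  public static void main(String[] args) {
    System.out.println(toBufferOrNull(new byte[0]) == null);           // true: empty means missing
    System.out.println(toBufferOrNull(new byte[] {1, 2}).remaining()); // 2: payload wrapped
  }
}
```

Each of the three call sites named in the thread (value aggregator, aggregation function, roll-up reducer) would branch on the null return and skip or substitute a default sketch, so all three follow the same empty-as-missing contract.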