From 21004b466f18cf0e8f23438706eb2df38c1cacf0 Mon Sep 17 00:00:00 2001 From: GWphua Date: Fri, 7 Nov 2025 18:22:38 +0800 Subject: [PATCH 01/32] Add byte buffer tracking for underlying hash tables --- .../query/groupby/GroupByStatsProvider.java | 50 ++++++++++++++----- .../AbstractBufferHashGrouper.java | 9 ++++ .../epinephelinae/ByteBufferHashTable.java | 16 ++++++ .../epinephelinae/ConcurrentGrouper.java | 4 +- .../LimitedBufferHashGrouper.java | 14 ++++++ .../epinephelinae/SpillingGrouper.java | 15 +++++- .../server/metrics/GroupByStatsMonitor.java | 8 +-- .../metrics/GroupByStatsMonitorTest.java | 3 +- 8 files changed, 97 insertions(+), 22 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/groupby/GroupByStatsProvider.java b/processing/src/main/java/org/apache/druid/query/groupby/GroupByStatsProvider.java index a5ce31cb5f98..344569053121 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/GroupByStatsProvider.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/GroupByStatsProvider.java @@ -60,13 +60,16 @@ public synchronized void closeQuery(QueryResourceId resourceId) public synchronized AggregateStats getStatsSince() { - return aggregateStatsContainer.reset(); + AggregateStats aggregateStats = new AggregateStats(aggregateStatsContainer); + aggregateStatsContainer.reset(); + return aggregateStats; } public static class AggregateStats { private long mergeBufferQueries = 0; private long mergeBufferAcquisitionTimeNs = 0; + private long mergeBufferTotalUsage = 0; private long spilledQueries = 0; private long spilledBytes = 0; private long mergeDictionarySize = 0; @@ -75,9 +78,22 @@ public AggregateStats() { } + public AggregateStats(AggregateStats aggregateStats) + { + this( + aggregateStats.mergeBufferQueries, + aggregateStats.mergeBufferAcquisitionTimeNs, + aggregateStats.mergeBufferTotalUsage, + aggregateStats.spilledQueries, + aggregateStats.spilledBytes, + aggregateStats.mergeDictionarySize + ); + } + 
public AggregateStats( long mergeBufferQueries, long mergeBufferAcquisitionTimeNs, + long mergeBufferTotalUsage, long spilledQueries, long spilledBytes, long mergeDictionarySize @@ -85,6 +101,7 @@ public AggregateStats( { this.mergeBufferQueries = mergeBufferQueries; this.mergeBufferAcquisitionTimeNs = mergeBufferAcquisitionTimeNs; + this.mergeBufferTotalUsage = mergeBufferTotalUsage; this.spilledQueries = spilledQueries; this.spilledBytes = spilledBytes; this.mergeDictionarySize = mergeDictionarySize; @@ -100,6 +117,11 @@ public long getMergeBufferAcquisitionTimeNs() return mergeBufferAcquisitionTimeNs; } + public long getMergeBufferTotalUsage() + { + return mergeBufferTotalUsage; + } + public long getSpilledQueries() { return spilledQueries; @@ -120,6 +142,7 @@ public void addQueryStats(PerQueryStats perQueryStats) if (perQueryStats.getMergeBufferAcquisitionTimeNs() > 0) { mergeBufferQueries++; mergeBufferAcquisitionTimeNs += perQueryStats.getMergeBufferAcquisitionTimeNs(); + mergeBufferTotalUsage += perQueryStats.getMergeBufferTotalUsage(); } if (perQueryStats.getSpilledBytes() > 0) { @@ -130,30 +153,21 @@ public void addQueryStats(PerQueryStats perQueryStats) mergeDictionarySize += perQueryStats.getMergeDictionarySize(); } - public AggregateStats reset() + public void reset() { - AggregateStats aggregateStats = - new AggregateStats( - mergeBufferQueries, - mergeBufferAcquisitionTimeNs, - spilledQueries, - spilledBytes, - mergeDictionarySize - ); - this.mergeBufferQueries = 0; this.mergeBufferAcquisitionTimeNs = 0; + this.mergeBufferTotalUsage = 0; this.spilledQueries = 0; this.spilledBytes = 0; this.mergeDictionarySize = 0; - - return aggregateStats; } } public static class PerQueryStats { private final AtomicLong mergeBufferAcquisitionTimeNs = new AtomicLong(0); + private final AtomicLong mergeBufferTotalUsage = new AtomicLong(0); private final AtomicLong spilledBytes = new AtomicLong(0); private final AtomicLong mergeDictionarySize = new AtomicLong(0); @@ 
-162,6 +176,11 @@ public void mergeBufferAcquisitionTime(long delay) mergeBufferAcquisitionTimeNs.addAndGet(delay); } + public void mergeBufferTotalUsage(long bytes) + { + mergeBufferTotalUsage.addAndGet(bytes); + } + public void spilledBytes(long bytes) { spilledBytes.addAndGet(bytes); @@ -177,6 +196,11 @@ public long getMergeBufferAcquisitionTimeNs() return mergeBufferAcquisitionTimeNs.get(); } + public long getMergeBufferTotalUsage() + { + return mergeBufferTotalUsage.get(); + } + public long getSpilledBytes() { return spilledBytes.get(); diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AbstractBufferHashGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AbstractBufferHashGrouper.java index 70cf5832cf33..e07409430b3b 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AbstractBufferHashGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AbstractBufferHashGrouper.java @@ -173,6 +173,15 @@ public void close() aggregators.reset(); } + /** + * This method is implemented to return the highest memory value claimed by the Grouper. This is only + * used for monitoring the size of the merge buffers used. + */ + public long getMergeBufferUsage() + { + return hashTable.getMaxTableBufferUsage(); + } + /** * Populate a {@link ReusableEntry} with values from a particular bucket. 
*/ diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferHashTable.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferHashTable.java index 62c65f7cecb7..0b93b5f5a5a9 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferHashTable.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferHashTable.java @@ -79,6 +79,9 @@ public static int calculateTableArenaSizeWithFixedAdditionalSize( @Nullable protected BucketUpdateHandler bucketUpdateHandler; + // Keeps track on how many bytes is being used in the merge buffer. + protected long maxTableBufferUsage; + public ByteBufferHashTable( float maxLoadFactor, int initialBuckets, @@ -97,6 +100,7 @@ public ByteBufferHashTable( this.maxSizeForTesting = maxSizeForTesting; this.tableArenaSize = buffer.capacity(); this.bucketUpdateHandler = bucketUpdateHandler; + this.maxTableBufferUsage = 0; } public void reset() @@ -139,6 +143,7 @@ public void reset() bufferDup.position(tableStart); bufferDup.limit(tableStart + maxBuckets * bucketSizeWithHash); tableBuffer = bufferDup.slice(); + updateMaxTableBufferUsage(); // Clear used bits of new table for (int i = 0; i < maxBuckets; i++) { @@ -225,6 +230,7 @@ public void adjustTableWhenFull() maxBuckets = newBuckets; regrowthThreshold = newMaxSize; tableBuffer = newTableBuffer; + updateMaxTableBufferUsage(); tableStart = newTableStart; growthCount++; @@ -381,6 +387,16 @@ public int getGrowthCount() return growthCount; } + protected void updateMaxTableBufferUsage() + { + maxTableBufferUsage = Math.max(maxTableBufferUsage, tableBuffer.capacity()); + } + + public long getMaxTableBufferUsage() + { + return maxTableBufferUsage; + } + public interface BucketUpdateHandler { void handleNewBucket(int bucketOffset); diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ConcurrentGrouper.java 
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ConcurrentGrouper.java index 8242c9d8cf5c..b4b4cb347019 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ConcurrentGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ConcurrentGrouper.java @@ -332,7 +332,7 @@ public void reset() throw new ISE("Grouper is closed"); } - groupers.forEach(Grouper::reset); + groupers.forEach(SpillingGrouper::reset); } @Override @@ -496,7 +496,7 @@ public void close() { if (!closed) { closed = true; - groupers.forEach(Grouper::close); + groupers.forEach(SpillingGrouper::close); } } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java index 0627fba0333d..9c0ec312475d 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java @@ -503,6 +503,7 @@ public AlternatingByteBufferHashTable( subHashTable2Buffer = subHashTable2Buffer.slice(); subHashTableBuffers = new ByteBuffer[]{subHashTable1Buffer, subHashTable2Buffer}; + updateMaxTableBufferUsage(); } @Override @@ -515,6 +516,7 @@ public void reset() subHashTableBuffers[0].put(i * bucketSizeWithHash, (byte) 0); } tableBuffer = subHashTableBuffers[0]; + updateMaxTableBufferUsage(); } @Override @@ -571,7 +573,19 @@ public void adjustTableWhenFull() size = numCopied; tableBuffer = newTableBuffer; + updateMaxTableBufferUsage(); growthCount++; } + + @Override + protected void updateMaxTableBufferUsage() + { + long currentBufferUsage = 0; + for (ByteBuffer buffer : subHashTableBuffers) { + currentBufferUsage += buffer.capacity(); + } + + maxTableBufferUsage = Math.max(maxTableBufferUsage, currentBufferUsage); + } } } diff --git 
a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/SpillingGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/SpillingGrouper.java index fadcfa02c95d..c45fb4cd5250 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/SpillingGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/SpillingGrouper.java @@ -68,7 +68,7 @@ public class SpillingGrouper implements Grouper "Not enough disk space to execute this query. Try raising druid.query.groupBy.maxOnDiskStorage." ); - private final Grouper grouper; + private final AbstractBufferHashGrouper grouper; private final KeySerde keySerde; private final LimitedTemporaryStorage temporaryStorage; private final ObjectMapper spillMapper; @@ -218,12 +218,23 @@ public void reset() @Override public void close() { - perQueryStats.dictionarySize(keySerde.getDictionarySize()); + perQueryStats.dictionarySize(getDictionarySizeEstimate()); + perQueryStats.mergeBufferTotalUsage(getMergeBufferUsage()); grouper.close(); keySerde.reset(); deleteFiles(); } + private long getMergeBufferUsage() + { + return grouper.getMergeBufferUsage(); + } + + private long getDictionarySizeEstimate() + { + return keySerde.getDictionarySize(); + } + /** * Returns a dictionary of string keys added to this grouper. Note that the dictionary of keySerde is spilled on * local storage whenever the inner grouper is spilled. 
If there are spilled dictionaries, this method loads them diff --git a/server/src/main/java/org/apache/druid/server/metrics/GroupByStatsMonitor.java b/server/src/main/java/org/apache/druid/server/metrics/GroupByStatsMonitor.java index 0f07bd2894be..8b3aee188b02 100644 --- a/server/src/main/java/org/apache/druid/server/metrics/GroupByStatsMonitor.java +++ b/server/src/main/java/org/apache/druid/server/metrics/GroupByStatsMonitor.java @@ -74,10 +74,10 @@ public boolean doMonitor(ServiceEmitter emitter) if (statsContainer.getMergeBufferQueries() > 0) { emitter.emit(builder.setMetric("mergeBuffer/queries", statsContainer.getMergeBufferQueries())); - emitter.emit(builder.setMetric( - "mergeBuffer/acquisitionTimeNs", - statsContainer.getMergeBufferAcquisitionTimeNs() - )); + emitter.emit( + builder.setMetric("mergeBuffer/acquisitionTimeNs", statsContainer.getMergeBufferAcquisitionTimeNs()) + ); + emitter.emit(builder.setMetric("mergeBuffer/totalUsage", statsContainer.getMergeBufferTotalUsage())); } if (statsContainer.getSpilledQueries() > 0) { diff --git a/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java b/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java index 5931fba677c1..3a1a294907c3 100644 --- a/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java +++ b/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java @@ -63,6 +63,7 @@ public synchronized AggregateStats getStatsSince() return new AggregateStats( 1L, 100L, + 200L, 2L, 200L, 300L @@ -70,7 +71,7 @@ public synchronized AggregateStats getStatsSince() } }; - mergeBufferPool = new DefaultBlockingPool(() -> ByteBuffer.allocate(1024), 5); + mergeBufferPool = new DefaultBlockingPool<>(() -> ByteBuffer.allocate(1024), 5); executorService = Executors.newSingleThreadExecutor(); } From c935ea6086f138402834aaaef0b4f218fa14cfa1 Mon Sep 17 00:00:00 2001 From: GWphua Date: Fri, 7 Nov 2025 18:39:18 +0800 
Subject: [PATCH 02/32] Byte buffer tracking for underlying offset handlers --- .../druid/query/groupby/GroupByStatsProvider.java | 3 ++- .../groupby/epinephelinae/ByteBufferIntList.java | 9 +++++++++ .../epinephelinae/ByteBufferMinMaxOffsetHeap.java | 13 ++++++++++--- .../epinephelinae/LimitedBufferHashGrouper.java | 8 ++++++++ .../query/groupby/GroupByStatsProviderTest.java | 4 ++++ .../druid/server/metrics/GroupByStatsMonitor.java | 2 +- .../server/metrics/GroupByStatsMonitorTest.java | 3 ++- 7 files changed, 36 insertions(+), 6 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/groupby/GroupByStatsProvider.java b/processing/src/main/java/org/apache/druid/query/groupby/GroupByStatsProvider.java index 344569053121..e207b7df5168 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/GroupByStatsProvider.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/GroupByStatsProvider.java @@ -27,7 +27,8 @@ import java.util.concurrent.atomic.AtomicLong; /** - * Metrics collector for groupBy queries like spilled bytes, merge buffer acquistion time, dictionary size. + * Metrics collector for groupBy queries like spilled bytes, merge buffer acquisition time, merge buffer memory usage, + * and dictionary footprint. 
*/ @LazySingleton public class GroupByStatsProvider diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferIntList.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferIntList.java index 28de255c13a0..d474dcbc7aa4 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferIntList.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferIntList.java @@ -30,6 +30,8 @@ public class ByteBufferIntList private final int maxElements; private int numElements; + private int maxMergeBufferUsageBytes; + public ByteBufferIntList( ByteBuffer buffer, int maxElements @@ -38,6 +40,7 @@ public ByteBufferIntList( this.buffer = buffer; this.maxElements = maxElements; this.numElements = 0; + this.maxMergeBufferUsageBytes = 0; if (buffer.capacity() < (maxElements * Integer.BYTES)) { throw new IAE( @@ -55,6 +58,7 @@ public void add(int val) } buffer.putInt(numElements * Integer.BYTES, val); numElements++; + maxMergeBufferUsageBytes = Math.max(maxMergeBufferUsageBytes, numElements * Integer.BYTES); } public void set(int index, int val) @@ -71,4 +75,9 @@ public void reset() { numElements = 0; } + + public int getMaxMergeBufferUsageBytes() + { + return maxMergeBufferUsageBytes; + } } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferMinMaxOffsetHeap.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferMinMaxOffsetHeap.java index cfa7295e6b43..d4585b34b41a 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferMinMaxOffsetHeap.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferMinMaxOffsetHeap.java @@ -44,6 +44,7 @@ public class ByteBufferMinMaxOffsetHeap private int heapSize; private int maxHeapSize; + private int maxMergeBufferUsageBytes; public ByteBufferMinMaxOffsetHeap( ByteBuffer buf, @@ -55,6 
+56,7 @@ public ByteBufferMinMaxOffsetHeap( this.buf = buf; this.limit = limit; this.heapSize = 0; + this.maxMergeBufferUsageBytes = 0; this.minComparator = minComparator; this.maxComparator = Ordering.from(minComparator).reverse(); this.heapIndexUpdater = heapIndexUpdater; @@ -71,9 +73,9 @@ public int addOffset(int offset) int pos = heapSize; buf.putInt(pos * Integer.BYTES, offset); heapSize++; - if (heapSize > maxHeapSize) { - maxHeapSize = heapSize; - } + + maxHeapSize = Math.max(maxHeapSize, heapSize); + maxMergeBufferUsageBytes = Math.max(maxMergeBufferUsageBytes, maxHeapSize * Integer.BYTES); if (heapIndexUpdater != null) { heapIndexUpdater.updateHeapIndexForOffset(offset, pos); @@ -226,6 +228,11 @@ public int getHeapSize() return heapSize; } + public int getMaxMergeBufferUsageBytes() + { + return maxMergeBufferUsageBytes; + } + private void bubbleUp(int pos) { if (isEvenLevel(pos)) { diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java index 9c0ec312475d..57e29bfd6bd5 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java @@ -458,6 +458,14 @@ public boolean validateBufferCapacity(int bufferCapacity) } } + @Override + public long getMergeBufferUsage() + { + long hashTableUsage = super.getMergeBufferUsage(); + long offSetHeapUsage = offsetHeap.getMaxMergeBufferUsageBytes(); + return hashTableUsage + offSetHeapUsage; + } + private class AlternatingByteBufferHashTable extends ByteBufferHashTable { // The base buffer is split into two alternating halves, with one sub-buffer in use at a given time. 
diff --git a/processing/src/test/java/org/apache/druid/query/groupby/GroupByStatsProviderTest.java b/processing/src/test/java/org/apache/druid/query/groupby/GroupByStatsProviderTest.java index 565a5ab97bc3..592506eee020 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/GroupByStatsProviderTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/GroupByStatsProviderTest.java @@ -35,6 +35,7 @@ public void testMetricCollection() stats1.mergeBufferAcquisitionTime(300); stats1.mergeBufferAcquisitionTime(400); + stats1.mergeBufferTotalUsage(50); stats1.spilledBytes(200); stats1.spilledBytes(400); stats1.dictionarySize(100); @@ -45,6 +46,7 @@ public void testMetricCollection() stats2.mergeBufferAcquisitionTime(500); stats2.mergeBufferAcquisitionTime(600); + stats1.mergeBufferTotalUsage(100); stats2.spilledBytes(400); stats2.spilledBytes(600); stats2.dictionarySize(300); @@ -53,6 +55,7 @@ public void testMetricCollection() GroupByStatsProvider.AggregateStats aggregateStats = statsProvider.getStatsSince(); Assert.assertEquals(0L, aggregateStats.getMergeBufferQueries()); Assert.assertEquals(0L, aggregateStats.getMergeBufferAcquisitionTimeNs()); + Assert.assertEquals(0L, aggregateStats.getMergeBufferTotalUsage()); Assert.assertEquals(0L, aggregateStats.getSpilledQueries()); Assert.assertEquals(0L, aggregateStats.getSpilledBytes()); Assert.assertEquals(0L, aggregateStats.getMergeDictionarySize()); @@ -63,6 +66,7 @@ public void testMetricCollection() aggregateStats = statsProvider.getStatsSince(); Assert.assertEquals(2, aggregateStats.getMergeBufferQueries()); Assert.assertEquals(1800L, aggregateStats.getMergeBufferAcquisitionTimeNs()); + Assert.assertEquals(150L, aggregateStats.getMergeBufferTotalUsage()); Assert.assertEquals(2L, aggregateStats.getSpilledQueries()); Assert.assertEquals(1600L, aggregateStats.getSpilledBytes()); Assert.assertEquals(1000L, aggregateStats.getMergeDictionarySize()); diff --git 
a/server/src/main/java/org/apache/druid/server/metrics/GroupByStatsMonitor.java b/server/src/main/java/org/apache/druid/server/metrics/GroupByStatsMonitor.java index 8b3aee188b02..e56af6723f3f 100644 --- a/server/src/main/java/org/apache/druid/server/metrics/GroupByStatsMonitor.java +++ b/server/src/main/java/org/apache/druid/server/metrics/GroupByStatsMonitor.java @@ -77,7 +77,7 @@ public boolean doMonitor(ServiceEmitter emitter) emitter.emit( builder.setMetric("mergeBuffer/acquisitionTimeNs", statsContainer.getMergeBufferAcquisitionTimeNs()) ); - emitter.emit(builder.setMetric("mergeBuffer/totalUsage", statsContainer.getMergeBufferTotalUsage())); + emitter.emit(builder.setMetric("mergeBuffer/totalBytesUsed", statsContainer.getMergeBufferTotalUsage())); } if (statsContainer.getSpilledQueries() > 0) { diff --git a/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java b/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java index 3a1a294907c3..0ea2843a6c25 100644 --- a/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java +++ b/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java @@ -92,11 +92,12 @@ public void testMonitor() // Trigger metric emission monitor.doMonitor(emitter); - Assert.assertEquals(7, emitter.getNumEmittedEvents()); + Assert.assertEquals(8, emitter.getNumEmittedEvents()); emitter.verifyValue("mergeBuffer/pendingRequests", 0L); emitter.verifyValue("mergeBuffer/used", 0L); emitter.verifyValue("mergeBuffer/queries", 1L); emitter.verifyValue("mergeBuffer/acquisitionTimeNs", 100L); + emitter.verifyValue("mergeBuffer/totalBytesUsed", 400L); emitter.verifyValue("groupBy/spilledQueries", 2L); emitter.verifyValue("groupBy/spilledBytes", 200L); emitter.verifyValue("groupBy/mergeDictionarySize", 300L); From c7819104fb6e04e70a6c745725e217119a07e29e Mon Sep 17 00:00:00 2001 From: GWphua Date: Fri, 7 Nov 2025 18:40:37 +0800 Subject: [PATCH 03/32] Fix 
tests --- .../apache/druid/server/metrics/GroupByStatsMonitorTest.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java b/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java index 0ea2843a6c25..67025f7887d5 100644 --- a/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java +++ b/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java @@ -97,7 +97,7 @@ public void testMonitor() emitter.verifyValue("mergeBuffer/used", 0L); emitter.verifyValue("mergeBuffer/queries", 1L); emitter.verifyValue("mergeBuffer/acquisitionTimeNs", 100L); - emitter.verifyValue("mergeBuffer/totalBytesUsed", 400L); + emitter.verifyValue("mergeBuffer/totalBytesUsed", 200L); emitter.verifyValue("groupBy/spilledQueries", 2L); emitter.verifyValue("groupBy/spilledBytes", 200L); emitter.verifyValue("groupBy/mergeDictionarySize", 300L); @@ -135,11 +135,12 @@ public void testMonitorWithServiceDimensions() final Map dimFilters = Map.of( "taskId", List.of(taskId), "dataSource", List.of(dataSource), "id", List.of(taskId) ); - Assert.assertEquals(7, emitter.getNumEmittedEvents()); + Assert.assertEquals(8, emitter.getNumEmittedEvents()); emitter.verifyValue("mergeBuffer/pendingRequests", dimFilters, 0L); emitter.verifyValue("mergeBuffer/used", dimFilters, 0L); emitter.verifyValue("mergeBuffer/queries", dimFilters, 1L); emitter.verifyValue("mergeBuffer/acquisitionTimeNs", dimFilters, 100L); + emitter.verifyValue("mergeBuffer/totalBytesUsed", 200L); emitter.verifyValue("groupBy/spilledQueries", dimFilters, 2L); emitter.verifyValue("groupBy/spilledBytes", dimFilters, 200L); emitter.verifyValue("groupBy/mergeDictionarySize", dimFilters, 300L); From 7063d09e9641eeed28f3ccc86f1de578dd69b6a1 Mon Sep 17 00:00:00 2001 From: GWphua Date: Mon, 10 Nov 2025 11:46:25 +0800 Subject: [PATCH 04/32] Fix quidem tests --- 
.../druid/query/groupby/epinephelinae/SpillingGrouper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/SpillingGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/SpillingGrouper.java index c45fb4cd5250..57e678de4480 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/SpillingGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/SpillingGrouper.java @@ -227,7 +227,7 @@ public void close() private long getMergeBufferUsage() { - return grouper.getMergeBufferUsage(); + return grouper.isInitialized() ? grouper.getMergeBufferUsage() : 0L; } private long getDictionarySizeEstimate() From 19f6bc3e7e2bd72d12c56b84fa23a5e4e42897b1 Mon Sep 17 00:00:00 2001 From: GWphua Date: Mon, 10 Nov 2025 14:12:31 +0800 Subject: [PATCH 05/32] Documentation --- docs/operations/metrics.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/operations/metrics.md b/docs/operations/metrics.md index d16246ab6d4f..043ee868dbf0 100644 --- a/docs/operations/metrics.md +++ b/docs/operations/metrics.md @@ -89,6 +89,7 @@ Most metric values reset each emission period, as specified in `druid.monitoring |`mergeBuffer/used`|Number of merge buffers used from the merge buffer pool.|This metric is only available if the `GroupByStatsMonitor` module is included.|Depends on the number of groupBy queries needing merge buffers.| |`mergeBuffer/queries`|Number of groupBy queries that acquired a batch of buffers from the merge buffer pool.|This metric is only available if the `GroupByStatsMonitor` module is included.|Depends on the number of groupBy queries needing merge buffers.| |`mergeBuffer/acquisitionTimeNs`|Total time in nanoseconds to acquire merge buffer for groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| +|`mergeBuffer/totalBytesUsed`|Number of bytes used by 
merge buffers to process groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`groupBy/spilledQueries`|Number of groupBy queries that have spilled onto the disk.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`groupBy/spilledBytes`|Number of bytes spilled on the disk by the groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`groupBy/mergeDictionarySize`|Size of on-heap merge dictionary in bytes.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| @@ -113,6 +114,7 @@ Most metric values reset each emission period, as specified in `druid.monitoring |`mergeBuffer/used`|Number of merge buffers used from the merge buffer pool.|This metric is only available if the `GroupByStatsMonitor` module is included.|Depends on the number of groupBy queries needing merge buffers.| |`mergeBuffer/queries`|Number of groupBy queries that acquired a batch of buffers from the merge buffer pool.|This metric is only available if the `GroupByStatsMonitor` module is included.|Depends on the number of groupBy queries needing merge buffers.| |`mergeBuffer/acquisitionTimeNs`|Total time in nanoseconds to acquire merge buffer for groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| +|`mergeBuffer/totalBytesUsed`|Number of bytes used by merge buffers to process groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`groupBy/spilledQueries`|Number of groupBy queries that have spilled onto the disk.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`groupBy/spilledBytes`|Number of bytes spilled on the disk by the groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`groupBy/mergeDictionarySize`|Size of on-heap merge dictionary in 
bytes.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| @@ -140,6 +142,7 @@ to represent the task ID are deprecated and will be removed in a future release. |`mergeBuffer/used`|Number of merge buffers used from the merge buffer pool. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Depends on the number of groupBy queries needing merge buffers.| |`mergeBuffer/queries`|Number of groupBy queries that acquired a batch of buffers from the merge buffer pool. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Depends on the number of groupBy queries needing merge buffers.| |`mergeBuffer/acquisitionTimeNs`|Total time in nanoseconds to acquire merge buffer for groupBy queries. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| +|`mergeBuffer/totalBytesUsed`|Number of bytes used by merge buffers to process groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| |`groupBy/spilledQueries`|Number of groupBy queries that have spilled onto the disk. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| |`groupBy/spilledBytes`|Number of bytes spilled on the disk by the groupBy queries. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| |`groupBy/mergeDictionarySize`|Size of on-heap merge dictionary in bytes. 
This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| From 0fcb6a00641014435965f331a7857004a4768a19 Mon Sep 17 00:00:00 2001 From: GWphua Date: Tue, 11 Nov 2025 17:07:47 +0800 Subject: [PATCH 06/32] bytesUsed naming --- docs/operations/metrics.md | 6 +++--- .../apache/druid/server/metrics/GroupByStatsMonitor.java | 2 +- .../druid/server/metrics/GroupByStatsMonitorTest.java | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/operations/metrics.md b/docs/operations/metrics.md index 043ee868dbf0..a747334d4cd2 100644 --- a/docs/operations/metrics.md +++ b/docs/operations/metrics.md @@ -89,7 +89,7 @@ Most metric values reset each emission period, as specified in `druid.monitoring |`mergeBuffer/used`|Number of merge buffers used from the merge buffer pool.|This metric is only available if the `GroupByStatsMonitor` module is included.|Depends on the number of groupBy queries needing merge buffers.| |`mergeBuffer/queries`|Number of groupBy queries that acquired a batch of buffers from the merge buffer pool.|This metric is only available if the `GroupByStatsMonitor` module is included.|Depends on the number of groupBy queries needing merge buffers.| |`mergeBuffer/acquisitionTimeNs`|Total time in nanoseconds to acquire merge buffer for groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| -|`mergeBuffer/totalBytesUsed`|Number of bytes used by merge buffers to process groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| +|`mergeBuffer/bytesUsed`|Number of bytes used by merge buffers to process groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`groupBy/spilledQueries`|Number of groupBy queries that have spilled onto the disk.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`groupBy/spilledBytes`|Number of 
bytes spilled on the disk by the groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`groupBy/mergeDictionarySize`|Size of on-heap merge dictionary in bytes.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| @@ -114,7 +114,7 @@ Most metric values reset each emission period, as specified in `druid.monitoring |`mergeBuffer/used`|Number of merge buffers used from the merge buffer pool.|This metric is only available if the `GroupByStatsMonitor` module is included.|Depends on the number of groupBy queries needing merge buffers.| |`mergeBuffer/queries`|Number of groupBy queries that acquired a batch of buffers from the merge buffer pool.|This metric is only available if the `GroupByStatsMonitor` module is included.|Depends on the number of groupBy queries needing merge buffers.| |`mergeBuffer/acquisitionTimeNs`|Total time in nanoseconds to acquire merge buffer for groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| -|`mergeBuffer/totalBytesUsed`|Number of bytes used by merge buffers to process groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| +|`mergeBuffer/bytesUsed`|Number of bytes used by merge buffers to process groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`groupBy/spilledQueries`|Number of groupBy queries that have spilled onto the disk.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`groupBy/spilledBytes`|Number of bytes spilled on the disk by the groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`groupBy/mergeDictionarySize`|Size of on-heap merge dictionary in bytes.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| @@ -142,7 +142,7 @@ to represent the task ID are deprecated and 
will be removed in a future release. |`mergeBuffer/used`|Number of merge buffers used from the merge buffer pool. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Depends on the number of groupBy queries needing merge buffers.| |`mergeBuffer/queries`|Number of groupBy queries that acquired a batch of buffers from the merge buffer pool. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Depends on the number of groupBy queries needing merge buffers.| |`mergeBuffer/acquisitionTimeNs`|Total time in nanoseconds to acquire merge buffer for groupBy queries. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| -|`mergeBuffer/totalBytesUsed`|Number of bytes used by merge buffers to process groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| +|`mergeBuffer/bytesUsed`|Number of bytes used by merge buffers to process groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| |`groupBy/spilledQueries`|Number of groupBy queries that have spilled onto the disk. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| |`groupBy/spilledBytes`|Number of bytes spilled on the disk by the groupBy queries. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| |`groupBy/mergeDictionarySize`|Size of on-heap merge dictionary in bytes. 
This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| diff --git a/server/src/main/java/org/apache/druid/server/metrics/GroupByStatsMonitor.java b/server/src/main/java/org/apache/druid/server/metrics/GroupByStatsMonitor.java index e56af6723f3f..ca7dc4b16c5b 100644 --- a/server/src/main/java/org/apache/druid/server/metrics/GroupByStatsMonitor.java +++ b/server/src/main/java/org/apache/druid/server/metrics/GroupByStatsMonitor.java @@ -77,7 +77,7 @@ public boolean doMonitor(ServiceEmitter emitter) emitter.emit( builder.setMetric("mergeBuffer/acquisitionTimeNs", statsContainer.getMergeBufferAcquisitionTimeNs()) ); - emitter.emit(builder.setMetric("mergeBuffer/totalBytesUsed", statsContainer.getMergeBufferTotalUsage())); + emitter.emit(builder.setMetric("mergeBuffer/bytesUsed", statsContainer.getMergeBufferTotalUsage())); } if (statsContainer.getSpilledQueries() > 0) { diff --git a/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java b/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java index 67025f7887d5..606f6e981549 100644 --- a/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java +++ b/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java @@ -97,7 +97,7 @@ public void testMonitor() emitter.verifyValue("mergeBuffer/used", 0L); emitter.verifyValue("mergeBuffer/queries", 1L); emitter.verifyValue("mergeBuffer/acquisitionTimeNs", 100L); - emitter.verifyValue("mergeBuffer/totalBytesUsed", 200L); + emitter.verifyValue("mergeBuffer/bytesUsed", 200L); emitter.verifyValue("groupBy/spilledQueries", 2L); emitter.verifyValue("groupBy/spilledBytes", 200L); emitter.verifyValue("groupBy/mergeDictionarySize", 300L); @@ -140,7 +140,7 @@ public void testMonitorWithServiceDimensions() emitter.verifyValue("mergeBuffer/used", dimFilters, 0L); emitter.verifyValue("mergeBuffer/queries", dimFilters, 1L); 
emitter.verifyValue("mergeBuffer/acquisitionTimeNs", dimFilters, 100L); - emitter.verifyValue("mergeBuffer/totalBytesUsed", 200L); + emitter.verifyValue("mergeBuffer/bytesUsed", 200L); emitter.verifyValue("groupBy/spilledQueries", dimFilters, 2L); emitter.verifyValue("groupBy/spilledBytes", dimFilters, 200L); emitter.verifyValue("groupBy/mergeDictionarySize", dimFilters, 300L); From 25f10d2538d1ae0ab0a79e7ae0f410ca3740bd0f Mon Sep 17 00:00:00 2001 From: GWphua Date: Mon, 24 Nov 2025 17:37:42 +0800 Subject: [PATCH 07/32] Add max metrics --- docs/operations/metrics.md | 12 +++++ .../query/groupby/GroupByStatsProvider.java | 54 ++++++++++++++++++- .../server/metrics/GroupByStatsMonitor.java | 8 +++ .../metrics/GroupByStatsMonitorTest.java | 18 +++++-- 4 files changed, 87 insertions(+), 5 deletions(-) diff --git a/docs/operations/metrics.md b/docs/operations/metrics.md index a747334d4cd2..2fae933b1dab 100644 --- a/docs/operations/metrics.md +++ b/docs/operations/metrics.md @@ -90,9 +90,13 @@ Most metric values reset each emission period, as specified in `druid.monitoring |`mergeBuffer/queries`|Number of groupBy queries that acquired a batch of buffers from the merge buffer pool.|This metric is only available if the `GroupByStatsMonitor` module is included.|Depends on the number of groupBy queries needing merge buffers.| |`mergeBuffer/acquisitionTimeNs`|Total time in nanoseconds to acquire merge buffer for groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`mergeBuffer/bytesUsed`|Number of bytes used by merge buffers to process groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| +|`mergeBuffer/maxAcquisitionTimeNs`|Maximum time in nanoseconds to acquire merge buffer for any single groupBy query within the emission period.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| +|`mergeBuffer/maxBytesUsed`|Maximum number of bytes used 
by merge buffers for any single groupBy query within the emission period.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`groupBy/spilledQueries`|Number of groupBy queries that have spilled onto the disk.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`groupBy/spilledBytes`|Number of bytes spilled on the disk by the groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| +|`groupBy/maxSpilledBytes`|Maximum number of bytes spilled to disk by any single groupBy query within the emission period.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`groupBy/mergeDictionarySize`|Size of on-heap merge dictionary in bytes.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| +|`groupBy/maxMergeDictionarySize`|Maximum size of the on-heap merge dictionary in bytes observed for any single groupBy query within the emission period.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| ### Historical @@ -115,9 +119,13 @@ Most metric values reset each emission period, as specified in `druid.monitoring |`mergeBuffer/queries`|Number of groupBy queries that acquired a batch of buffers from the merge buffer pool.|This metric is only available if the `GroupByStatsMonitor` module is included.|Depends on the number of groupBy queries needing merge buffers.| |`mergeBuffer/acquisitionTimeNs`|Total time in nanoseconds to acquire merge buffer for groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`mergeBuffer/bytesUsed`|Number of bytes used by merge buffers to process groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| +|`mergeBuffer/maxAcquisitionTimeNs`|Maximum time in nanoseconds to acquire merge buffer for any single groupBy query within the emission 
period.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| +|`mergeBuffer/maxBytesUsed`|Maximum number of bytes used by merge buffers for any single groupBy query within the emission period.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`groupBy/spilledQueries`|Number of groupBy queries that have spilled onto the disk.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`groupBy/spilledBytes`|Number of bytes spilled on the disk by the groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| +|`groupBy/maxSpilledBytes`|Maximum number of bytes spilled to disk by any single groupBy query within the emission period.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`groupBy/mergeDictionarySize`|Size of on-heap merge dictionary in bytes.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| +|`groupBy/maxMergeDictionarySize`|Maximum size of the on-heap merge dictionary in bytes observed for any single groupBy query within the emission period.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| ### Real-time @@ -143,9 +151,13 @@ to represent the task ID are deprecated and will be removed in a future release. |`mergeBuffer/queries`|Number of groupBy queries that acquired a batch of buffers from the merge buffer pool. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Depends on the number of groupBy queries needing merge buffers.| |`mergeBuffer/acquisitionTimeNs`|Total time in nanoseconds to acquire merge buffer for groupBy queries. 
This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| |`mergeBuffer/bytesUsed`|Number of bytes used by merge buffers to process groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| +|`mergeBuffer/maxAcquisitionTimeNs`|Maximum time in nanoseconds to acquire merge buffer for any single groupBy query within the emission period. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| +|`mergeBuffer/maxBytesUsed`|Maximum number of bytes used by merge buffers for any single groupBy query within the emission period. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| |`groupBy/spilledQueries`|Number of groupBy queries that have spilled onto the disk. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| |`groupBy/spilledBytes`|Number of bytes spilled on the disk by the groupBy queries. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| +|`groupBy/maxSpilledBytes`|Maximum number of bytes spilled to disk by any single groupBy query within the emission period. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| |`groupBy/mergeDictionarySize`|Size of on-heap merge dictionary in bytes. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| +|`groupBy/maxMergeDictionarySize`|Maximum size of the on-heap merge dictionary in bytes observed for any single groupBy query within the emission period. 
This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| ### Jetty diff --git a/processing/src/main/java/org/apache/druid/query/groupby/GroupByStatsProvider.java b/processing/src/main/java/org/apache/druid/query/groupby/GroupByStatsProvider.java index e207b7df5168..ae1afd721274 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/GroupByStatsProvider.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/GroupByStatsProvider.java @@ -71,9 +71,13 @@ public static class AggregateStats private long mergeBufferQueries = 0; private long mergeBufferAcquisitionTimeNs = 0; private long mergeBufferTotalUsage = 0; + private long maxMergeBufferAcquisitionTimeNs = 0; + private long maxMergeBufferUsage = 0; private long spilledQueries = 0; private long spilledBytes = 0; + private long maxSpilledBytes = 0; private long mergeDictionarySize = 0; + private long maxMergeDictionarySize = 0; public AggregateStats() { @@ -85,9 +89,13 @@ public AggregateStats(AggregateStats aggregateStats) aggregateStats.mergeBufferQueries, aggregateStats.mergeBufferAcquisitionTimeNs, aggregateStats.mergeBufferTotalUsage, + aggregateStats.maxMergeBufferAcquisitionTimeNs, + aggregateStats.maxMergeBufferUsage, aggregateStats.spilledQueries, aggregateStats.spilledBytes, - aggregateStats.mergeDictionarySize + aggregateStats.maxSpilledBytes, + aggregateStats.mergeDictionarySize, + aggregateStats.maxMergeDictionarySize ); } @@ -95,17 +103,25 @@ public AggregateStats( long mergeBufferQueries, long mergeBufferAcquisitionTimeNs, long mergeBufferTotalUsage, + long maxMergeBufferAcquisitionTimeNs, + long maxMergeBufferUsage, long spilledQueries, long spilledBytes, - long mergeDictionarySize + long maxSpilledBytes, + long mergeDictionarySize, + long maxMergeDictionarySize ) { this.mergeBufferQueries = mergeBufferQueries; this.mergeBufferAcquisitionTimeNs = mergeBufferAcquisitionTimeNs; this.mergeBufferTotalUsage = 
mergeBufferTotalUsage; + this.maxMergeBufferAcquisitionTimeNs = maxMergeBufferAcquisitionTimeNs; + this.maxMergeBufferUsage = maxMergeBufferUsage; this.spilledQueries = spilledQueries; this.spilledBytes = spilledBytes; + this.maxSpilledBytes = maxSpilledBytes; this.mergeDictionarySize = mergeDictionarySize; + this.maxMergeDictionarySize = maxMergeDictionarySize; } public long getMergeBufferQueries() @@ -123,6 +139,16 @@ public long getMergeBufferTotalUsage() return mergeBufferTotalUsage; } + public long getMaxMergeBufferAcquisitionTimeNs() + { + return maxMergeBufferAcquisitionTimeNs; + } + + public long getMaxMergeBufferUsage() + { + return maxMergeBufferUsage; + } + public long getSpilledQueries() { return spilledQueries; @@ -133,25 +159,45 @@ public long getSpilledBytes() return spilledBytes; } + public long getMaxSpilledBytes() + { + return maxSpilledBytes; + } + public long getMergeDictionarySize() { return mergeDictionarySize; } + public long getMaxMergeDictionarySize() + { + return maxMergeDictionarySize; + } + public void addQueryStats(PerQueryStats perQueryStats) { if (perQueryStats.getMergeBufferAcquisitionTimeNs() > 0) { mergeBufferQueries++; mergeBufferAcquisitionTimeNs += perQueryStats.getMergeBufferAcquisitionTimeNs(); mergeBufferTotalUsage += perQueryStats.getMergeBufferTotalUsage(); + maxMergeBufferAcquisitionTimeNs = Math.max( + maxMergeBufferAcquisitionTimeNs, + perQueryStats.getMergeBufferAcquisitionTimeNs() + ); + maxMergeBufferUsage = Math.max( + maxMergeBufferUsage, + perQueryStats.getMergeBufferTotalUsage() + ); } if (perQueryStats.getSpilledBytes() > 0) { spilledQueries++; spilledBytes += perQueryStats.getSpilledBytes(); + maxSpilledBytes = Math.max(maxSpilledBytes, perQueryStats.getSpilledBytes()); } mergeDictionarySize += perQueryStats.getMergeDictionarySize(); + maxMergeDictionarySize = Math.max(maxMergeDictionarySize, perQueryStats.getMergeDictionarySize()); } public void reset() @@ -159,9 +205,13 @@ public void reset() 
this.mergeBufferQueries = 0; this.mergeBufferAcquisitionTimeNs = 0; this.mergeBufferTotalUsage = 0; + this.maxMergeBufferAcquisitionTimeNs = 0; + this.maxMergeBufferUsage = 0; this.spilledQueries = 0; this.spilledBytes = 0; + this.maxSpilledBytes = 0; this.mergeDictionarySize = 0; + this.maxMergeDictionarySize = 0; } } diff --git a/server/src/main/java/org/apache/druid/server/metrics/GroupByStatsMonitor.java b/server/src/main/java/org/apache/druid/server/metrics/GroupByStatsMonitor.java index ca7dc4b16c5b..e9f8bbfc9fbe 100644 --- a/server/src/main/java/org/apache/druid/server/metrics/GroupByStatsMonitor.java +++ b/server/src/main/java/org/apache/druid/server/metrics/GroupByStatsMonitor.java @@ -78,15 +78,23 @@ public boolean doMonitor(ServiceEmitter emitter) builder.setMetric("mergeBuffer/acquisitionTimeNs", statsContainer.getMergeBufferAcquisitionTimeNs()) ); emitter.emit(builder.setMetric("mergeBuffer/bytesUsed", statsContainer.getMergeBufferTotalUsage())); + emitter.emit( + builder.setMetric("mergeBuffer/maxAcquisitionTimeNs", statsContainer.getMaxMergeBufferAcquisitionTimeNs()) + ); + emitter.emit(builder.setMetric("mergeBuffer/maxBytesUsed", statsContainer.getMaxMergeBufferUsage())); } if (statsContainer.getSpilledQueries() > 0) { emitter.emit(builder.setMetric("groupBy/spilledQueries", statsContainer.getSpilledQueries())); emitter.emit(builder.setMetric("groupBy/spilledBytes", statsContainer.getSpilledBytes())); + emitter.emit(builder.setMetric("groupBy/maxSpilledBytes", statsContainer.getMaxSpilledBytes())); } if (statsContainer.getMergeDictionarySize() > 0) { emitter.emit(builder.setMetric("groupBy/mergeDictionarySize", statsContainer.getMergeDictionarySize())); + emitter.emit( + builder.setMetric("groupBy/maxMergeDictionarySize", statsContainer.getMaxMergeDictionarySize()) + ); } return true; diff --git a/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java 
b/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java index 606f6e981549..5f8a61f6d023 100644 --- a/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java +++ b/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java @@ -64,8 +64,12 @@ public synchronized AggregateStats getStatsSince() 1L, 100L, 200L, + 100L, + 200L, 2L, 200L, + 200L, + 300L, 300L ); } @@ -92,15 +96,19 @@ public void testMonitor() // Trigger metric emission monitor.doMonitor(emitter); - Assert.assertEquals(8, emitter.getNumEmittedEvents()); + Assert.assertEquals(12, emitter.getNumEmittedEvents()); emitter.verifyValue("mergeBuffer/pendingRequests", 0L); emitter.verifyValue("mergeBuffer/used", 0L); emitter.verifyValue("mergeBuffer/queries", 1L); emitter.verifyValue("mergeBuffer/acquisitionTimeNs", 100L); emitter.verifyValue("mergeBuffer/bytesUsed", 200L); + emitter.verifyValue("mergeBuffer/maxAcquisitionTimeNs", 100L); + emitter.verifyValue("mergeBuffer/maxBytesUsed", 200L); emitter.verifyValue("groupBy/spilledQueries", 2L); emitter.verifyValue("groupBy/spilledBytes", 200L); + emitter.verifyValue("groupBy/maxSpilledBytes", 200L); emitter.verifyValue("groupBy/mergeDictionarySize", 300L); + emitter.verifyValue("groupBy/maxMergeDictionarySize", 300L); } @Test @@ -135,15 +143,19 @@ public void testMonitorWithServiceDimensions() final Map dimFilters = Map.of( "taskId", List.of(taskId), "dataSource", List.of(dataSource), "id", List.of(taskId) ); - Assert.assertEquals(8, emitter.getNumEmittedEvents()); + Assert.assertEquals(12, emitter.getNumEmittedEvents()); emitter.verifyValue("mergeBuffer/pendingRequests", dimFilters, 0L); emitter.verifyValue("mergeBuffer/used", dimFilters, 0L); emitter.verifyValue("mergeBuffer/queries", dimFilters, 1L); emitter.verifyValue("mergeBuffer/acquisitionTimeNs", dimFilters, 100L); - emitter.verifyValue("mergeBuffer/bytesUsed", 200L); + emitter.verifyValue("mergeBuffer/bytesUsed", dimFilters, 
200L); + emitter.verifyValue("mergeBuffer/maxAcquisitionTimeNs", dimFilters, 100L); + emitter.verifyValue("mergeBuffer/maxBytesUsed", dimFilters, 200L); emitter.verifyValue("groupBy/spilledQueries", dimFilters, 2L); emitter.verifyValue("groupBy/spilledBytes", dimFilters, 200L); + emitter.verifyValue("groupBy/maxSpilledBytes", dimFilters, 200L); emitter.verifyValue("groupBy/mergeDictionarySize", dimFilters, 300L); + emitter.verifyValue("groupBy/maxMergeDictionarySize", dimFilters, 300L); } From b6ad3c271a0f79c801033c4f792ca4fb59119287 Mon Sep 17 00:00:00 2001 From: GWphua Date: Mon, 24 Nov 2025 17:46:37 +0800 Subject: [PATCH 08/32] Add missing calculation in BufferHashGrouper --- .../groupby/epinephelinae/BufferHashGrouper.java | 13 ++++++++++++- .../epinephelinae/LimitedBufferHashGrouper.java | 4 ++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java index 4970ebe9e83e..caa99bde0f34 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java @@ -50,7 +50,6 @@ public class BufferHashGrouper extends AbstractBufferHashGrouper> iterator(boolean sorted) { diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java index 57e29bfd6bd5..f40bdb746ca6 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java @@ -461,6 +461,10 @@ public boolean validateBufferCapacity(int bufferCapacity) @Override public long 
getMergeBufferUsage() { + if (!initialized) { + return 0L; + } + long hashTableUsage = super.getMergeBufferUsage(); long offSetHeapUsage = offsetHeap.getMaxMergeBufferUsageBytes(); return hashTableUsage + offSetHeapUsage; From 28719eb74f23c536d1ba81a4dc2580f455650a32 Mon Sep 17 00:00:00 2001 From: GWphua Date: Mon, 24 Nov 2025 17:52:28 +0800 Subject: [PATCH 09/32] Checkstyle --- .../epinephelinae/BufferHashGrouper.java | 22 ++++++++----------- .../epinephelinae/ByteBufferIntList.java | 5 +---- 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java index caa99bde0f34..6c236f846c32 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java @@ -26,7 +26,6 @@ import org.apache.druid.query.aggregation.AggregatorAdapters; import org.apache.druid.query.aggregation.AggregatorFactory; -import javax.annotation.Nullable; import java.nio.ByteBuffer; import java.util.AbstractList; import java.util.Collections; @@ -210,18 +209,15 @@ public int size() } // Sort offsets in-place. 
- Collections.sort( - wrappedOffsets, - (lhs, rhs) -> { - final ByteBuffer tableBuffer = hashTable.getTableBuffer(); - return comparator.compare( - tableBuffer, - tableBuffer, - lhs + HASH_SIZE, - rhs + HASH_SIZE - ); - } - ); + wrappedOffsets.sort((lhs, rhs) -> { + final ByteBuffer tableBuffer = hashTable.getTableBuffer(); + return comparator.compare( + tableBuffer, + tableBuffer, + lhs + HASH_SIZE, + rhs + HASH_SIZE + ); + }); return new CloseableIterator<>() { diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferIntList.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferIntList.java index d474dcbc7aa4..e02004994edd 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferIntList.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferIntList.java @@ -32,10 +32,7 @@ public class ByteBufferIntList private int maxMergeBufferUsageBytes; - public ByteBufferIntList( - ByteBuffer buffer, - int maxElements - ) + public ByteBufferIntList(ByteBuffer buffer, int maxElements) { this.buffer = buffer; this.maxElements = maxElements; From 59fe03c083b0f5e92daac1b59cbb5a1a21313c7b Mon Sep 17 00:00:00 2001 From: GWphua Date: Mon, 24 Nov 2025 17:53:47 +0800 Subject: [PATCH 10/32] Checkstyle --- .../apache/druid/query/groupby/GroupByStatsProvider.java | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/groupby/GroupByStatsProvider.java b/processing/src/main/java/org/apache/druid/query/groupby/GroupByStatsProvider.java index ae1afd721274..a0e258e83729 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/GroupByStatsProvider.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/GroupByStatsProvider.java @@ -179,15 +179,12 @@ public void addQueryStats(PerQueryStats perQueryStats) if (perQueryStats.getMergeBufferAcquisitionTimeNs() > 0) { 
mergeBufferQueries++; mergeBufferAcquisitionTimeNs += perQueryStats.getMergeBufferAcquisitionTimeNs(); - mergeBufferTotalUsage += perQueryStats.getMergeBufferTotalUsage(); maxMergeBufferAcquisitionTimeNs = Math.max( maxMergeBufferAcquisitionTimeNs, perQueryStats.getMergeBufferAcquisitionTimeNs() ); - maxMergeBufferUsage = Math.max( - maxMergeBufferUsage, - perQueryStats.getMergeBufferTotalUsage() - ); + mergeBufferTotalUsage += perQueryStats.getMergeBufferTotalUsage(); + maxMergeBufferUsage = Math.max(maxMergeBufferUsage, perQueryStats.getMergeBufferTotalUsage()); } if (perQueryStats.getSpilledBytes() > 0) { From 507eecde778dd82b2775133daf51d6c6cf435ec8 Mon Sep 17 00:00:00 2001 From: GWphua Date: Wed, 31 Dec 2025 14:52:39 +0800 Subject: [PATCH 11/32] GroupByStatsProvider javadocs --- .../org/apache/druid/query/groupby/GroupByStatsProvider.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/groupby/GroupByStatsProvider.java b/processing/src/main/java/org/apache/druid/query/groupby/GroupByStatsProvider.java index a0e258e83729..fabf76fbf62a 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/GroupByStatsProvider.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/GroupByStatsProvider.java @@ -27,8 +27,8 @@ import java.util.concurrent.atomic.AtomicLong; /** - * Metrics collector for groupBy queries like spilled bytes, merge buffer acquisition time, merge buffer memory usage, - * and dictionary footprint. + * Collects groupBy query metrics (spilled bytes, merge buffer usage, dictionary size) per-query, then + * aggregates them when queries complete. Stats are retrieved and reset periodically via {@link #getStatsSince()}. 
*/ @LazySingleton public class GroupByStatsProvider From 9623e3a056dce1a4a73611c197de185c4b7191db Mon Sep 17 00:00:00 2001 From: GWphua Date: Mon, 12 Jan 2026 10:08:24 +0800 Subject: [PATCH 12/32] Fix GroupByStatsProviderTest comments --- .../groupby/GroupByStatsProviderTest.java | 63 ++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/processing/src/test/java/org/apache/druid/query/groupby/GroupByStatsProviderTest.java b/processing/src/test/java/org/apache/druid/query/groupby/GroupByStatsProviderTest.java index 592506eee020..37416614c511 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/GroupByStatsProviderTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/GroupByStatsProviderTest.java @@ -46,7 +46,7 @@ public void testMetricCollection() stats2.mergeBufferAcquisitionTime(500); stats2.mergeBufferAcquisitionTime(600); - stats1.mergeBufferTotalUsage(100); + stats2.mergeBufferTotalUsage(100); stats2.spilledBytes(400); stats2.spilledBytes(600); stats2.dictionarySize(300); @@ -56,9 +56,13 @@ public void testMetricCollection() Assert.assertEquals(0L, aggregateStats.getMergeBufferQueries()); Assert.assertEquals(0L, aggregateStats.getMergeBufferAcquisitionTimeNs()); Assert.assertEquals(0L, aggregateStats.getMergeBufferTotalUsage()); + Assert.assertEquals(0L, aggregateStats.getMaxMergeBufferAcquisitionTimeNs()); + Assert.assertEquals(0L, aggregateStats.getMaxMergeBufferUsage()); Assert.assertEquals(0L, aggregateStats.getSpilledQueries()); Assert.assertEquals(0L, aggregateStats.getSpilledBytes()); + Assert.assertEquals(0L, aggregateStats.getMaxSpilledBytes()); Assert.assertEquals(0L, aggregateStats.getMergeDictionarySize()); + Assert.assertEquals(0L, aggregateStats.getMaxMergeDictionarySize()); statsProvider.closeQuery(id1); statsProvider.closeQuery(id2); @@ -67,8 +71,65 @@ public void testMetricCollection() Assert.assertEquals(2, aggregateStats.getMergeBufferQueries()); Assert.assertEquals(1800L, 
aggregateStats.getMergeBufferAcquisitionTimeNs()); Assert.assertEquals(150L, aggregateStats.getMergeBufferTotalUsage()); + Assert.assertEquals(1100L, aggregateStats.getMaxMergeBufferAcquisitionTimeNs()); + Assert.assertEquals(100L, aggregateStats.getMaxMergeBufferUsage()); Assert.assertEquals(2L, aggregateStats.getSpilledQueries()); Assert.assertEquals(1600L, aggregateStats.getSpilledBytes()); + Assert.assertEquals(1000L, aggregateStats.getMaxSpilledBytes()); Assert.assertEquals(1000L, aggregateStats.getMergeDictionarySize()); + Assert.assertEquals(700L, aggregateStats.getMaxMergeDictionarySize()); + } + + @Test + public void testMaxMetricsWithVaryingMaxPerMetric() + { + GroupByStatsProvider statsProvider = new GroupByStatsProvider(); + + QueryResourceId r1 = new QueryResourceId("r1"); + GroupByStatsProvider.PerQueryStats stats1 = statsProvider.getPerQueryStatsContainer(r1); + stats1.mergeBufferAcquisitionTime(2000); + stats1.mergeBufferTotalUsage(50); + stats1.spilledBytes(100); + stats1.dictionarySize(200); + + QueryResourceId r2 = new QueryResourceId("r2"); + GroupByStatsProvider.PerQueryStats stats2 = statsProvider.getPerQueryStatsContainer(r2); + stats2.mergeBufferAcquisitionTime(100); + stats2.mergeBufferTotalUsage(500); + stats2.spilledBytes(150); + stats2.dictionarySize(250); + + QueryResourceId r3 = new QueryResourceId("r3"); + GroupByStatsProvider.PerQueryStats stats3 = statsProvider.getPerQueryStatsContainer(r3); + stats3.mergeBufferAcquisitionTime(200); + stats3.mergeBufferTotalUsage(100); + stats3.spilledBytes(3000); + stats3.dictionarySize(300); + + QueryResourceId r4 = new QueryResourceId("r4"); + GroupByStatsProvider.PerQueryStats stats4 = statsProvider.getPerQueryStatsContainer(r4); + stats4.mergeBufferAcquisitionTime(300); + stats4.mergeBufferTotalUsage(75); + stats4.spilledBytes(200); + stats4.dictionarySize(1500); + + statsProvider.closeQuery(r1); + statsProvider.closeQuery(r2); + statsProvider.closeQuery(r3); + statsProvider.closeQuery(r4); + + 
GroupByStatsProvider.AggregateStats aggregateStats = statsProvider.getStatsSince(); + + Assert.assertEquals(2000L, aggregateStats.getMaxMergeBufferAcquisitionTimeNs()); + Assert.assertEquals(500L, aggregateStats.getMaxMergeBufferUsage()); + Assert.assertEquals(3000L, aggregateStats.getMaxSpilledBytes()); + Assert.assertEquals(1500L, aggregateStats.getMaxMergeDictionarySize()); + + Assert.assertEquals(4L, aggregateStats.getMergeBufferQueries()); + Assert.assertEquals(2600L, aggregateStats.getMergeBufferAcquisitionTimeNs()); + Assert.assertEquals(725L, aggregateStats.getMergeBufferTotalUsage()); + Assert.assertEquals(4L, aggregateStats.getSpilledQueries()); + Assert.assertEquals(3450L, aggregateStats.getSpilledBytes()); + Assert.assertEquals(2250L, aggregateStats.getMergeDictionarySize()); } } From ae40900f7b5ac5b6f08f1986ae86f453a85be87c Mon Sep 17 00:00:00 2001 From: GWphua Date: Mon, 12 Jan 2026 10:11:01 +0800 Subject: [PATCH 13/32] Fix doc order for GroupByStatsProvider metrics --- docs/operations/metrics.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/operations/metrics.md b/docs/operations/metrics.md index fff29b4aad27..32a0f67d9cf2 100644 --- a/docs/operations/metrics.md +++ b/docs/operations/metrics.md @@ -89,8 +89,8 @@ Most metric values reset each emission period, as specified in `druid.monitoring |`mergeBuffer/used`|Number of merge buffers used from the merge buffer pool.|This metric is only available if the `GroupByStatsMonitor` module is included.|Depends on the number of groupBy queries needing merge buffers.| |`mergeBuffer/queries`|Number of groupBy queries that acquired a batch of buffers from the merge buffer pool.|This metric is only available if the `GroupByStatsMonitor` module is included.|Depends on the number of groupBy queries needing merge buffers.| |`mergeBuffer/acquisitionTimeNs`|Total time in nanoseconds to acquire merge buffer for groupBy queries.|This metric is only available if the `GroupByStatsMonitor` 
module is included.|Varies| -|`mergeBuffer/bytesUsed`|Number of bytes used by merge buffers to process groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`mergeBuffer/maxAcquisitionTimeNs`|Maximum time in nanoseconds to acquire merge buffer for any single groupBy query within the emission period.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| +|`mergeBuffer/bytesUsed`|Number of bytes used by merge buffers to process groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`mergeBuffer/maxBytesUsed`|Maximum number of bytes used by merge buffers for any single groupBy query within the emission period.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`groupBy/spilledQueries`|Number of groupBy queries that have spilled onto the disk.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`groupBy/spilledBytes`|Number of bytes spilled on the disk by the groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| @@ -118,8 +118,8 @@ Most metric values reset each emission period, as specified in `druid.monitoring |`mergeBuffer/used`|Number of merge buffers used from the merge buffer pool.|This metric is only available if the `GroupByStatsMonitor` module is included.|Depends on the number of groupBy queries needing merge buffers.| |`mergeBuffer/queries`|Number of groupBy queries that acquired a batch of buffers from the merge buffer pool.|This metric is only available if the `GroupByStatsMonitor` module is included.|Depends on the number of groupBy queries needing merge buffers.| |`mergeBuffer/acquisitionTimeNs`|Total time in nanoseconds to acquire merge buffer for groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| -|`mergeBuffer/bytesUsed`|Number of bytes used by 
merge buffers to process groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`mergeBuffer/maxAcquisitionTimeNs`|Maximum time in nanoseconds to acquire merge buffer for any single groupBy query within the emission period.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| +|`mergeBuffer/bytesUsed`|Number of bytes used by merge buffers to process groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`mergeBuffer/maxBytesUsed`|Maximum number of bytes used by merge buffers for any single groupBy query within the emission period.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`groupBy/spilledQueries`|Number of groupBy queries that have spilled onto the disk.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`groupBy/spilledBytes`|Number of bytes spilled on the disk by the groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| @@ -150,8 +150,8 @@ to represent the task ID are deprecated and will be removed in a future release. |`mergeBuffer/used`|Number of merge buffers used from the merge buffer pool. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Depends on the number of groupBy queries needing merge buffers.| |`mergeBuffer/queries`|Number of groupBy queries that acquired a batch of buffers from the merge buffer pool. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Depends on the number of groupBy queries needing merge buffers.| |`mergeBuffer/acquisitionTimeNs`|Total time in nanoseconds to acquire merge buffer for groupBy queries. 
This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| -|`mergeBuffer/bytesUsed`|Number of bytes used by merge buffers to process groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| |`mergeBuffer/maxAcquisitionTimeNs`|Maximum time in nanoseconds to acquire merge buffer for any single groupBy query within the emission period. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| +|`mergeBuffer/bytesUsed`|Number of bytes used by merge buffers to process groupBy queries. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| |`mergeBuffer/maxBytesUsed`|Maximum number of bytes used by merge buffers for any single groupBy query within the emission period. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| |`groupBy/spilledQueries`|Number of groupBy queries that have spilled onto the disk. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| |`groupBy/spilledBytes`|Number of bytes spilled on the disk by the groupBy queries.
This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| From 400d0f4f76891442f511a1443473cca4180c6051 Mon Sep 17 00:00:00 2001 From: GWphua Date: Mon, 12 Jan 2026 10:19:59 +0800 Subject: [PATCH 14/32] Fix test for GroupByStatsMonitorTest --- .../metrics/GroupByStatsMonitorTest.java | 52 ++++++++++++++++++- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java b/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java index b9bef646c0c5..fd38c669d376 100644 --- a/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java +++ b/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java @@ -25,6 +25,7 @@ import org.apache.druid.java.util.emitter.service.ServiceMetricEvent; import org.apache.druid.java.util.metrics.StubServiceEmitter; import org.apache.druid.query.DruidMetrics; +import org.apache.druid.query.QueryResourceId; import org.apache.druid.query.groupby.GroupByStatsProvider; import org.junit.After; import org.junit.Assert; @@ -119,7 +120,6 @@ public void testMonitorWithServiceDimensions() emitter.start(); monitor.doMonitor(emitter); emitter.flush(); - // Trigger metric emission monitor.doMonitor(emitter); final Map dimFilters = Map.of( @@ -144,7 +144,6 @@ public void testMonitorWithServiceDimensions() verifyMetricValue(emitter, "groupBy/maxMergeDictionarySize", dimFilters, 300L); } - @Test public void testMonitoringMergeBuffer_acquiredCount() throws ExecutionException, InterruptedException, TimeoutException @@ -195,6 +194,55 @@ public void testMonitoringMergeBuffer_pendingRequests() } } + @Test + public void testMonitoringWithoutMockingGroupByStatsProvider() + { + GroupByStatsProvider statsProvider = new GroupByStatsProvider(); + + QueryResourceId r1 = new QueryResourceId("r1"); + GroupByStatsProvider.PerQueryStats stats1 = 
statsProvider.getPerQueryStatsContainer(r1); + stats1.mergeBufferAcquisitionTime(100); + stats1.mergeBufferTotalUsage(50); + stats1.spilledBytes(200); + stats1.dictionarySize(100); + + QueryResourceId r2 = new QueryResourceId("r2"); + GroupByStatsProvider.PerQueryStats stats2 = statsProvider.getPerQueryStatsContainer(r2); + stats2.mergeBufferAcquisitionTime(500); + stats2.mergeBufferTotalUsage(30); + stats2.spilledBytes(100); + stats2.dictionarySize(300); + + QueryResourceId r3 = new QueryResourceId("r3"); + GroupByStatsProvider.PerQueryStats stats3 = statsProvider.getPerQueryStatsContainer(r3); + stats3.mergeBufferAcquisitionTime(200); + stats3.mergeBufferTotalUsage(150); + stats3.spilledBytes(800); + stats3.dictionarySize(200); + + // Close all queries to aggregate stats (mimics GroupByMergingQueryRunner behavior) + statsProvider.closeQuery(r1); + statsProvider.closeQuery(r2); + statsProvider.closeQuery(r3); + + final GroupByStatsMonitor monitor = new GroupByStatsMonitor(statsProvider, mergeBufferPool); + final StubServiceEmitter emitter = new StubServiceEmitter("service", "host"); + emitter.start(); + monitor.doMonitor(emitter); + + emitter.verifyValue("mergeBuffer/queries", 3L); + emitter.verifyValue("mergeBuffer/acquisitionTimeNs", 800L); + emitter.verifyValue("mergeBuffer/bytesUsed", 230L); + emitter.verifyValue("groupBy/spilledQueries", 3L); + emitter.verifyValue("groupBy/spilledBytes", 1100L); + emitter.verifyValue("groupBy/mergeDictionarySize", 600L); + + emitter.verifyValue("mergeBuffer/maxAcquisitionTimeNs", 500L); + emitter.verifyValue("mergeBuffer/maxBytesUsed", 150L); + emitter.verifyValue("groupBy/maxSpilledBytes", 800L); + emitter.verifyValue("groupBy/maxMergeDictionarySize", 300L); + } + private void verifyMetricValue(StubServiceEmitter emitter, String metricName, Map dimFilters, Number expectedValue) { final List observedMetricEvents = emitter.getMetricEvents(metricName); From 8f7b2180b4cc17b95733baeeced8ee7b1ad5a5d2 Mon Sep 17 00:00:00 2001 From: 
Virushade Date: Wed, 14 Jan 2026 15:52:21 +0800 Subject: [PATCH 15/32] Update server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java Co-authored-by: Abhishek Radhakrishnan --- .../apache/druid/server/metrics/GroupByStatsMonitorTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java b/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java index fd38c669d376..7e7c04657189 100644 --- a/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java +++ b/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java @@ -195,7 +195,7 @@ public void testMonitoringMergeBuffer_pendingRequests() } @Test - public void testMonitoringWithoutMockingGroupByStatsProvider() + public void testMonitoringWithMultipleResources() { GroupByStatsProvider statsProvider = new GroupByStatsProvider(); From df3bf70409f59d94684e5fc5d2a01b1857fd7d10 Mon Sep 17 00:00:00 2001 From: GWphua Date: Wed, 14 Jan 2026 16:01:57 +0800 Subject: [PATCH 16/32] Revert stylistic changes in BufferHashGrouper --- .../epinephelinae/BufferHashGrouper.java | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java index 6c236f846c32..85f2dc861cb7 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java @@ -209,15 +209,18 @@ public int size() } // Sort offsets in-place. 
- wrappedOffsets.sort((lhs, rhs) -> { - final ByteBuffer tableBuffer = hashTable.getTableBuffer(); - return comparator.compare( - tableBuffer, - tableBuffer, - lhs + HASH_SIZE, - rhs + HASH_SIZE - ); - }); + Collections.sort( + wrappedOffsets, + (lhs, rhs) -> { + final ByteBuffer tableBuffer = hashTable.getTableBuffer(); + return comparator.compare( + tableBuffer, + tableBuffer, + lhs + HASH_SIZE, + rhs + HASH_SIZE + ); + } + ); return new CloseableIterator<>() { From ac71a63daa454b934b669292d3e8d83b0daf02f4 Mon Sep 17 00:00:00 2001 From: GWphua Date: Wed, 14 Jan 2026 16:26:28 +0800 Subject: [PATCH 17/32] Rename mergeBufferUsage to mergeBufferUsedBytes --- .../query/groupby/GroupByStatsProvider.java | 46 +++++++++---------- .../AbstractBufferHashGrouper.java | 4 +- .../epinephelinae/BufferHashGrouper.java | 6 +-- .../epinephelinae/ByteBufferHashTable.java | 16 +++---- .../epinephelinae/ByteBufferIntList.java | 10 ++-- .../ByteBufferMinMaxOffsetHeap.java | 10 ++-- .../LimitedBufferHashGrouper.java | 20 ++++---- .../epinephelinae/SpillingGrouper.java | 6 +-- .../groupby/GroupByStatsProviderTest.java | 24 +++++----- .../server/metrics/GroupByStatsMonitor.java | 4 +- .../metrics/GroupByStatsMonitorTest.java | 6 +-- 11 files changed, 76 insertions(+), 76 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/groupby/GroupByStatsProvider.java b/processing/src/main/java/org/apache/druid/query/groupby/GroupByStatsProvider.java index fabf76fbf62a..5b9658df6d0f 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/GroupByStatsProvider.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/GroupByStatsProvider.java @@ -70,9 +70,9 @@ public static class AggregateStats { private long mergeBufferQueries = 0; private long mergeBufferAcquisitionTimeNs = 0; - private long mergeBufferTotalUsage = 0; + private long totalMergeBufferUsedBytes = 0; private long maxMergeBufferAcquisitionTimeNs = 0; - private long maxMergeBufferUsage = 0; + 
private long maxMergeBufferUsedBytes = 0; private long spilledQueries = 0; private long spilledBytes = 0; private long maxSpilledBytes = 0; @@ -88,9 +88,9 @@ public AggregateStats(AggregateStats aggregateStats) this( aggregateStats.mergeBufferQueries, aggregateStats.mergeBufferAcquisitionTimeNs, - aggregateStats.mergeBufferTotalUsage, + aggregateStats.totalMergeBufferUsedBytes, aggregateStats.maxMergeBufferAcquisitionTimeNs, - aggregateStats.maxMergeBufferUsage, + aggregateStats.maxMergeBufferUsedBytes, aggregateStats.spilledQueries, aggregateStats.spilledBytes, aggregateStats.maxSpilledBytes, @@ -102,9 +102,9 @@ public AggregateStats(AggregateStats aggregateStats) public AggregateStats( long mergeBufferQueries, long mergeBufferAcquisitionTimeNs, - long mergeBufferTotalUsage, + long totalMergeBufferUsedBytes, long maxMergeBufferAcquisitionTimeNs, - long maxMergeBufferUsage, + long maxMergeBufferUsedBytes, long spilledQueries, long spilledBytes, long maxSpilledBytes, @@ -114,9 +114,9 @@ public AggregateStats( { this.mergeBufferQueries = mergeBufferQueries; this.mergeBufferAcquisitionTimeNs = mergeBufferAcquisitionTimeNs; - this.mergeBufferTotalUsage = mergeBufferTotalUsage; + this.totalMergeBufferUsedBytes = totalMergeBufferUsedBytes; this.maxMergeBufferAcquisitionTimeNs = maxMergeBufferAcquisitionTimeNs; - this.maxMergeBufferUsage = maxMergeBufferUsage; + this.maxMergeBufferUsedBytes = maxMergeBufferUsedBytes; this.spilledQueries = spilledQueries; this.spilledBytes = spilledBytes; this.maxSpilledBytes = maxSpilledBytes; @@ -134,19 +134,19 @@ public long getMergeBufferAcquisitionTimeNs() return mergeBufferAcquisitionTimeNs; } - public long getMergeBufferTotalUsage() + public long getMaxMergeBufferAcquisitionTimeNs() { - return mergeBufferTotalUsage; + return maxMergeBufferAcquisitionTimeNs; } - public long getMaxMergeBufferAcquisitionTimeNs() + public long getTotalMergeBufferUsedBytes() { - return maxMergeBufferAcquisitionTimeNs; + return totalMergeBufferUsedBytes; 
} - public long getMaxMergeBufferUsage() + public long getMaxMergeBufferUsedBytes() { - return maxMergeBufferUsage; + return maxMergeBufferUsedBytes; } public long getSpilledQueries() @@ -183,8 +183,8 @@ public void addQueryStats(PerQueryStats perQueryStats) maxMergeBufferAcquisitionTimeNs, perQueryStats.getMergeBufferAcquisitionTimeNs() ); - mergeBufferTotalUsage += perQueryStats.getMergeBufferTotalUsage(); - maxMergeBufferUsage = Math.max(maxMergeBufferUsage, perQueryStats.getMergeBufferTotalUsage()); + totalMergeBufferUsedBytes += perQueryStats.getMergeBufferTotalUsedBytes(); + maxMergeBufferUsedBytes = Math.max(maxMergeBufferUsedBytes, perQueryStats.getMergeBufferTotalUsedBytes()); } if (perQueryStats.getSpilledBytes() > 0) { @@ -201,9 +201,9 @@ public void reset() { this.mergeBufferQueries = 0; this.mergeBufferAcquisitionTimeNs = 0; - this.mergeBufferTotalUsage = 0; this.maxMergeBufferAcquisitionTimeNs = 0; - this.maxMergeBufferUsage = 0; + this.totalMergeBufferUsedBytes = 0; + this.maxMergeBufferUsedBytes = 0; this.spilledQueries = 0; this.spilledBytes = 0; this.maxSpilledBytes = 0; @@ -215,7 +215,7 @@ public void reset() public static class PerQueryStats { private final AtomicLong mergeBufferAcquisitionTimeNs = new AtomicLong(0); - private final AtomicLong mergeBufferTotalUsage = new AtomicLong(0); + private final AtomicLong mergeBufferTotalUsedBytes = new AtomicLong(0); private final AtomicLong spilledBytes = new AtomicLong(0); private final AtomicLong mergeDictionarySize = new AtomicLong(0); @@ -224,9 +224,9 @@ public void mergeBufferAcquisitionTime(long delay) mergeBufferAcquisitionTimeNs.addAndGet(delay); } - public void mergeBufferTotalUsage(long bytes) + public void mergeBufferTotalUsedBytes(long bytes) { - mergeBufferTotalUsage.addAndGet(bytes); + mergeBufferTotalUsedBytes.addAndGet(bytes); } public void spilledBytes(long bytes) @@ -244,9 +244,9 @@ public long getMergeBufferAcquisitionTimeNs() return mergeBufferAcquisitionTimeNs.get(); } - public long 
getMergeBufferTotalUsage() + public long getMergeBufferTotalUsedBytes() { - return mergeBufferTotalUsage.get(); + return mergeBufferTotalUsedBytes.get(); } public long getSpilledBytes() diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AbstractBufferHashGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AbstractBufferHashGrouper.java index e07409430b3b..2326e67d55b0 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AbstractBufferHashGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AbstractBufferHashGrouper.java @@ -177,9 +177,9 @@ public void close() * This method is implemented to return the highest memory value claimed by the Grouper. This is only * used for monitoring the size of the merge buffers used. */ - public long getMergeBufferUsage() + public long getMergeBufferUsedBytes() { - return hashTable.getMaxTableBufferUsage(); + return hashTable.getMaxTableBufferUsedBytes(); } /** diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java index 85f2dc861cb7..2f4e8c9e6c8b 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java @@ -153,14 +153,14 @@ public void reset() } @Override - public long getMergeBufferUsage() + public long getMergeBufferUsedBytes() { if (!initialized) { return 0L; } - long hashTableUsage = hashTable.getMaxTableBufferUsage(); - long offSetListUsage = offsetList.getMaxMergeBufferUsageBytes(); + long hashTableUsage = hashTable.getMaxTableBufferUsedBytes(); + long offSetListUsage = offsetList.getMaxMergeBufferUsedBytes(); return hashTableUsage + offSetListUsage; } diff --git 
a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferHashTable.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferHashTable.java index 0b93b5f5a5a9..05465a6db79f 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferHashTable.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferHashTable.java @@ -80,7 +80,7 @@ public static int calculateTableArenaSizeWithFixedAdditionalSize( protected BucketUpdateHandler bucketUpdateHandler; // Keeps track on how many bytes is being used in the merge buffer. - protected long maxTableBufferUsage; + protected long maxTableBufferUsedBytes; public ByteBufferHashTable( float maxLoadFactor, @@ -100,7 +100,7 @@ public ByteBufferHashTable( this.maxSizeForTesting = maxSizeForTesting; this.tableArenaSize = buffer.capacity(); this.bucketUpdateHandler = bucketUpdateHandler; - this.maxTableBufferUsage = 0; + this.maxTableBufferUsedBytes = 0; } public void reset() @@ -143,7 +143,7 @@ public void reset() bufferDup.position(tableStart); bufferDup.limit(tableStart + maxBuckets * bucketSizeWithHash); tableBuffer = bufferDup.slice(); - updateMaxTableBufferUsage(); + updateMaxTableBufferUsedBytes(); // Clear used bits of new table for (int i = 0; i < maxBuckets; i++) { @@ -230,7 +230,7 @@ public void adjustTableWhenFull() maxBuckets = newBuckets; regrowthThreshold = newMaxSize; tableBuffer = newTableBuffer; - updateMaxTableBufferUsage(); + updateMaxTableBufferUsedBytes(); tableStart = newTableStart; growthCount++; @@ -387,14 +387,14 @@ public int getGrowthCount() return growthCount; } - protected void updateMaxTableBufferUsage() + protected void updateMaxTableBufferUsedBytes() { - maxTableBufferUsage = Math.max(maxTableBufferUsage, tableBuffer.capacity()); + maxTableBufferUsedBytes = Math.max(maxTableBufferUsedBytes, tableBuffer.capacity()); } - public long getMaxTableBufferUsage() + public long 
getMaxTableBufferUsedBytes() { - return maxTableBufferUsage; + return maxTableBufferUsedBytes; } public interface BucketUpdateHandler diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferIntList.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferIntList.java index e02004994edd..b6b15f1e5774 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferIntList.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferIntList.java @@ -30,14 +30,14 @@ public class ByteBufferIntList private final int maxElements; private int numElements; - private int maxMergeBufferUsageBytes; + private int maxMergeBufferUsedBytes; public ByteBufferIntList(ByteBuffer buffer, int maxElements) { this.buffer = buffer; this.maxElements = maxElements; this.numElements = 0; - this.maxMergeBufferUsageBytes = 0; + this.maxMergeBufferUsedBytes = 0; if (buffer.capacity() < (maxElements * Integer.BYTES)) { throw new IAE( @@ -55,7 +55,7 @@ public void add(int val) } buffer.putInt(numElements * Integer.BYTES, val); numElements++; - maxMergeBufferUsageBytes = Math.max(maxMergeBufferUsageBytes, numElements * Integer.BYTES); + maxMergeBufferUsedBytes = Math.max(maxMergeBufferUsedBytes, numElements * Integer.BYTES); } public void set(int index, int val) @@ -73,8 +73,8 @@ public void reset() numElements = 0; } - public int getMaxMergeBufferUsageBytes() + public int getMaxMergeBufferUsedBytes() { - return maxMergeBufferUsageBytes; + return maxMergeBufferUsedBytes; } } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferMinMaxOffsetHeap.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferMinMaxOffsetHeap.java index d4585b34b41a..ff2746bca29c 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferMinMaxOffsetHeap.java +++ 
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferMinMaxOffsetHeap.java @@ -44,7 +44,7 @@ public class ByteBufferMinMaxOffsetHeap private int heapSize; private int maxHeapSize; - private int maxMergeBufferUsageBytes; + private int maxMergeBufferUsedBytes; public ByteBufferMinMaxOffsetHeap( ByteBuffer buf, @@ -56,7 +56,7 @@ public ByteBufferMinMaxOffsetHeap( this.buf = buf; this.limit = limit; this.heapSize = 0; - this.maxMergeBufferUsageBytes = 0; + this.maxMergeBufferUsedBytes = 0; this.minComparator = minComparator; this.maxComparator = Ordering.from(minComparator).reverse(); this.heapIndexUpdater = heapIndexUpdater; @@ -75,7 +75,7 @@ public int addOffset(int offset) heapSize++; maxHeapSize = Math.max(maxHeapSize, heapSize); - maxMergeBufferUsageBytes = Math.max(maxMergeBufferUsageBytes, maxHeapSize * Integer.BYTES); + maxMergeBufferUsedBytes = Math.max(maxMergeBufferUsedBytes, maxHeapSize * Integer.BYTES); if (heapIndexUpdater != null) { heapIndexUpdater.updateHeapIndexForOffset(offset, pos); @@ -228,9 +228,9 @@ public int getHeapSize() return heapSize; } - public int getMaxMergeBufferUsageBytes() + public int getMaxMergeBufferUsedBytes() { - return maxMergeBufferUsageBytes; + return maxMergeBufferUsedBytes; } private void bubbleUp(int pos) diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java index f40bdb746ca6..e1281e404b45 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java @@ -459,14 +459,14 @@ public boolean validateBufferCapacity(int bufferCapacity) } @Override - public long getMergeBufferUsage() + public long getMergeBufferUsedBytes() { if (!initialized) { return 0L; } - long hashTableUsage = 
super.getMergeBufferUsage(); - long offSetHeapUsage = offsetHeap.getMaxMergeBufferUsageBytes(); + long hashTableUsage = super.getMergeBufferUsedBytes(); + long offSetHeapUsage = offsetHeap.getMaxMergeBufferUsedBytes(); return hashTableUsage + offSetHeapUsage; } @@ -515,7 +515,7 @@ public AlternatingByteBufferHashTable( subHashTable2Buffer = subHashTable2Buffer.slice(); subHashTableBuffers = new ByteBuffer[]{subHashTable1Buffer, subHashTable2Buffer}; - updateMaxTableBufferUsage(); + updateMaxTableBufferUsedBytes(); } @Override @@ -528,7 +528,7 @@ public void reset() subHashTableBuffers[0].put(i * bucketSizeWithHash, (byte) 0); } tableBuffer = subHashTableBuffers[0]; - updateMaxTableBufferUsage(); + updateMaxTableBufferUsedBytes(); } @Override @@ -585,19 +585,19 @@ public void adjustTableWhenFull() size = numCopied; tableBuffer = newTableBuffer; - updateMaxTableBufferUsage(); + updateMaxTableBufferUsedBytes(); growthCount++; } @Override - protected void updateMaxTableBufferUsage() + protected void updateMaxTableBufferUsedBytes() { - long currentBufferUsage = 0; + long currentBufferUsedBytes = 0; for (ByteBuffer buffer : subHashTableBuffers) { - currentBufferUsage += buffer.capacity(); + currentBufferUsedBytes += buffer.capacity(); } - maxTableBufferUsage = Math.max(maxTableBufferUsage, currentBufferUsage); + maxTableBufferUsedBytes = Math.max(maxTableBufferUsedBytes, currentBufferUsedBytes); } } } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/SpillingGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/SpillingGrouper.java index 57e678de4480..160e07df69d0 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/SpillingGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/SpillingGrouper.java @@ -219,15 +219,15 @@ public void reset() public void close() { perQueryStats.dictionarySize(getDictionarySizeEstimate()); - 
perQueryStats.mergeBufferTotalUsage(getMergeBufferUsage()); + perQueryStats.mergeBufferTotalUsedBytes(getMergeBufferUsedBytes()); grouper.close(); keySerde.reset(); deleteFiles(); } - private long getMergeBufferUsage() + private long getMergeBufferUsedBytes() { - return grouper.isInitialized() ? grouper.getMergeBufferUsage() : 0L; + return grouper.isInitialized() ? grouper.getMergeBufferUsedBytes() : 0L; } private long getDictionarySizeEstimate() diff --git a/processing/src/test/java/org/apache/druid/query/groupby/GroupByStatsProviderTest.java b/processing/src/test/java/org/apache/druid/query/groupby/GroupByStatsProviderTest.java index 37416614c511..f055da3b80ef 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/GroupByStatsProviderTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/GroupByStatsProviderTest.java @@ -35,7 +35,7 @@ public void testMetricCollection() stats1.mergeBufferAcquisitionTime(300); stats1.mergeBufferAcquisitionTime(400); - stats1.mergeBufferTotalUsage(50); + stats1.mergeBufferTotalUsedBytes(50); stats1.spilledBytes(200); stats1.spilledBytes(400); stats1.dictionarySize(100); @@ -46,7 +46,7 @@ public void testMetricCollection() stats2.mergeBufferAcquisitionTime(500); stats2.mergeBufferAcquisitionTime(600); - stats2.mergeBufferTotalUsage(100); + stats2.mergeBufferTotalUsedBytes(100); stats2.spilledBytes(400); stats2.spilledBytes(600); stats2.dictionarySize(300); @@ -55,9 +55,9 @@ public void testMetricCollection() GroupByStatsProvider.AggregateStats aggregateStats = statsProvider.getStatsSince(); Assert.assertEquals(0L, aggregateStats.getMergeBufferQueries()); Assert.assertEquals(0L, aggregateStats.getMergeBufferAcquisitionTimeNs()); - Assert.assertEquals(0L, aggregateStats.getMergeBufferTotalUsage()); + Assert.assertEquals(0L, aggregateStats.getTotalMergeBufferUsedBytes()); Assert.assertEquals(0L, aggregateStats.getMaxMergeBufferAcquisitionTimeNs()); - Assert.assertEquals(0L, 
aggregateStats.getMaxMergeBufferUsage()); + Assert.assertEquals(0L, aggregateStats.getMaxMergeBufferUsedBytes()); Assert.assertEquals(0L, aggregateStats.getSpilledQueries()); Assert.assertEquals(0L, aggregateStats.getSpilledBytes()); Assert.assertEquals(0L, aggregateStats.getMaxSpilledBytes()); @@ -70,9 +70,9 @@ public void testMetricCollection() aggregateStats = statsProvider.getStatsSince(); Assert.assertEquals(2, aggregateStats.getMergeBufferQueries()); Assert.assertEquals(1800L, aggregateStats.getMergeBufferAcquisitionTimeNs()); - Assert.assertEquals(150L, aggregateStats.getMergeBufferTotalUsage()); + Assert.assertEquals(150L, aggregateStats.getTotalMergeBufferUsedBytes()); Assert.assertEquals(1100L, aggregateStats.getMaxMergeBufferAcquisitionTimeNs()); - Assert.assertEquals(100L, aggregateStats.getMaxMergeBufferUsage()); + Assert.assertEquals(100L, aggregateStats.getMaxMergeBufferUsedBytes()); Assert.assertEquals(2L, aggregateStats.getSpilledQueries()); Assert.assertEquals(1600L, aggregateStats.getSpilledBytes()); Assert.assertEquals(1000L, aggregateStats.getMaxSpilledBytes()); @@ -88,28 +88,28 @@ public void testMaxMetricsWithVaryingMaxPerMetric() QueryResourceId r1 = new QueryResourceId("r1"); GroupByStatsProvider.PerQueryStats stats1 = statsProvider.getPerQueryStatsContainer(r1); stats1.mergeBufferAcquisitionTime(2000); - stats1.mergeBufferTotalUsage(50); + stats1.mergeBufferTotalUsedBytes(50); stats1.spilledBytes(100); stats1.dictionarySize(200); QueryResourceId r2 = new QueryResourceId("r2"); GroupByStatsProvider.PerQueryStats stats2 = statsProvider.getPerQueryStatsContainer(r2); stats2.mergeBufferAcquisitionTime(100); - stats2.mergeBufferTotalUsage(500); + stats2.mergeBufferTotalUsedBytes(500); stats2.spilledBytes(150); stats2.dictionarySize(250); QueryResourceId r3 = new QueryResourceId("r3"); GroupByStatsProvider.PerQueryStats stats3 = statsProvider.getPerQueryStatsContainer(r3); stats3.mergeBufferAcquisitionTime(200); - 
stats3.mergeBufferTotalUsage(100); + stats3.mergeBufferTotalUsedBytes(100); stats3.spilledBytes(3000); stats3.dictionarySize(300); QueryResourceId r4 = new QueryResourceId("r4"); GroupByStatsProvider.PerQueryStats stats4 = statsProvider.getPerQueryStatsContainer(r4); stats4.mergeBufferAcquisitionTime(300); - stats4.mergeBufferTotalUsage(75); + stats4.mergeBufferTotalUsedBytes(75); stats4.spilledBytes(200); stats4.dictionarySize(1500); @@ -121,13 +121,13 @@ public void testMaxMetricsWithVaryingMaxPerMetric() GroupByStatsProvider.AggregateStats aggregateStats = statsProvider.getStatsSince(); Assert.assertEquals(2000L, aggregateStats.getMaxMergeBufferAcquisitionTimeNs()); - Assert.assertEquals(500L, aggregateStats.getMaxMergeBufferUsage()); + Assert.assertEquals(500L, aggregateStats.getMaxMergeBufferUsedBytes()); Assert.assertEquals(3000L, aggregateStats.getMaxSpilledBytes()); Assert.assertEquals(1500L, aggregateStats.getMaxMergeDictionarySize()); Assert.assertEquals(4L, aggregateStats.getMergeBufferQueries()); Assert.assertEquals(2600L, aggregateStats.getMergeBufferAcquisitionTimeNs()); - Assert.assertEquals(725L, aggregateStats.getMergeBufferTotalUsage()); + Assert.assertEquals(725L, aggregateStats.getTotalMergeBufferUsedBytes()); Assert.assertEquals(4L, aggregateStats.getSpilledQueries()); Assert.assertEquals(3450L, aggregateStats.getSpilledBytes()); Assert.assertEquals(2250L, aggregateStats.getMergeDictionarySize()); diff --git a/server/src/main/java/org/apache/druid/server/metrics/GroupByStatsMonitor.java b/server/src/main/java/org/apache/druid/server/metrics/GroupByStatsMonitor.java index 37b518b3d9de..08f0d151cd52 100644 --- a/server/src/main/java/org/apache/druid/server/metrics/GroupByStatsMonitor.java +++ b/server/src/main/java/org/apache/druid/server/metrics/GroupByStatsMonitor.java @@ -68,11 +68,11 @@ public boolean doMonitor(ServiceEmitter emitter) emitter.emit( builder.setMetric("mergeBuffer/acquisitionTimeNs", 
statsContainer.getMergeBufferAcquisitionTimeNs()) ); - emitter.emit(builder.setMetric("mergeBuffer/bytesUsed", statsContainer.getMergeBufferTotalUsage())); + emitter.emit(builder.setMetric("mergeBuffer/bytesUsed", statsContainer.getTotalMergeBufferUsedBytes())); emitter.emit( builder.setMetric("mergeBuffer/maxAcquisitionTimeNs", statsContainer.getMaxMergeBufferAcquisitionTimeNs()) ); - emitter.emit(builder.setMetric("mergeBuffer/maxBytesUsed", statsContainer.getMaxMergeBufferUsage())); + emitter.emit(builder.setMetric("mergeBuffer/maxBytesUsed", statsContainer.getMaxMergeBufferUsedBytes())); } if (statsContainer.getSpilledQueries() > 0) { diff --git a/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java b/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java index 7e7c04657189..ccd8ca828ce9 100644 --- a/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java +++ b/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java @@ -202,21 +202,21 @@ public void testMonitoringWithMultipleResources() QueryResourceId r1 = new QueryResourceId("r1"); GroupByStatsProvider.PerQueryStats stats1 = statsProvider.getPerQueryStatsContainer(r1); stats1.mergeBufferAcquisitionTime(100); - stats1.mergeBufferTotalUsage(50); + stats1.mergeBufferTotalUsedBytes(50); stats1.spilledBytes(200); stats1.dictionarySize(100); QueryResourceId r2 = new QueryResourceId("r2"); GroupByStatsProvider.PerQueryStats stats2 = statsProvider.getPerQueryStatsContainer(r2); stats2.mergeBufferAcquisitionTime(500); - stats2.mergeBufferTotalUsage(30); + stats2.mergeBufferTotalUsedBytes(30); stats2.spilledBytes(100); stats2.dictionarySize(300); QueryResourceId r3 = new QueryResourceId("r3"); GroupByStatsProvider.PerQueryStats stats3 = statsProvider.getPerQueryStatsContainer(r3); stats3.mergeBufferAcquisitionTime(200); - stats3.mergeBufferTotalUsage(150); + stats3.mergeBufferTotalUsedBytes(150); 
stats3.spilledBytes(800); stats3.dictionarySize(200); From 003da9c6bce180ea871902aae5e3f00b101d7da3 Mon Sep 17 00:00:00 2001 From: GWphua Date: Wed, 14 Jan 2026 16:56:49 +0800 Subject: [PATCH 18/32] Order of maxAcquisitionTimeNs --- .../druid/query/groupby/GroupByStatsProviderTest.java | 4 ++-- .../apache/druid/server/metrics/GroupByStatsMonitor.java | 6 ++---- .../druid/server/metrics/GroupByStatsMonitorTest.java | 4 ++-- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/processing/src/test/java/org/apache/druid/query/groupby/GroupByStatsProviderTest.java b/processing/src/test/java/org/apache/druid/query/groupby/GroupByStatsProviderTest.java index f055da3b80ef..bb1ca0cdc884 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/GroupByStatsProviderTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/GroupByStatsProviderTest.java @@ -55,8 +55,8 @@ public void testMetricCollection() GroupByStatsProvider.AggregateStats aggregateStats = statsProvider.getStatsSince(); Assert.assertEquals(0L, aggregateStats.getMergeBufferQueries()); Assert.assertEquals(0L, aggregateStats.getMergeBufferAcquisitionTimeNs()); - Assert.assertEquals(0L, aggregateStats.getTotalMergeBufferUsedBytes()); Assert.assertEquals(0L, aggregateStats.getMaxMergeBufferAcquisitionTimeNs()); + Assert.assertEquals(0L, aggregateStats.getTotalMergeBufferUsedBytes()); Assert.assertEquals(0L, aggregateStats.getMaxMergeBufferUsedBytes()); Assert.assertEquals(0L, aggregateStats.getSpilledQueries()); Assert.assertEquals(0L, aggregateStats.getSpilledBytes()); @@ -70,8 +70,8 @@ public void testMetricCollection() aggregateStats = statsProvider.getStatsSince(); Assert.assertEquals(2, aggregateStats.getMergeBufferQueries()); Assert.assertEquals(1800L, aggregateStats.getMergeBufferAcquisitionTimeNs()); + Assert.assertEquals(1100L, aggregateStats.getMaxMergeBufferAcquisitionTimeNs()); Assert.assertEquals(150L, aggregateStats.getTotalMergeBufferUsedBytes()); - 
Assert.assertEquals(1100L, aggregateStats.getMaxMergeBufferAcquisitionTimeNs()); Assert.assertEquals(100L, aggregateStats.getMaxMergeBufferUsedBytes()); Assert.assertEquals(2L, aggregateStats.getSpilledQueries()); Assert.assertEquals(1600L, aggregateStats.getSpilledBytes()); diff --git a/server/src/main/java/org/apache/druid/server/metrics/GroupByStatsMonitor.java b/server/src/main/java/org/apache/druid/server/metrics/GroupByStatsMonitor.java index 08f0d151cd52..e9ed498a72bd 100644 --- a/server/src/main/java/org/apache/druid/server/metrics/GroupByStatsMonitor.java +++ b/server/src/main/java/org/apache/druid/server/metrics/GroupByStatsMonitor.java @@ -68,10 +68,10 @@ public boolean doMonitor(ServiceEmitter emitter) emitter.emit( builder.setMetric("mergeBuffer/acquisitionTimeNs", statsContainer.getMergeBufferAcquisitionTimeNs()) ); - emitter.emit(builder.setMetric("mergeBuffer/bytesUsed", statsContainer.getTotalMergeBufferUsedBytes())); emitter.emit( builder.setMetric("mergeBuffer/maxAcquisitionTimeNs", statsContainer.getMaxMergeBufferAcquisitionTimeNs()) ); + emitter.emit(builder.setMetric("mergeBuffer/bytesUsed", statsContainer.getTotalMergeBufferUsedBytes())); emitter.emit(builder.setMetric("mergeBuffer/maxBytesUsed", statsContainer.getMaxMergeBufferUsedBytes())); } @@ -83,9 +83,7 @@ public boolean doMonitor(ServiceEmitter emitter) if (statsContainer.getMergeDictionarySize() > 0) { emitter.emit(builder.setMetric("groupBy/mergeDictionarySize", statsContainer.getMergeDictionarySize())); - emitter.emit( - builder.setMetric("groupBy/maxMergeDictionarySize", statsContainer.getMaxMergeDictionarySize()) - ); + emitter.emit(builder.setMetric("groupBy/maxMergeDictionarySize", statsContainer.getMaxMergeDictionarySize())); } return true; diff --git a/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java b/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java index ccd8ca828ce9..65ac3ba5bc3a 100644 --- 
a/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java +++ b/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java @@ -98,8 +98,8 @@ public void testMonitor() emitter.verifyValue("mergeBuffer/used", 0L); emitter.verifyValue("mergeBuffer/queries", 1L); emitter.verifyValue("mergeBuffer/acquisitionTimeNs", 100L); - emitter.verifyValue("mergeBuffer/bytesUsed", 200L); emitter.verifyValue("mergeBuffer/maxAcquisitionTimeNs", 100L); + emitter.verifyValue("mergeBuffer/bytesUsed", 200L); emitter.verifyValue("mergeBuffer/maxBytesUsed", 200L); emitter.verifyValue("groupBy/spilledQueries", 2L); emitter.verifyValue("groupBy/spilledBytes", 200L); @@ -134,8 +134,8 @@ public void testMonitorWithServiceDimensions() verifyMetricValue(emitter, "mergeBuffer/used", dimFilters, 0L); verifyMetricValue(emitter, "mergeBuffer/queries", dimFilters, 1L); verifyMetricValue(emitter, "mergeBuffer/acquisitionTimeNs", dimFilters, 100L); - verifyMetricValue(emitter, "mergeBuffer/bytesUsed", dimFilters, 200L); verifyMetricValue(emitter, "mergeBuffer/maxAcquisitionTimeNs", dimFilters, 100L); + verifyMetricValue(emitter, "mergeBuffer/bytesUsed", dimFilters, 200L); verifyMetricValue(emitter, "mergeBuffer/maxBytesUsed", dimFilters, 200L); verifyMetricValue(emitter, "groupBy/spilledQueries", dimFilters, 2L); verifyMetricValue(emitter, "groupBy/spilledBytes", dimFilters, 200L); From e416867cfc27e03886fbad751f28ccc7a9189b4a Mon Sep 17 00:00:00 2001 From: GWphua Date: Wed, 14 Jan 2026 17:52:46 +0800 Subject: [PATCH 19/32] Track the open addressing hash table --- .../groupby/epinephelinae/ByteBufferHashTable.java | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferHashTable.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferHashTable.java index 05465a6db79f..d8fc90299ebb 100644 --- 
a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferHashTable.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferHashTable.java @@ -26,6 +26,14 @@ import java.nio.ByteBuffer; +/** + * A fixed-width, open-addressing hash table that lives inside a caller-provided byte buffer. + *

+ * The table uses a contiguous slice of the merge buffer as its backing store. Each bucket holds at most one entry, + * and occupies {@code bucketSizeWithHash} number of bytes. Collisions are resolved by continuously probing the + * next bucket to find an empty bucket to slot the new entry. The current table view is maintained as a + * {@link ByteBuffer} slice that moves and grows within the arena as the table expands. + */ public class ByteBufferHashTable { public static int calculateTableArenaSizeWithPerBucketAdditionalSize( @@ -230,7 +238,6 @@ public void adjustTableWhenFull() maxBuckets = newBuckets; regrowthThreshold = newMaxSize; tableBuffer = newTableBuffer; - updateMaxTableBufferUsedBytes(); tableStart = newTableStart; growthCount++; @@ -251,6 +258,7 @@ protected void initializeNewBucketKey( tableBuffer.putInt(Groupers.getUsedFlag(keyHash)); tableBuffer.put(keyBuffer); size++; + updateMaxTableBufferUsedBytes(); if (bucketUpdateHandler != null) { bucketUpdateHandler.handleNewBucket(offset); @@ -389,7 +397,7 @@ public int getGrowthCount() protected void updateMaxTableBufferUsedBytes() { - maxTableBufferUsedBytes = Math.max(maxTableBufferUsedBytes, tableBuffer.capacity()); + maxTableBufferUsedBytes = Math.max(maxTableBufferUsedBytes, (long) size * bucketSizeWithHash); } public long getMaxTableBufferUsedBytes() From a26c40a8f24f2a786e145f31b9a9353fb6e7b6c2 Mon Sep 17 00:00:00 2001 From: GWphua Date: Wed, 21 Jan 2026 11:30:20 +0800 Subject: [PATCH 20/32] Remove max metrics, push them in another PR... 
--- docs/operations/metrics.md | 9 ----- .../query/groupby/GroupByStatsProvider.java | 40 +------------------ .../server/metrics/GroupByStatsMonitor.java | 5 --- .../metrics/GroupByStatsMonitorTest.java | 14 +------ 4 files changed, 3 insertions(+), 65 deletions(-) diff --git a/docs/operations/metrics.md b/docs/operations/metrics.md index 8983b79c5413..524d2ecd42ea 100644 --- a/docs/operations/metrics.md +++ b/docs/operations/metrics.md @@ -89,14 +89,11 @@ Most metric values reset each emission period, as specified in `druid.monitoring |`mergeBuffer/used`|Number of merge buffers used from the merge buffer pool.|This metric is only available if the `GroupByStatsMonitor` module is included.|Depends on the number of groupBy queries needing merge buffers.| |`mergeBuffer/queries`|Number of groupBy queries that acquired a batch of buffers from the merge buffer pool.|This metric is only available if the `GroupByStatsMonitor` module is included.|Depends on the number of groupBy queries needing merge buffers.| |`mergeBuffer/acquisitionTimeNs`|Total time in nanoseconds to acquire merge buffer for groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| -|`mergeBuffer/maxAcquisitionTimeNs`|Maximum time in nanoseconds to acquire merge buffer for any single groupBy query within the emission period.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`mergeBuffer/bytesUsed`|Number of bytes used by merge buffers to process groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`mergeBuffer/maxBytesUsed`|Maximum number of bytes used by merge buffers for any single groupBy query within the emission period.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`groupBy/spilledQueries`|Number of groupBy queries that have spilled onto the disk.|This metric is only available if the `GroupByStatsMonitor` module is 
included.|Varies| |`groupBy/spilledBytes`|Number of bytes spilled on the disk by the groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| -|`groupBy/maxSpilledBytes`|Maximum number of bytes spilled to disk by any single groupBy query within the emission period.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`groupBy/mergeDictionarySize`|Size of on-heap merge dictionary in bytes.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| -|`groupBy/maxMergeDictionarySize`|Maximum size of the on-heap merge dictionary in bytes observed for any single groupBy query within the emission period.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| ### Historical @@ -118,14 +115,11 @@ Most metric values reset each emission period, as specified in `druid.monitoring |`mergeBuffer/used`|Number of merge buffers used from the merge buffer pool.|This metric is only available if the `GroupByStatsMonitor` module is included.|Depends on the number of groupBy queries needing merge buffers.| |`mergeBuffer/queries`|Number of groupBy queries that acquired a batch of buffers from the merge buffer pool.|This metric is only available if the `GroupByStatsMonitor` module is included.|Depends on the number of groupBy queries needing merge buffers.| |`mergeBuffer/acquisitionTimeNs`|Total time in nanoseconds to acquire merge buffer for groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| -|`mergeBuffer/maxAcquisitionTimeNs`|Maximum time in nanoseconds to acquire merge buffer for any single groupBy query within the emission period.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`mergeBuffer/bytesUsed`|Number of bytes used by merge buffers to process groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| 
|`mergeBuffer/maxBytesUsed`|Maximum number of bytes used by merge buffers for any single groupBy query within the emission period.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`groupBy/spilledQueries`|Number of groupBy queries that have spilled onto the disk.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`groupBy/spilledBytes`|Number of bytes spilled on the disk by the groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| -|`groupBy/maxSpilledBytes`|Maximum number of bytes spilled to disk by any single groupBy query within the emission period.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`groupBy/mergeDictionarySize`|Size of on-heap merge dictionary in bytes.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| -|`groupBy/maxMergeDictionarySize`|Maximum size of the on-heap merge dictionary in bytes observed for any single groupBy query within the emission period.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| ### Real-time @@ -150,14 +144,11 @@ to represent the task ID are deprecated and will be removed in a future release. |`mergeBuffer/used`|Number of merge buffers used from the merge buffer pool. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Depends on the number of groupBy queries needing merge buffers.| |`mergeBuffer/queries`|Number of groupBy queries that acquired a batch of buffers from the merge buffer pool. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Depends on the number of groupBy queries needing merge buffers.| |`mergeBuffer/acquisitionTimeNs`|Total time in nanoseconds to acquire merge buffer for groupBy queries. 
This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| -|`mergeBuffer/maxAcquisitionTimeNs`|Maximum time in nanoseconds to acquire merge buffer for any single groupBy query within the emission period. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| |`mergeBuffer/bytesUsed`|Number of bytes used by merge buffers to process groupBy queries. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| |`mergeBuffer/maxBytesUsed`|Maximum number of bytes used by merge buffers for any single groupBy query within the emission period. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| |`groupBy/spilledQueries`|Number of groupBy queries that have spilled onto the disk. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| |`groupBy/spilledBytes`|Number of bytes spilled on the disk by the groupBy queries. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| -|`groupBy/maxSpilledBytes`|Maximum number of bytes spilled to disk by any single groupBy query within the emission period. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| |`groupBy/mergeDictionarySize`|Size of on-heap merge dictionary in bytes. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| -|`groupBy/maxMergeDictionarySize`|Maximum size of the on-heap merge dictionary in bytes observed for any single groupBy query within the emission period. 
This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| ### Jetty diff --git a/processing/src/main/java/org/apache/druid/query/groupby/GroupByStatsProvider.java b/processing/src/main/java/org/apache/druid/query/groupby/GroupByStatsProvider.java index 5b9658df6d0f..52326bc0fe8d 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/GroupByStatsProvider.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/GroupByStatsProvider.java @@ -71,13 +71,10 @@ public static class AggregateStats private long mergeBufferQueries = 0; private long mergeBufferAcquisitionTimeNs = 0; private long totalMergeBufferUsedBytes = 0; - private long maxMergeBufferAcquisitionTimeNs = 0; private long maxMergeBufferUsedBytes = 0; private long spilledQueries = 0; private long spilledBytes = 0; - private long maxSpilledBytes = 0; private long mergeDictionarySize = 0; - private long maxMergeDictionarySize = 0; public AggregateStats() { @@ -89,13 +86,10 @@ public AggregateStats(AggregateStats aggregateStats) aggregateStats.mergeBufferQueries, aggregateStats.mergeBufferAcquisitionTimeNs, aggregateStats.totalMergeBufferUsedBytes, - aggregateStats.maxMergeBufferAcquisitionTimeNs, aggregateStats.maxMergeBufferUsedBytes, aggregateStats.spilledQueries, aggregateStats.spilledBytes, - aggregateStats.maxSpilledBytes, - aggregateStats.mergeDictionarySize, - aggregateStats.maxMergeDictionarySize + aggregateStats.mergeDictionarySize ); } @@ -103,25 +97,19 @@ public AggregateStats( long mergeBufferQueries, long mergeBufferAcquisitionTimeNs, long totalMergeBufferUsedBytes, - long maxMergeBufferAcquisitionTimeNs, long maxMergeBufferUsedBytes, long spilledQueries, long spilledBytes, - long maxSpilledBytes, - long mergeDictionarySize, - long maxMergeDictionarySize + long mergeDictionarySize ) { this.mergeBufferQueries = mergeBufferQueries; this.mergeBufferAcquisitionTimeNs = mergeBufferAcquisitionTimeNs; this.totalMergeBufferUsedBytes 
= totalMergeBufferUsedBytes; - this.maxMergeBufferAcquisitionTimeNs = maxMergeBufferAcquisitionTimeNs; this.maxMergeBufferUsedBytes = maxMergeBufferUsedBytes; this.spilledQueries = spilledQueries; this.spilledBytes = spilledBytes; - this.maxSpilledBytes = maxSpilledBytes; this.mergeDictionarySize = mergeDictionarySize; - this.maxMergeDictionarySize = maxMergeDictionarySize; } public long getMergeBufferQueries() @@ -134,11 +122,6 @@ public long getMergeBufferAcquisitionTimeNs() return mergeBufferAcquisitionTimeNs; } - public long getMaxMergeBufferAcquisitionTimeNs() - { - return maxMergeBufferAcquisitionTimeNs; - } - public long getTotalMergeBufferUsedBytes() { return totalMergeBufferUsedBytes; @@ -159,30 +142,16 @@ public long getSpilledBytes() return spilledBytes; } - public long getMaxSpilledBytes() - { - return maxSpilledBytes; - } - public long getMergeDictionarySize() { return mergeDictionarySize; } - public long getMaxMergeDictionarySize() - { - return maxMergeDictionarySize; - } - public void addQueryStats(PerQueryStats perQueryStats) { if (perQueryStats.getMergeBufferAcquisitionTimeNs() > 0) { mergeBufferQueries++; mergeBufferAcquisitionTimeNs += perQueryStats.getMergeBufferAcquisitionTimeNs(); - maxMergeBufferAcquisitionTimeNs = Math.max( - maxMergeBufferAcquisitionTimeNs, - perQueryStats.getMergeBufferAcquisitionTimeNs() - ); totalMergeBufferUsedBytes += perQueryStats.getMergeBufferTotalUsedBytes(); maxMergeBufferUsedBytes = Math.max(maxMergeBufferUsedBytes, perQueryStats.getMergeBufferTotalUsedBytes()); } @@ -190,25 +159,20 @@ public void addQueryStats(PerQueryStats perQueryStats) if (perQueryStats.getSpilledBytes() > 0) { spilledQueries++; spilledBytes += perQueryStats.getSpilledBytes(); - maxSpilledBytes = Math.max(maxSpilledBytes, perQueryStats.getSpilledBytes()); } mergeDictionarySize += perQueryStats.getMergeDictionarySize(); - maxMergeDictionarySize = Math.max(maxMergeDictionarySize, perQueryStats.getMergeDictionarySize()); } public void reset() { 
this.mergeBufferQueries = 0; this.mergeBufferAcquisitionTimeNs = 0; - this.maxMergeBufferAcquisitionTimeNs = 0; this.totalMergeBufferUsedBytes = 0; this.maxMergeBufferUsedBytes = 0; this.spilledQueries = 0; this.spilledBytes = 0; - this.maxSpilledBytes = 0; this.mergeDictionarySize = 0; - this.maxMergeDictionarySize = 0; } } diff --git a/server/src/main/java/org/apache/druid/server/metrics/GroupByStatsMonitor.java b/server/src/main/java/org/apache/druid/server/metrics/GroupByStatsMonitor.java index e9ed498a72bd..6dea5a9110ed 100644 --- a/server/src/main/java/org/apache/druid/server/metrics/GroupByStatsMonitor.java +++ b/server/src/main/java/org/apache/druid/server/metrics/GroupByStatsMonitor.java @@ -68,9 +68,6 @@ public boolean doMonitor(ServiceEmitter emitter) emitter.emit( builder.setMetric("mergeBuffer/acquisitionTimeNs", statsContainer.getMergeBufferAcquisitionTimeNs()) ); - emitter.emit( - builder.setMetric("mergeBuffer/maxAcquisitionTimeNs", statsContainer.getMaxMergeBufferAcquisitionTimeNs()) - ); emitter.emit(builder.setMetric("mergeBuffer/bytesUsed", statsContainer.getTotalMergeBufferUsedBytes())); emitter.emit(builder.setMetric("mergeBuffer/maxBytesUsed", statsContainer.getMaxMergeBufferUsedBytes())); } @@ -78,12 +75,10 @@ public boolean doMonitor(ServiceEmitter emitter) if (statsContainer.getSpilledQueries() > 0) { emitter.emit(builder.setMetric("groupBy/spilledQueries", statsContainer.getSpilledQueries())); emitter.emit(builder.setMetric("groupBy/spilledBytes", statsContainer.getSpilledBytes())); - emitter.emit(builder.setMetric("groupBy/maxSpilledBytes", statsContainer.getMaxSpilledBytes())); } if (statsContainer.getMergeDictionarySize() > 0) { emitter.emit(builder.setMetric("groupBy/mergeDictionarySize", statsContainer.getMergeDictionarySize())); - emitter.emit(builder.setMetric("groupBy/maxMergeDictionarySize", statsContainer.getMaxMergeDictionarySize())); } return true; diff --git 
a/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java b/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java index 65ac3ba5bc3a..d2b16f4b2c78 100644 --- a/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java +++ b/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java @@ -61,12 +61,9 @@ public synchronized AggregateStats getStatsSince() 1L, 100L, 200L, - 100L, 200L, 2L, 200L, - 200L, - 300L, 300L ); } @@ -93,19 +90,16 @@ public void testMonitor() // Trigger metric emission monitor.doMonitor(emitter); - Assert.assertEquals(12, emitter.getNumEmittedEvents()); + Assert.assertEquals(9, emitter.getNumEmittedEvents()); emitter.verifyValue("mergeBuffer/pendingRequests", 0L); emitter.verifyValue("mergeBuffer/used", 0L); emitter.verifyValue("mergeBuffer/queries", 1L); emitter.verifyValue("mergeBuffer/acquisitionTimeNs", 100L); - emitter.verifyValue("mergeBuffer/maxAcquisitionTimeNs", 100L); emitter.verifyValue("mergeBuffer/bytesUsed", 200L); emitter.verifyValue("mergeBuffer/maxBytesUsed", 200L); emitter.verifyValue("groupBy/spilledQueries", 2L); emitter.verifyValue("groupBy/spilledBytes", 200L); - emitter.verifyValue("groupBy/maxSpilledBytes", 200L); emitter.verifyValue("groupBy/mergeDictionarySize", 300L); - emitter.verifyValue("groupBy/maxMergeDictionarySize", 300L); } @Test @@ -134,14 +128,11 @@ public void testMonitorWithServiceDimensions() verifyMetricValue(emitter, "mergeBuffer/used", dimFilters, 0L); verifyMetricValue(emitter, "mergeBuffer/queries", dimFilters, 1L); verifyMetricValue(emitter, "mergeBuffer/acquisitionTimeNs", dimFilters, 100L); - verifyMetricValue(emitter, "mergeBuffer/maxAcquisitionTimeNs", dimFilters, 100L); verifyMetricValue(emitter, "mergeBuffer/bytesUsed", dimFilters, 200L); verifyMetricValue(emitter, "mergeBuffer/maxBytesUsed", dimFilters, 200L); verifyMetricValue(emitter, "groupBy/spilledQueries", dimFilters, 2L); 
verifyMetricValue(emitter, "groupBy/spilledBytes", dimFilters, 200L); - verifyMetricValue(emitter, "groupBy/maxSpilledBytes", dimFilters, 200L); verifyMetricValue(emitter, "groupBy/mergeDictionarySize", dimFilters, 300L); - verifyMetricValue(emitter, "groupBy/maxMergeDictionarySize", dimFilters, 300L); } @Test @@ -237,10 +228,7 @@ public void testMonitoringWithMultipleResources() emitter.verifyValue("groupBy/spilledBytes", 1100L); emitter.verifyValue("groupBy/mergeDictionarySize", 600L); - emitter.verifyValue("mergeBuffer/maxAcquisitionTimeNs", 500L); emitter.verifyValue("mergeBuffer/maxBytesUsed", 150L); - emitter.verifyValue("groupBy/maxSpilledBytes", 800L); - emitter.verifyValue("groupBy/maxMergeDictionarySize", 300L); } private void verifyMetricValue(StubServiceEmitter emitter, String metricName, Map dimFilters, Number expectedValue) From 972553207c2053a859ee0d734c7e19327a16706d Mon Sep 17 00:00:00 2001 From: GWphua Date: Wed, 21 Jan 2026 11:39:10 +0800 Subject: [PATCH 21/32] Remove max metrics in GroupByStatsProviderTest --- .../groupby/epinephelinae/ByteBufferIntList.java | 5 ++++- .../query/groupby/GroupByStatsProviderTest.java | 13 ++----------- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferIntList.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferIntList.java index b6b15f1e5774..33a79451993e 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferIntList.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferIntList.java @@ -32,7 +32,10 @@ public class ByteBufferIntList private int maxMergeBufferUsedBytes; - public ByteBufferIntList(ByteBuffer buffer, int maxElements) + public ByteBufferIntList( + ByteBuffer buffer, + int maxElements + ) { this.buffer = buffer; this.maxElements = maxElements; diff --git 
a/processing/src/test/java/org/apache/druid/query/groupby/GroupByStatsProviderTest.java b/processing/src/test/java/org/apache/druid/query/groupby/GroupByStatsProviderTest.java index bb1ca0cdc884..f71678b9bd0c 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/GroupByStatsProviderTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/GroupByStatsProviderTest.java @@ -55,14 +55,11 @@ public void testMetricCollection() GroupByStatsProvider.AggregateStats aggregateStats = statsProvider.getStatsSince(); Assert.assertEquals(0L, aggregateStats.getMergeBufferQueries()); Assert.assertEquals(0L, aggregateStats.getMergeBufferAcquisitionTimeNs()); - Assert.assertEquals(0L, aggregateStats.getMaxMergeBufferAcquisitionTimeNs()); Assert.assertEquals(0L, aggregateStats.getTotalMergeBufferUsedBytes()); Assert.assertEquals(0L, aggregateStats.getMaxMergeBufferUsedBytes()); Assert.assertEquals(0L, aggregateStats.getSpilledQueries()); Assert.assertEquals(0L, aggregateStats.getSpilledBytes()); - Assert.assertEquals(0L, aggregateStats.getMaxSpilledBytes()); Assert.assertEquals(0L, aggregateStats.getMergeDictionarySize()); - Assert.assertEquals(0L, aggregateStats.getMaxMergeDictionarySize()); statsProvider.closeQuery(id1); statsProvider.closeQuery(id2); @@ -70,18 +67,15 @@ public void testMetricCollection() aggregateStats = statsProvider.getStatsSince(); Assert.assertEquals(2, aggregateStats.getMergeBufferQueries()); Assert.assertEquals(1800L, aggregateStats.getMergeBufferAcquisitionTimeNs()); - Assert.assertEquals(1100L, aggregateStats.getMaxMergeBufferAcquisitionTimeNs()); Assert.assertEquals(150L, aggregateStats.getTotalMergeBufferUsedBytes()); Assert.assertEquals(100L, aggregateStats.getMaxMergeBufferUsedBytes()); Assert.assertEquals(2L, aggregateStats.getSpilledQueries()); Assert.assertEquals(1600L, aggregateStats.getSpilledBytes()); - Assert.assertEquals(1000L, aggregateStats.getMaxSpilledBytes()); - Assert.assertEquals(1000L, 
aggregateStats.getMergeDictionarySize()); - Assert.assertEquals(700L, aggregateStats.getMaxMergeDictionarySize()); + Assert.assertEquals(1000L, aggregateStats.getMergeDictionarySize()); } @Test - public void testMaxMetricsWithVaryingMaxPerMetric() + public void testMetricsWithMultipleQueries() { GroupByStatsProvider statsProvider = new GroupByStatsProvider(); @@ -120,10 +114,7 @@ public void testMaxMetricsWithVaryingMaxPerMetric() GroupByStatsProvider.AggregateStats aggregateStats = statsProvider.getStatsSince(); - Assert.assertEquals(2000L, aggregateStats.getMaxMergeBufferAcquisitionTimeNs()); Assert.assertEquals(500L, aggregateStats.getMaxMergeBufferUsedBytes()); - Assert.assertEquals(3000L, aggregateStats.getMaxSpilledBytes()); - Assert.assertEquals(1500L, aggregateStats.getMaxMergeDictionarySize()); Assert.assertEquals(4L, aggregateStats.getMergeBufferQueries()); Assert.assertEquals(2600L, aggregateStats.getMergeBufferAcquisitionTimeNs()); From 145571237d038d78fe14260a995974f666fba1d1 Mon Sep 17 00:00:00 2001 From: GWphua Date: Wed, 21 Jan 2026 16:45:25 +0800 Subject: [PATCH 22/32] LimitedBufferHashGrouper to use parent method to report maxTableBufferUsedBytes --- .../epinephelinae/ByteBufferHashTable.java | 8 ++++---- .../epinephelinae/LimitedBufferHashGrouper.java | 16 ++-------------- 2 files changed, 6 insertions(+), 18 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferHashTable.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferHashTable.java index d8fc90299ebb..a7a23bb53891 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferHashTable.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferHashTable.java @@ -29,10 +29,10 @@ /** * A fixed-width, open-addressing hash table that lives inside a caller-provided byte buffer. *

- * The table uses a contiguous slice of the merge buffer as its backing store. Each bucket holds at most one entry, - * and occupies {@code bucketSizeWithHash} number of bytes. Collisions are resolved by continuously probing the - * next bucket to find an empty bucket to slot the new entry. The current table view is maintained as a - * {@link ByteBuffer} slice that moves and grows within the arena as the table expands. + * The table uses a contiguous slice of the input {@link ByteBuffer} as its backing store. Each bucket holds + * at most one entry, and occupies {@code bucketSizeWithHash} number of bytes. Collisions are resolved by continuously + * probing the next bucket to find an empty bucket to slot the new entry. The current table view {@code tableBuffer} + * is maintained as a {@link ByteBuffer} slice that moves and grows within the arena as the table expands. */ public class ByteBufferHashTable { diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java index e1281e404b45..f527fc73c4d6 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java @@ -515,7 +515,6 @@ public AlternatingByteBufferHashTable( subHashTable2Buffer = subHashTable2Buffer.slice(); subHashTableBuffers = new ByteBuffer[]{subHashTable1Buffer, subHashTable2Buffer}; - updateMaxTableBufferUsedBytes(); } @Override @@ -528,7 +527,7 @@ public void reset() subHashTableBuffers[0].put(i * bucketSizeWithHash, (byte) 0); } tableBuffer = subHashTableBuffers[0]; - updateMaxTableBufferUsedBytes(); + this.updateMaxTableBufferUsedBytes(); } @Override @@ -584,20 +583,9 @@ public void adjustTableWhenFull() } size = numCopied; + this.updateMaxTableBufferUsedBytes(); tableBuffer = newTableBuffer; - 
updateMaxTableBufferUsedBytes(); growthCount++; } - - @Override - protected void updateMaxTableBufferUsedBytes() - { - long currentBufferUsedBytes = 0; - for (ByteBuffer buffer : subHashTableBuffers) { - currentBufferUsedBytes += buffer.capacity(); - } - - maxTableBufferUsedBytes = Math.max(maxTableBufferUsedBytes, currentBufferUsedBytes); - } } } From 5db69c566b5c7bf39a8cbe49df2d467e03d102c8 Mon Sep 17 00:00:00 2001 From: GWphua Date: Wed, 21 Jan 2026 16:52:37 +0800 Subject: [PATCH 23/32] Standardised merge buffer names --- .../AbstractBufferHashGrouper.java | 2 +- .../epinephelinae/BufferHashGrouper.java | 2 +- .../epinephelinae/ByteBufferHashTable.java | 18 +++++++++--------- .../LimitedBufferHashGrouper.java | 4 ++-- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AbstractBufferHashGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AbstractBufferHashGrouper.java index 2326e67d55b0..6b085f871a84 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AbstractBufferHashGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AbstractBufferHashGrouper.java @@ -179,7 +179,7 @@ public void close() */ public long getMergeBufferUsedBytes() { - return hashTable.getMaxTableBufferUsedBytes(); + return hashTable.getMaxMergeBufferUsedBytes(); } /** diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java index 2f4e8c9e6c8b..227a1b4211e1 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java @@ -159,7 +159,7 @@ public long getMergeBufferUsedBytes() return 0L; } - long hashTableUsage = 
hashTable.getMaxTableBufferUsedBytes(); + long hashTableUsage = hashTable.getMaxMergeBufferUsedBytes(); long offSetListUsage = offsetList.getMaxMergeBufferUsedBytes(); return hashTableUsage + offSetListUsage; } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferHashTable.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferHashTable.java index a7a23bb53891..0f15a4afcda3 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferHashTable.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferHashTable.java @@ -87,8 +87,8 @@ public static int calculateTableArenaSizeWithFixedAdditionalSize( @Nullable protected BucketUpdateHandler bucketUpdateHandler; - // Keeps track on how many bytes is being used in the merge buffer. - protected long maxTableBufferUsedBytes; + // Tracks maximum bytes used for the entire lifecycle of this hash table. + protected long maxMergeBufferUsedBytes; public ByteBufferHashTable( float maxLoadFactor, @@ -108,7 +108,7 @@ public ByteBufferHashTable( this.maxSizeForTesting = maxSizeForTesting; this.tableArenaSize = buffer.capacity(); this.bucketUpdateHandler = bucketUpdateHandler; - this.maxTableBufferUsedBytes = 0; + this.maxMergeBufferUsedBytes = 0; } public void reset() @@ -151,7 +151,7 @@ public void reset() bufferDup.position(tableStart); bufferDup.limit(tableStart + maxBuckets * bucketSizeWithHash); tableBuffer = bufferDup.slice(); - updateMaxTableBufferUsedBytes(); + updateMaxMergeBufferUsedBytes(); // Clear used bits of new table for (int i = 0; i < maxBuckets; i++) { @@ -258,7 +258,7 @@ protected void initializeNewBucketKey( tableBuffer.putInt(Groupers.getUsedFlag(keyHash)); tableBuffer.put(keyBuffer); size++; - updateMaxTableBufferUsedBytes(); + updateMaxMergeBufferUsedBytes(); if (bucketUpdateHandler != null) { bucketUpdateHandler.handleNewBucket(offset); @@ -395,14 +395,14 @@ public int 
getGrowthCount() return growthCount; } - protected void updateMaxTableBufferUsedBytes() + protected void updateMaxMergeBufferUsedBytes() { - maxTableBufferUsedBytes = Math.max(maxTableBufferUsedBytes, (long) size * bucketSizeWithHash); + maxMergeBufferUsedBytes = Math.max(maxMergeBufferUsedBytes, (long) size * bucketSizeWithHash); } - public long getMaxTableBufferUsedBytes() + public long getMaxMergeBufferUsedBytes() { - return maxTableBufferUsedBytes; + return maxMergeBufferUsedBytes; } public interface BucketUpdateHandler diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java index f527fc73c4d6..dd737632516a 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java @@ -522,12 +522,12 @@ public void reset() { size = 0; growthCount = 0; + updateMaxMergeBufferUsedBytes(); // clear the used bits of the first buffer for (int i = 0; i < maxBuckets; i++) { subHashTableBuffers[0].put(i * bucketSizeWithHash, (byte) 0); } tableBuffer = subHashTableBuffers[0]; - this.updateMaxTableBufferUsedBytes(); } @Override @@ -583,7 +583,7 @@ public void adjustTableWhenFull() } size = numCopied; - this.updateMaxTableBufferUsedBytes(); + updateMaxMergeBufferUsedBytes(); tableBuffer = newTableBuffer; growthCount++; } From 9ce074adfe8d2347c97bf4c544df13bc79523b19 Mon Sep 17 00:00:00 2001 From: GWphua Date: Wed, 21 Jan 2026 17:38:07 +0800 Subject: [PATCH 24/32] Tests for buffer hash grouper Tests for buffer hash grouper --- .../epinephelinae/BufferHashGrouperTest.java | 55 ++++++++++++++++++ .../LimitedBufferHashGrouperTest.java | 57 +++++++++++++++++++ 2 files changed, 112 insertions(+) diff --git 
a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouperTest.java b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouperTest.java index c96bc50dd78d..e07357cccc6c 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouperTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouperTest.java @@ -156,6 +156,61 @@ public void testNoGrowing() } } + @Test + public void testMaxMergeBufferUsedBytesWorksNormally() + { + final GroupByTestColumnSelectorFactory columnSelectorFactory = GrouperTestUtil.newColumnSelectorFactory(); + final BufferHashGrouper grouper = new BufferHashGrouper<>( + Suppliers.ofInstance(ByteBuffer.allocate(1000)), + GrouperTestUtil.intKeySerde(), + AggregatorAdapters.factorizeBuffered( + columnSelectorFactory, + ImmutableList.of( + new LongSumAggregatorFactory("valueSum", "value"), + new CountAggregatorFactory("count") + ) + ), + Integer.MAX_VALUE, + 0, + 0, + true + ); + grouper.init(); + + long initialUsage = grouper.getMergeBufferUsedBytes(); + Assert.assertEquals(0L, initialUsage); + + columnSelectorFactory.setRow(new MapBasedRow(0, ImmutableMap.of("value", 10L))); + + grouper.aggregate(new IntKey(1)); + final long expectedBucketSize = grouper.getMergeBufferUsedBytes(); + + grouper.aggregate(new IntKey(2)); + grouper.aggregate(new IntKey(3)); + + Assert.assertEquals(3L * expectedBucketSize, grouper.getMergeBufferUsedBytes()); + + grouper.aggregate(new IntKey(4)); + grouper.aggregate(new IntKey(5)); + + Assert.assertEquals(5L * expectedBucketSize, grouper.getMergeBufferUsedBytes()); + + grouper.reset(); + Assert.assertEquals(0, grouper.getSize()); + Assert.assertEquals(5L * expectedBucketSize, grouper.getMergeBufferUsedBytes()); + + grouper.aggregate(new IntKey(6)); + grouper.aggregate(new IntKey(7)); + grouper.aggregate(new IntKey(8)); + grouper.aggregate(new IntKey(9)); + grouper.aggregate(new 
IntKey(10)); + grouper.aggregate(new IntKey(11)); + + Assert.assertEquals(6L * expectedBucketSize, grouper.getMergeBufferUsedBytes()); + + grouper.close(); + } + private ResourceHolder> makeGrouper( GroupByTestColumnSelectorFactory columnSelectorFactory, int bufferSize, diff --git a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouperTest.java b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouperTest.java index 07631840231c..03e53a3fe925 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouperTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouperTest.java @@ -307,6 +307,63 @@ public void testIteratorOrderByAggsDesc() Assert.assertEquals(LIMIT, i); } + @Test + public void testMaxMergeBufferUsedBytesTracksMaxUsageAfterReset() + { + final GroupByTestColumnSelectorFactory columnSelectorFactory = GrouperTestUtil.newColumnSelectorFactory(); + final LimitedBufferHashGrouper grouper = makeGrouper(columnSelectorFactory, 20000); + + Assert.assertEquals(0L, grouper.getMergeBufferUsedBytes()); + columnSelectorFactory.setRow(new MapBasedRow(0, ImmutableMap.of("value", 10L))); + + Assert.assertTrue(String.valueOf(KEY_BASE), grouper.aggregate(new IntKey(KEY_BASE)).isOk()); + final long usagePerEntry = grouper.getMergeBufferUsedBytes(); + + grouper.reset(); + Assert.assertEquals(0, grouper.getSize()); + Assert.assertEquals(usagePerEntry, grouper.getMergeBufferUsedBytes()); + + // Add 10 entries after reset + for (int i = 0; i < 10; i++) { + Assert.assertTrue(String.valueOf(i + KEY_BASE), grouper.aggregate(new IntKey(i + KEY_BASE)).isOk()); + } + + Assert.assertEquals(10 * usagePerEntry, grouper.getMergeBufferUsedBytes()); + } + + @Test + public void testMaxMergeBufferUsedBytesAfterBufferSwap() + { + // This test closely follows the flow of testLimitAndBufferSwapping(). 
+ final GroupByTestColumnSelectorFactory columnSelectorFactory = GrouperTestUtil.newColumnSelectorFactory(); + final LimitedBufferHashGrouper grouper = makeGrouper(columnSelectorFactory, 20000); + + columnSelectorFactory.setRow(new MapBasedRow(0, ImmutableMap.of("value", 10L))); + + // Calculate usage per entry from first entry + Assert.assertTrue(String.valueOf(KEY_BASE), grouper.aggregate(new IntKey(KEY_BASE)).isOk()); + final long usagePerEntry = grouper.getMergeBufferUsedBytes(); + + // This results in 13 swaps and final size of 116 (100 keys + 16 new keys after last swap) + for (int i = 1; i < NUM_ROWS; i++) { + Assert.assertTrue(String.valueOf(i + KEY_BASE), grouper.aggregate(new IntKey(i + KEY_BASE)).isOk()); + } + + Assert.assertEquals(13, grouper.getGrowthCount()); + Assert.assertEquals(116, grouper.getSize()); + Assert.assertEquals(168, grouper.getMaxSize()); + + // Peak usage is the sum of hash table peak and heap peak, which peak at different sizes... + // Hash table peak: maxSize (168) * bucketSizeWithHash + // Heap peak: (LIMIT + 1) * Integer.BYTES (4) = 404 (heap can temporarily have LIMIT + 1 before removing one) + final long bucketSizeWithHash = usagePerEntry - Integer.BYTES; + final long hashTablePeak = grouper.getMaxSize() * bucketSizeWithHash; + final long heapPeak = (LIMIT + 1) * Integer.BYTES; + final long expectedPeakUsage = hashTablePeak + heapPeak; + + Assert.assertEquals(expectedPeakUsage, grouper.getMergeBufferUsedBytes()); + } + private static LimitedBufferHashGrouper makeGrouper( GroupByTestColumnSelectorFactory columnSelectorFactory, int bufferSize From 4f4a10a324047f1cff641cc4779c091b3ae72356 Mon Sep 17 00:00:00 2001 From: GWphua Date: Thu, 22 Jan 2026 12:03:25 +0800 Subject: [PATCH 25/32] Address multiplication cast --- .../groupby/epinephelinae/LimitedBufferHashGrouperTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouperTest.java b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouperTest.java index 03e53a3fe925..6ab7bef51d3d 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouperTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouperTest.java @@ -358,7 +358,7 @@ public void testMaxMergeBufferUsedBytesAfterBufferSwap() // Heap peak: (LIMIT + 1) * Integer.BYTES (4) = 404 (heap can temporarily have LIMIT + 1 before removing one) final long bucketSizeWithHash = usagePerEntry - Integer.BYTES; final long hashTablePeak = grouper.getMaxSize() * bucketSizeWithHash; - final long heapPeak = (LIMIT + 1) * Integer.BYTES; + final long heapPeak = ((long) LIMIT + 1) * Integer.BYTES; final long expectedPeakUsage = hashTablePeak + heapPeak; Assert.assertEquals(expectedPeakUsage, grouper.getMergeBufferUsedBytes()); From e92357d12bb139bcdf5c237714f616e4e3032784 Mon Sep 17 00:00:00 2001 From: GWphua Date: Thu, 22 Jan 2026 15:02:02 +0800 Subject: [PATCH 26/32] Javadocs for getMergeBufferUsedBytes --- .../groupby/epinephelinae/AbstractBufferHashGrouper.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AbstractBufferHashGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AbstractBufferHashGrouper.java index 6b085f871a84..9de75f7e2822 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AbstractBufferHashGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AbstractBufferHashGrouper.java @@ -174,8 +174,11 @@ public void close() } /** - * This method is implemented to return the highest memory value claimed by the Grouper. 
This is only - * used for monitoring the size of the merge buffers used. + * Retrieves the size of the merge buffers used for this groupby query. This value is retrieved when + * {@link SpillingGrouper#close()} is called. + *

+ * This method is implemented to return the highest memory value used, this is helpful especially in + * reporting the highest number of bytes used throughout the entire query lifecycle. */ public long getMergeBufferUsedBytes() { From d55e4028e2f4c7a4712a5ef1a0f600e7e62f34ac Mon Sep 17 00:00:00 2001 From: GWphua Date: Fri, 23 Jan 2026 10:00:22 +0800 Subject: [PATCH 27/32] Remix comments in test for peak calculations --- .../groupby/epinephelinae/LimitedBufferHashGrouperTest.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouperTest.java b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouperTest.java index 6ab7bef51d3d..0909d26eabb3 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouperTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouperTest.java @@ -353,12 +353,11 @@ public void testMaxMergeBufferUsedBytesAfterBufferSwap() Assert.assertEquals(116, grouper.getSize()); Assert.assertEquals(168, grouper.getMaxSize()); - // Peak usage is the sum of hash table peak and heap peak, which peak at different sizes... - // Hash table peak: maxSize (168) * bucketSizeWithHash - // Heap peak: (LIMIT + 1) * Integer.BYTES (4) = 404 (heap can temporarily have LIMIT + 1 before removing one) final long bucketSizeWithHash = usagePerEntry - Integer.BYTES; final long hashTablePeak = grouper.getMaxSize() * bucketSizeWithHash; + // Heap can temporarily have LIMIT + 1 before removing one final long heapPeak = ((long) LIMIT + 1) * Integer.BYTES; + // Peak usage is the sum of hash table peak and heap peak, which peak at different sizes... 
final long expectedPeakUsage = hashTablePeak + heapPeak; Assert.assertEquals(expectedPeakUsage, grouper.getMergeBufferUsedBytes()); From 988de0945e26cc8cf24e2b1dac8d165a40f0c0ef Mon Sep 17 00:00:00 2001 From: GWphua Date: Mon, 26 Jan 2026 10:16:19 +0800 Subject: [PATCH 28/32] Clean up after merging conflicts --- .../query/groupby/GroupByStatsProviderTest.java | 1 - .../druid/server/metrics/GroupByStatsMonitor.java | 14 ++------------ 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/processing/src/test/java/org/apache/druid/query/groupby/GroupByStatsProviderTest.java b/processing/src/test/java/org/apache/druid/query/groupby/GroupByStatsProviderTest.java index b7c83162b2ff..8e025a4599ff 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/GroupByStatsProviderTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/GroupByStatsProviderTest.java @@ -131,6 +131,5 @@ public void testMetricsWithMultipleQueries() Assert.assertEquals(4L, aggregateStats.getSpilledQueries()); Assert.assertEquals(3450L, aggregateStats.getSpilledBytes()); Assert.assertEquals(2250L, aggregateStats.getMergeDictionarySize()); - Assert.assertEquals(1000L, aggregateStats.getMergeDictionarySize()); } } diff --git a/server/src/main/java/org/apache/druid/server/metrics/GroupByStatsMonitor.java b/server/src/main/java/org/apache/druid/server/metrics/GroupByStatsMonitor.java index 1eb8e14aa296..e5f46020fe09 100644 --- a/server/src/main/java/org/apache/druid/server/metrics/GroupByStatsMonitor.java +++ b/server/src/main/java/org/apache/druid/server/metrics/GroupByStatsMonitor.java @@ -58,24 +58,14 @@ public boolean doMonitor(ServiceEmitter emitter) final ServiceMetricEvent.Builder builder = new ServiceMetricEvent.Builder(); emitter.emit(builder.setMetric("mergeBuffer/pendingRequests", mergeBufferPool.getPendingRequests())); - emitter.emit(builder.setMetric("mergeBuffer/used", mergeBufferPool.getUsedResourcesCount())); GroupByStatsProvider.AggregateStats 
statsContainer = groupByStatsProvider.getStatsSince(); if (statsContainer.getMergeBufferQueries() > 0) { emitter.emit(builder.setMetric("mergeBuffer/queries", statsContainer.getMergeBufferQueries())); - emitter.emit(builder.setMetric( - "mergeBuffer/acquisitionTimeNs", - statsContainer.getMergeBufferAcquisitionTimeNs() - )); - emitter.emit(builder.setMetric( - "mergeBuffer/maxAcquisitionTimeNs", - statsContainer.getMaxMergeBufferAcquisitionTimeNs() - )); - emitter.emit( - builder.setMetric("mergeBuffer/acquisitionTimeNs", statsContainer.getMergeBufferAcquisitionTimeNs()) - ); + emitter.emit(builder.setMetric("mergeBuffer/acquisitionTimeNs", statsContainer.getMergeBufferAcquisitionTimeNs())); + emitter.emit(builder.setMetric("mergeBuffer/maxAcquisitionTimeNs", statsContainer.getMaxMergeBufferAcquisitionTimeNs())); emitter.emit(builder.setMetric("mergeBuffer/bytesUsed", statsContainer.getTotalMergeBufferUsedBytes())); emitter.emit(builder.setMetric("mergeBuffer/maxBytesUsed", statsContainer.getMaxMergeBufferUsedBytes())); } From ce0590098e03b219e081bcf1a6e5a6f654f3376a Mon Sep 17 00:00:00 2001 From: GWphua Date: Thu, 26 Feb 2026 16:42:55 +0800 Subject: [PATCH 29/32] Standardize maxMergeBufferUsedBytes --- .../druid/query/groupby/GroupByStatsProvider.java | 14 +++++++------- .../epinephelinae/AbstractBufferHashGrouper.java | 2 +- .../groupby/epinephelinae/BufferHashGrouper.java | 2 +- .../epinephelinae/LimitedBufferHashGrouper.java | 4 ++-- .../groupby/epinephelinae/SpillingGrouper.java | 6 +++--- .../query/groupby/GroupByStatsProviderTest.java | 12 ++++++------ .../epinephelinae/BufferHashGrouperTest.java | 12 ++++++------ .../LimitedBufferHashGrouperTest.java | 12 ++++++------ .../server/metrics/GroupByStatsMonitorTest.java | 6 +++--- 9 files changed, 35 insertions(+), 35 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/groupby/GroupByStatsProvider.java b/processing/src/main/java/org/apache/druid/query/groupby/GroupByStatsProvider.java index 
6074933cac97..f6b92a7b62c1 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/GroupByStatsProvider.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/GroupByStatsProvider.java @@ -183,8 +183,8 @@ public void addQueryStats(PerQueryStats perQueryStats) maxMergeBufferAcquisitionTimeNs, perQueryStats.getMergeBufferAcquisitionTimeNs() ); - totalMergeBufferUsedBytes += perQueryStats.getMergeBufferTotalUsedBytes(); - maxMergeBufferUsedBytes = Math.max(maxMergeBufferUsedBytes, perQueryStats.getMergeBufferTotalUsedBytes()); + totalMergeBufferUsedBytes += perQueryStats.getMaxMergeBufferUsedBytes(); + maxMergeBufferUsedBytes = Math.max(maxMergeBufferUsedBytes, perQueryStats.getMaxMergeBufferUsedBytes()); } if (perQueryStats.getSpilledBytes() > 0) { @@ -215,7 +215,7 @@ public void reset() public static class PerQueryStats { private final AtomicLong mergeBufferAcquisitionTimeNs = new AtomicLong(0); - private final AtomicLong mergeBufferTotalUsedBytes = new AtomicLong(0); + private final AtomicLong maxMergeBufferUsedBytes = new AtomicLong(0); private final AtomicLong spilledBytes = new AtomicLong(0); private final AtomicLong mergeDictionarySize = new AtomicLong(0); @@ -224,9 +224,9 @@ public void mergeBufferAcquisitionTime(long delay) mergeBufferAcquisitionTimeNs.addAndGet(delay); } - public void mergeBufferTotalUsedBytes(long bytes) + public void maxMergeBufferUsedBytes(long bytes) { - mergeBufferTotalUsedBytes.addAndGet(bytes); + maxMergeBufferUsedBytes.addAndGet(bytes); } public void spilledBytes(long bytes) @@ -244,9 +244,9 @@ public long getMergeBufferAcquisitionTimeNs() return mergeBufferAcquisitionTimeNs.get(); } - public long getMergeBufferTotalUsedBytes() + public long getMaxMergeBufferUsedBytes() { - return mergeBufferTotalUsedBytes.get(); + return maxMergeBufferUsedBytes.get(); } public long getSpilledBytes() diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AbstractBufferHashGrouper.java 
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AbstractBufferHashGrouper.java index 9de75f7e2822..a5edb38cfa4b 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AbstractBufferHashGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AbstractBufferHashGrouper.java @@ -180,7 +180,7 @@ public void close() * This method is implemented to return the highest memory value used, this is helpful especially in * reporting the highest number of bytes used throughout the entire query lifecycle. */ - public long getMergeBufferUsedBytes() + public long getMaxMergeBufferUsedBytes() { return hashTable.getMaxMergeBufferUsedBytes(); } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java index 227a1b4211e1..670a03cb2dee 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java @@ -153,7 +153,7 @@ public void reset() } @Override - public long getMergeBufferUsedBytes() + public long getMaxMergeBufferUsedBytes() { if (!initialized) { return 0L; diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java index dd737632516a..a9b993efa669 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java @@ -459,13 +459,13 @@ public boolean validateBufferCapacity(int bufferCapacity) } @Override - public long getMergeBufferUsedBytes() + public long getMaxMergeBufferUsedBytes() { if (!initialized) { return 0L; 
} - long hashTableUsage = super.getMergeBufferUsedBytes(); + long hashTableUsage = super.getMaxMergeBufferUsedBytes(); long offSetHeapUsage = offsetHeap.getMaxMergeBufferUsedBytes(); return hashTableUsage + offSetHeapUsage; } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/SpillingGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/SpillingGrouper.java index 160e07df69d0..688c9f065661 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/SpillingGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/SpillingGrouper.java @@ -219,15 +219,15 @@ public void reset() public void close() { perQueryStats.dictionarySize(getDictionarySizeEstimate()); - perQueryStats.mergeBufferTotalUsedBytes(getMergeBufferUsedBytes()); + perQueryStats.maxMergeBufferUsedBytes(getMaxMergeBufferUsedBytes()); grouper.close(); keySerde.reset(); deleteFiles(); } - private long getMergeBufferUsedBytes() + private long getMaxMergeBufferUsedBytes() { - return grouper.isInitialized() ? grouper.getMergeBufferUsedBytes() : 0L; + return grouper.isInitialized() ? 
grouper.getMaxMergeBufferUsedBytes() : 0L; } private long getDictionarySizeEstimate() diff --git a/processing/src/test/java/org/apache/druid/query/groupby/GroupByStatsProviderTest.java b/processing/src/test/java/org/apache/druid/query/groupby/GroupByStatsProviderTest.java index 8e025a4599ff..207b37d65f4b 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/GroupByStatsProviderTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/GroupByStatsProviderTest.java @@ -35,7 +35,7 @@ public void testMetricCollection() stats1.mergeBufferAcquisitionTime(300); stats1.mergeBufferAcquisitionTime(400); - stats1.mergeBufferTotalUsedBytes(50); + stats1.maxMergeBufferUsedBytes(50); stats1.spilledBytes(200); stats1.spilledBytes(400); stats1.dictionarySize(100); @@ -46,7 +46,7 @@ public void testMetricCollection() stats2.mergeBufferAcquisitionTime(500); stats2.mergeBufferAcquisitionTime(600); - stats2.mergeBufferTotalUsedBytes(100); + stats2.maxMergeBufferUsedBytes(100); stats2.spilledBytes(400); stats2.spilledBytes(600); stats2.dictionarySize(300); @@ -88,28 +88,28 @@ public void testMetricsWithMultipleQueries() QueryResourceId r1 = new QueryResourceId("r1"); GroupByStatsProvider.PerQueryStats stats1 = statsProvider.getPerQueryStatsContainer(r1); stats1.mergeBufferAcquisitionTime(2000); - stats1.mergeBufferTotalUsedBytes(50); + stats1.maxMergeBufferUsedBytes(50); stats1.spilledBytes(100); stats1.dictionarySize(200); QueryResourceId r2 = new QueryResourceId("r2"); GroupByStatsProvider.PerQueryStats stats2 = statsProvider.getPerQueryStatsContainer(r2); stats2.mergeBufferAcquisitionTime(100); - stats2.mergeBufferTotalUsedBytes(500); + stats2.maxMergeBufferUsedBytes(500); stats2.spilledBytes(150); stats2.dictionarySize(250); QueryResourceId r3 = new QueryResourceId("r3"); GroupByStatsProvider.PerQueryStats stats3 = statsProvider.getPerQueryStatsContainer(r3); stats3.mergeBufferAcquisitionTime(200); - stats3.mergeBufferTotalUsedBytes(100); + 
stats3.maxMergeBufferUsedBytes(100); stats3.spilledBytes(3000); stats3.dictionarySize(300); QueryResourceId r4 = new QueryResourceId("r4"); GroupByStatsProvider.PerQueryStats stats4 = statsProvider.getPerQueryStatsContainer(r4); stats4.mergeBufferAcquisitionTime(300); - stats4.mergeBufferTotalUsedBytes(75); + stats4.maxMergeBufferUsedBytes(75); stats4.spilledBytes(200); stats4.dictionarySize(1500); diff --git a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouperTest.java b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouperTest.java index e07357cccc6c..7253b9b0b6ab 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouperTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouperTest.java @@ -177,27 +177,27 @@ public void testMaxMergeBufferUsedBytesWorksNormally() ); grouper.init(); - long initialUsage = grouper.getMergeBufferUsedBytes(); + long initialUsage = grouper.getMaxMergeBufferUsedBytes(); Assert.assertEquals(0L, initialUsage); columnSelectorFactory.setRow(new MapBasedRow(0, ImmutableMap.of("value", 10L))); grouper.aggregate(new IntKey(1)); - final long expectedBucketSize = grouper.getMergeBufferUsedBytes(); + final long expectedBucketSize = grouper.getMaxMergeBufferUsedBytes(); grouper.aggregate(new IntKey(2)); grouper.aggregate(new IntKey(3)); - Assert.assertEquals(3L * expectedBucketSize, grouper.getMergeBufferUsedBytes()); + Assert.assertEquals(3L * expectedBucketSize, grouper.getMaxMergeBufferUsedBytes()); grouper.aggregate(new IntKey(4)); grouper.aggregate(new IntKey(5)); - Assert.assertEquals(5L * expectedBucketSize, grouper.getMergeBufferUsedBytes()); + Assert.assertEquals(5L * expectedBucketSize, grouper.getMaxMergeBufferUsedBytes()); grouper.reset(); Assert.assertEquals(0, grouper.getSize()); - Assert.assertEquals(5L * expectedBucketSize, grouper.getMergeBufferUsedBytes()); + 
Assert.assertEquals(5L * expectedBucketSize, grouper.getMaxMergeBufferUsedBytes()); grouper.aggregate(new IntKey(6)); grouper.aggregate(new IntKey(7)); @@ -206,7 +206,7 @@ public void testMaxMergeBufferUsedBytesWorksNormally() grouper.aggregate(new IntKey(10)); grouper.aggregate(new IntKey(11)); - Assert.assertEquals(6L * expectedBucketSize, grouper.getMergeBufferUsedBytes()); + Assert.assertEquals(6L * expectedBucketSize, grouper.getMaxMergeBufferUsedBytes()); grouper.close(); } diff --git a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouperTest.java b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouperTest.java index 0909d26eabb3..df9851ba829b 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouperTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouperTest.java @@ -313,22 +313,22 @@ public void testMaxMergeBufferUsedBytesTracksMaxUsageAfterReset() final GroupByTestColumnSelectorFactory columnSelectorFactory = GrouperTestUtil.newColumnSelectorFactory(); final LimitedBufferHashGrouper grouper = makeGrouper(columnSelectorFactory, 20000); - Assert.assertEquals(0L, grouper.getMergeBufferUsedBytes()); + Assert.assertEquals(0L, grouper.getMaxMergeBufferUsedBytes()); columnSelectorFactory.setRow(new MapBasedRow(0, ImmutableMap.of("value", 10L))); Assert.assertTrue(String.valueOf(KEY_BASE), grouper.aggregate(new IntKey(KEY_BASE)).isOk()); - final long usagePerEntry = grouper.getMergeBufferUsedBytes(); + final long usagePerEntry = grouper.getMaxMergeBufferUsedBytes(); grouper.reset(); Assert.assertEquals(0, grouper.getSize()); - Assert.assertEquals(usagePerEntry, grouper.getMergeBufferUsedBytes()); + Assert.assertEquals(usagePerEntry, grouper.getMaxMergeBufferUsedBytes()); // Add 10 entries after reset for (int i = 0; i < 10; i++) { Assert.assertTrue(String.valueOf(i + KEY_BASE), 
grouper.aggregate(new IntKey(i + KEY_BASE)).isOk()); } - Assert.assertEquals(10 * usagePerEntry, grouper.getMergeBufferUsedBytes()); + Assert.assertEquals(10 * usagePerEntry, grouper.getMaxMergeBufferUsedBytes()); } @Test @@ -342,7 +342,7 @@ public void testMaxMergeBufferUsedBytesAfterBufferSwap() // Calculate usage per entry from first entry Assert.assertTrue(String.valueOf(KEY_BASE), grouper.aggregate(new IntKey(KEY_BASE)).isOk()); - final long usagePerEntry = grouper.getMergeBufferUsedBytes(); + final long usagePerEntry = grouper.getMaxMergeBufferUsedBytes(); // This results in 13 swaps and final size of 116 (100 keys + 16 new keys after last swap) for (int i = 1; i < NUM_ROWS; i++) { @@ -360,7 +360,7 @@ public void testMaxMergeBufferUsedBytesAfterBufferSwap() // Peak usage is the sum of hash table peak and heap peak, which peak at different sizes... final long expectedPeakUsage = hashTablePeak + heapPeak; - Assert.assertEquals(expectedPeakUsage, grouper.getMergeBufferUsedBytes()); + Assert.assertEquals(expectedPeakUsage, grouper.getMaxMergeBufferUsedBytes()); } private static LimitedBufferHashGrouper makeGrouper( diff --git a/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java b/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java index 877cd335a34f..eaca043e02e7 100644 --- a/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java +++ b/server/src/test/java/org/apache/druid/server/metrics/GroupByStatsMonitorTest.java @@ -202,21 +202,21 @@ public void testMonitoringWithMultipleResources() QueryResourceId r1 = new QueryResourceId("r1"); GroupByStatsProvider.PerQueryStats stats1 = statsProvider.getPerQueryStatsContainer(r1); stats1.mergeBufferAcquisitionTime(100); - stats1.mergeBufferTotalUsedBytes(50); + stats1.maxMergeBufferUsedBytes(50); stats1.spilledBytes(200); stats1.dictionarySize(100); QueryResourceId r2 = new QueryResourceId("r2"); GroupByStatsProvider.PerQueryStats 
stats2 = statsProvider.getPerQueryStatsContainer(r2); stats2.mergeBufferAcquisitionTime(500); - stats2.mergeBufferTotalUsedBytes(30); + stats2.maxMergeBufferUsedBytes(30); stats2.spilledBytes(100); stats2.dictionarySize(300); QueryResourceId r3 = new QueryResourceId("r3"); GroupByStatsProvider.PerQueryStats stats3 = statsProvider.getPerQueryStatsContainer(r3); stats3.mergeBufferAcquisitionTime(200); - stats3.mergeBufferTotalUsedBytes(150); + stats3.maxMergeBufferUsedBytes(150); stats3.spilledBytes(800); stats3.dictionarySize(200); From 32c1ed1cce9da1b99dbcc4fe7deba1cfed6169af Mon Sep 17 00:00:00 2001 From: GWphua Date: Thu, 26 Feb 2026 16:46:01 +0800 Subject: [PATCH 30/32] Test duplicate buffer adds --- .../query/groupby/epinephelinae/BufferHashGrouperTest.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouperTest.java b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouperTest.java index 7253b9b0b6ab..b8c16d366572 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouperTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouperTest.java @@ -157,7 +157,7 @@ public void testNoGrowing() } @Test - public void testMaxMergeBufferUsedBytesWorksNormally() + public void testMaxMergeBufferUsedBytes() { final GroupByTestColumnSelectorFactory columnSelectorFactory = GrouperTestUtil.newColumnSelectorFactory(); final BufferHashGrouper grouper = new BufferHashGrouper<>( @@ -193,6 +193,10 @@ public void testMaxMergeBufferUsedBytesWorksNormally() grouper.aggregate(new IntKey(4)); grouper.aggregate(new IntKey(5)); + // Duplicate adds will not affect merge buffer used. 
+ grouper.aggregate(new IntKey(1)); + grouper.aggregate(new IntKey(5)); + Assert.assertEquals(5L * expectedBucketSize, grouper.getMaxMergeBufferUsedBytes()); grouper.reset(); From 08c235acb8bab9d8b3b77baadfee1a637c3a5017 Mon Sep 17 00:00:00 2001 From: GWphua Date: Thu, 26 Feb 2026 17:01:13 +0800 Subject: [PATCH 31/32] Test --- .../query/groupby/epinephelinae/BufferHashGrouperTest.java | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouperTest.java b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouperTest.java index b8c16d366572..ec5d64794cc7 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouperTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouperTest.java @@ -193,22 +193,18 @@ public void testMaxMergeBufferUsedBytes() grouper.aggregate(new IntKey(4)); grouper.aggregate(new IntKey(5)); - // Duplicate adds will not affect merge buffer used. 
- grouper.aggregate(new IntKey(1)); - grouper.aggregate(new IntKey(5)); - Assert.assertEquals(5L * expectedBucketSize, grouper.getMaxMergeBufferUsedBytes()); grouper.reset(); Assert.assertEquals(0, grouper.getSize()); Assert.assertEquals(5L * expectedBucketSize, grouper.getMaxMergeBufferUsedBytes()); + grouper.aggregate(new IntKey(1)); grouper.aggregate(new IntKey(6)); grouper.aggregate(new IntKey(7)); grouper.aggregate(new IntKey(8)); grouper.aggregate(new IntKey(9)); grouper.aggregate(new IntKey(10)); - grouper.aggregate(new IntKey(11)); Assert.assertEquals(6L * expectedBucketSize, grouper.getMaxMergeBufferUsedBytes()); From dd0267ba2101b5a8b3723d14a94030d73d56ca14 Mon Sep 17 00:00:00 2001 From: GWphua Date: Thu, 26 Feb 2026 17:06:58 +0800 Subject: [PATCH 32/32] Add javadocs for update --- .../query/groupby/epinephelinae/ByteBufferHashTable.java | 4 ++++ .../query/groupby/epinephelinae/LimitedBufferHashGrouper.java | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferHashTable.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferHashTable.java index 0f15a4afcda3..f348c6ba7fb0 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferHashTable.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferHashTable.java @@ -395,6 +395,10 @@ public int getGrowthCount() return growthCount; } + /** + * To maintain an accurate tracking of the maximum bytes used per query, this function is to be called immediately + * whenever either of {@link #size} or {@link #bucketSizeWithHash} is changed. 
+ */ protected void updateMaxMergeBufferUsedBytes() { maxMergeBufferUsedBytes = Math.max(maxMergeBufferUsedBytes, (long) size * bucketSizeWithHash); diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java index a9b993efa669..873dbc776bda 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java @@ -521,8 +521,8 @@ public AlternatingByteBufferHashTable( public void reset() { size = 0; - growthCount = 0; updateMaxMergeBufferUsedBytes(); + growthCount = 0; // clear the used bits of the first buffer for (int i = 0; i < maxBuckets; i++) { subHashTableBuffers[0].put(i * bucketSizeWithHash, (byte) 0);