diff --git a/docs/operations/metrics.md b/docs/operations/metrics.md index 1a15a5314f88..8983b79c5413 100644 --- a/docs/operations/metrics.md +++ b/docs/operations/metrics.md @@ -90,6 +90,8 @@ Most metric values reset each emission period, as specified in `druid.monitoring |`mergeBuffer/queries`|Number of groupBy queries that acquired a batch of buffers from the merge buffer pool.|This metric is only available if the `GroupByStatsMonitor` module is included.|Depends on the number of groupBy queries needing merge buffers.| |`mergeBuffer/acquisitionTimeNs`|Total time in nanoseconds to acquire merge buffer for groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`mergeBuffer/maxAcquisitionTimeNs`|Maximum time in nanoseconds to acquire merge buffer for any single groupBy query within the emission period.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| +|`mergeBuffer/bytesUsed`|Number of bytes used by merge buffers to process groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| +|`mergeBuffer/maxBytesUsed`|Maximum number of bytes used by merge buffers for any single groupBy query within the emission period.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`groupBy/spilledQueries`|Number of groupBy queries that have spilled onto the disk.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`groupBy/spilledBytes`|Number of bytes spilled on the disk by the groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`groupBy/maxSpilledBytes`|Maximum number of bytes spilled to disk by any single groupBy query within the emission period.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| @@ -117,6 +119,8 @@ Most metric values reset each emission period, as specified in 
`druid.monitoring |`mergeBuffer/queries`|Number of groupBy queries that acquired a batch of buffers from the merge buffer pool.|This metric is only available if the `GroupByStatsMonitor` module is included.|Depends on the number of groupBy queries needing merge buffers.| |`mergeBuffer/acquisitionTimeNs`|Total time in nanoseconds to acquire merge buffer for groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`mergeBuffer/maxAcquisitionTimeNs`|Maximum time in nanoseconds to acquire merge buffer for any single groupBy query within the emission period.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| +|`mergeBuffer/bytesUsed`|Number of bytes used by merge buffers to process groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| +|`mergeBuffer/maxBytesUsed`|Maximum number of bytes used by merge buffers for any single groupBy query within the emission period.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`groupBy/spilledQueries`|Number of groupBy queries that have spilled onto the disk.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`groupBy/spilledBytes`|Number of bytes spilled on the disk by the groupBy queries.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| |`groupBy/maxSpilledBytes`|Maximum number of bytes spilled to disk by any single groupBy query within the emission period.|This metric is only available if the `GroupByStatsMonitor` module is included.|Varies| @@ -147,6 +151,8 @@ to represent the task ID are deprecated and will be removed in a future release. |`mergeBuffer/queries`|Number of groupBy queries that acquired a batch of buffers from the merge buffer pool. 
This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Depends on the number of groupBy queries needing merge buffers.| |`mergeBuffer/acquisitionTimeNs`|Total time in nanoseconds to acquire merge buffer for groupBy queries. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| |`mergeBuffer/maxAcquisitionTimeNs`|Maximum time in nanoseconds to acquire merge buffer for any single groupBy query within the emission period. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| +|`mergeBuffer/bytesUsed`|Number of bytes used by merge buffers to process groupBy queries. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| +|`mergeBuffer/maxBytesUsed`|Maximum number of bytes used by merge buffers for any single groupBy query within the emission period. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| |`groupBy/spilledQueries`|Number of groupBy queries that have spilled onto the disk. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| |`groupBy/spilledBytes`|Number of bytes spilled on the disk by the groupBy queries. This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| |`groupBy/maxSpilledBytes`|Maximum number of bytes spilled to disk by any single groupBy query within the emission period. 
This metric is only available if the `GroupByStatsMonitor` module is included.|`dataSource`, `taskId`|Varies| diff --git a/processing/src/main/java/org/apache/druid/query/groupby/GroupByStatsProvider.java b/processing/src/main/java/org/apache/druid/query/groupby/GroupByStatsProvider.java index 51f564005555..f6b92a7b62c1 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/GroupByStatsProvider.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/GroupByStatsProvider.java @@ -27,7 +27,8 @@ import java.util.concurrent.atomic.AtomicLong; /** - * Metrics collector for groupBy queries like spilled bytes, merge buffer acquistion time, dictionary size. + * Collects groupBy query metrics (spilled bytes, merge buffer usage, dictionary size) per-query, then + * aggregates them when queries complete. Stats are retrieved and reset periodically via {@link #getStatsSince()}. */ @LazySingleton public class GroupByStatsProvider @@ -60,7 +61,9 @@ public synchronized void closeQuery(QueryResourceId resourceId) public synchronized AggregateStats getStatsSince() { - return aggregateStatsContainer.reset(); + AggregateStats aggregateStats = new AggregateStats(aggregateStatsContainer); + aggregateStatsContainer.reset(); + return aggregateStats; } public static class AggregateStats @@ -68,6 +71,8 @@ public static class AggregateStats private long mergeBufferQueries = 0; private long mergeBufferAcquisitionTimeNs = 0; private long maxMergeBufferAcquisitionTimeNs = 0; + private long totalMergeBufferUsedBytes = 0; + private long maxMergeBufferUsedBytes = 0; private long spilledQueries = 0; private long spilledBytes = 0; private long maxSpilledBytes = 0; @@ -78,10 +83,28 @@ public AggregateStats() { } + public AggregateStats(AggregateStats aggregateStats) + { + this( + aggregateStats.mergeBufferQueries, + aggregateStats.mergeBufferAcquisitionTimeNs, + aggregateStats.maxMergeBufferAcquisitionTimeNs, + aggregateStats.totalMergeBufferUsedBytes, + 
aggregateStats.maxMergeBufferUsedBytes, + aggregateStats.spilledQueries, + aggregateStats.spilledBytes, + aggregateStats.maxSpilledBytes, + aggregateStats.mergeDictionarySize, + aggregateStats.maxMergeDictionarySize + ); + } + public AggregateStats( long mergeBufferQueries, long mergeBufferAcquisitionTimeNs, long maxMergeBufferAcquisitionTimeNs, + long totalMergeBufferUsedBytes, + long maxMergeBufferUsedBytes, long spilledQueries, long spilledBytes, long maxSpilledBytes, @@ -92,6 +115,8 @@ public AggregateStats( this.mergeBufferQueries = mergeBufferQueries; this.mergeBufferAcquisitionTimeNs = mergeBufferAcquisitionTimeNs; this.maxMergeBufferAcquisitionTimeNs = maxMergeBufferAcquisitionTimeNs; + this.totalMergeBufferUsedBytes = totalMergeBufferUsedBytes; + this.maxMergeBufferUsedBytes = maxMergeBufferUsedBytes; this.spilledQueries = spilledQueries; this.spilledBytes = spilledBytes; this.maxSpilledBytes = maxSpilledBytes; @@ -114,6 +139,16 @@ public long getMaxMergeBufferAcquisitionTimeNs() return maxMergeBufferAcquisitionTimeNs; } + public long getTotalMergeBufferUsedBytes() + { + return totalMergeBufferUsedBytes; + } + + public long getMaxMergeBufferUsedBytes() + { + return maxMergeBufferUsedBytes; + } + public long getSpilledQueries() { return spilledQueries; @@ -148,6 +183,8 @@ public void addQueryStats(PerQueryStats perQueryStats) maxMergeBufferAcquisitionTimeNs, perQueryStats.getMergeBufferAcquisitionTimeNs() ); + totalMergeBufferUsedBytes += perQueryStats.getMaxMergeBufferUsedBytes(); + maxMergeBufferUsedBytes = Math.max(maxMergeBufferUsedBytes, perQueryStats.getMaxMergeBufferUsedBytes()); } if (perQueryStats.getSpilledBytes() > 0) { @@ -160,36 +197,25 @@ public void addQueryStats(PerQueryStats perQueryStats) maxMergeDictionarySize = Math.max(maxMergeDictionarySize, perQueryStats.getMergeDictionarySize()); } - public AggregateStats reset() + public void reset() { - AggregateStats aggregateStats = - new AggregateStats( - mergeBufferQueries, - 
mergeBufferAcquisitionTimeNs, - maxMergeBufferAcquisitionTimeNs, - spilledQueries, - spilledBytes, - maxSpilledBytes, - mergeDictionarySize, - maxMergeDictionarySize - ); - this.mergeBufferQueries = 0; this.mergeBufferAcquisitionTimeNs = 0; this.maxMergeBufferAcquisitionTimeNs = 0; + this.totalMergeBufferUsedBytes = 0; + this.maxMergeBufferUsedBytes = 0; this.spilledQueries = 0; this.spilledBytes = 0; this.maxSpilledBytes = 0; this.mergeDictionarySize = 0; this.maxMergeDictionarySize = 0; - - return aggregateStats; } } public static class PerQueryStats { private final AtomicLong mergeBufferAcquisitionTimeNs = new AtomicLong(0); + private final AtomicLong maxMergeBufferUsedBytes = new AtomicLong(0); private final AtomicLong spilledBytes = new AtomicLong(0); private final AtomicLong mergeDictionarySize = new AtomicLong(0); @@ -198,6 +224,11 @@ public void mergeBufferAcquisitionTime(long delay) mergeBufferAcquisitionTimeNs.addAndGet(delay); } + public void maxMergeBufferUsedBytes(long bytes) + { + maxMergeBufferUsedBytes.addAndGet(bytes); + } + public void spilledBytes(long bytes) { spilledBytes.addAndGet(bytes); @@ -213,6 +244,11 @@ public long getMergeBufferAcquisitionTimeNs() return mergeBufferAcquisitionTimeNs.get(); } + public long getMaxMergeBufferUsedBytes() + { + return maxMergeBufferUsedBytes.get(); + } + public long getSpilledBytes() { return spilledBytes.get(); diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AbstractBufferHashGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AbstractBufferHashGrouper.java index 70cf5832cf33..a5edb38cfa4b 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AbstractBufferHashGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AbstractBufferHashGrouper.java @@ -173,6 +173,18 @@ public void close() aggregators.reset(); } + /** + * Retrieves the size of the merge buffers used for this groupby 
query. This value is retrieved when + * {@link SpillingGrouper#close()} is called. + *
+ * This method returns the highest memory value used, which is especially helpful for + * reporting the peak number of bytes used throughout the entire query lifecycle. + */ + public long getMaxMergeBufferUsedBytes() + { + return hashTable.getMaxMergeBufferUsedBytes(); + } + /** * Populate a {@link ReusableEntry} with values from a particular bucket. */ diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java index 4970ebe9e83e..670a03cb2dee 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java @@ -26,7 +26,6 @@ import org.apache.druid.query.aggregation.AggregatorAdapters; import org.apache.druid.query.aggregation.AggregatorFactory; -import javax.annotation.Nullable; import java.nio.ByteBuffer; import java.util.AbstractList; import java.util.Collections; @@ -50,7 +49,6 @@ public class BufferHashGrouper
+ * The table uses a contiguous slice of the input {@link ByteBuffer} as its backing store. Each bucket holds
+ * at most one entry, and occupies {@code bucketSizeWithHash} number of bytes. Collisions are resolved by continuously
+ * probing the next bucket to find an empty bucket to slot the new entry. The current table view {@code tableBuffer}
+ * is maintained as a {@link ByteBuffer} slice that moves and grows within the arena as the table expands.
+ */
public class ByteBufferHashTable
{
public static int calculateTableArenaSizeWithPerBucketAdditionalSize(
@@ -79,6 +87,9 @@ public static int calculateTableArenaSizeWithFixedAdditionalSize(
@Nullable
protected BucketUpdateHandler bucketUpdateHandler;
+ // Tracks maximum bytes used for the entire lifecycle of this hash table.
+ protected long maxMergeBufferUsedBytes;
+
public ByteBufferHashTable(
float maxLoadFactor,
int initialBuckets,
@@ -97,6 +108,7 @@ public ByteBufferHashTable(
this.maxSizeForTesting = maxSizeForTesting;
this.tableArenaSize = buffer.capacity();
this.bucketUpdateHandler = bucketUpdateHandler;
+ this.maxMergeBufferUsedBytes = 0;
}
public void reset()
@@ -139,6 +151,7 @@ public void reset()
bufferDup.position(tableStart);
bufferDup.limit(tableStart + maxBuckets * bucketSizeWithHash);
tableBuffer = bufferDup.slice();
+ updateMaxMergeBufferUsedBytes();
// Clear used bits of new table
for (int i = 0; i < maxBuckets; i++) {
@@ -245,6 +258,7 @@ protected void initializeNewBucketKey(
tableBuffer.putInt(Groupers.getUsedFlag(keyHash));
tableBuffer.put(keyBuffer);
size++;
+ updateMaxMergeBufferUsedBytes();
if (bucketUpdateHandler != null) {
bucketUpdateHandler.handleNewBucket(offset);
@@ -381,6 +395,20 @@ public int getGrowthCount()
return growthCount;
}
+ /**
+  * Folds the current footprint, {@link #size} multiplied by {@link #bucketSizeWithHash}, into the running
+  * maximum. Callers should invoke this right after changing either of those two values so that the recorded
+  * peak stays accurate for the query.
+  */
+ protected void updateMaxMergeBufferUsedBytes()
+ {
+   // Widen before multiplying so the product is computed as a long.
+   final long currentUsedBytes = (long) size * bucketSizeWithHash;
+   if (currentUsedBytes > maxMergeBufferUsedBytes) {
+     maxMergeBufferUsedBytes = currentUsedBytes;
+   }
+ }
+
+ /**
+  * Returns the running maximum maintained by {@link #updateMaxMergeBufferUsedBytes()}.
+  */
+ public long getMaxMergeBufferUsedBytes()
+ {
+   return this.maxMergeBufferUsedBytes;
+ }
+
public interface BucketUpdateHandler
{
void handleNewBucket(int bucketOffset);
diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferIntList.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferIntList.java
index 28de255c13a0..33a79451993e 100644
--- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferIntList.java
+++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferIntList.java
@@ -30,6 +30,8 @@ public class ByteBufferIntList
private final int maxElements;
private int numElements;
+ private int maxMergeBufferUsedBytes;
+
public ByteBufferIntList(
ByteBuffer buffer,
int maxElements
@@ -38,6 +40,7 @@ public ByteBufferIntList(
this.buffer = buffer;
this.maxElements = maxElements;
this.numElements = 0;
+ this.maxMergeBufferUsedBytes = 0;
if (buffer.capacity() < (maxElements * Integer.BYTES)) {
throw new IAE(
@@ -55,6 +58,7 @@ public void add(int val)
}
buffer.putInt(numElements * Integer.BYTES, val);
numElements++;
+ maxMergeBufferUsedBytes = Math.max(maxMergeBufferUsedBytes, numElements * Integer.BYTES);
}
public void set(int index, int val)
@@ -71,4 +75,9 @@ public void reset()
{
numElements = 0;
}
+
+ /**
+  * Returns the largest value of {@code numElements * Integer.BYTES} recorded by {@link #add(int)}.
+  */
+ public int getMaxMergeBufferUsedBytes()
+ {
+   return this.maxMergeBufferUsedBytes;
+ }
}
diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferMinMaxOffsetHeap.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferMinMaxOffsetHeap.java
index cfa7295e6b43..ff2746bca29c 100644
--- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferMinMaxOffsetHeap.java
+++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ByteBufferMinMaxOffsetHeap.java
@@ -44,6 +44,7 @@ public class ByteBufferMinMaxOffsetHeap
private int heapSize;
private int maxHeapSize;
+ private int maxMergeBufferUsedBytes;
public ByteBufferMinMaxOffsetHeap(
ByteBuffer buf,
@@ -55,6 +56,7 @@ public ByteBufferMinMaxOffsetHeap(
this.buf = buf;
this.limit = limit;
this.heapSize = 0;
+ this.maxMergeBufferUsedBytes = 0;
this.minComparator = minComparator;
this.maxComparator = Ordering.from(minComparator).reverse();
this.heapIndexUpdater = heapIndexUpdater;
@@ -71,9 +73,9 @@ public int addOffset(int offset)
int pos = heapSize;
buf.putInt(pos * Integer.BYTES, offset);
heapSize++;
- if (heapSize > maxHeapSize) {
- maxHeapSize = heapSize;
- }
+
+ maxHeapSize = Math.max(maxHeapSize, heapSize);
+ maxMergeBufferUsedBytes = Math.max(maxMergeBufferUsedBytes, maxHeapSize * Integer.BYTES);
if (heapIndexUpdater != null) {
heapIndexUpdater.updateHeapIndexForOffset(offset, pos);
@@ -226,6 +228,11 @@ public int getHeapSize()
return heapSize;
}
+ /**
+  * Returns the largest value of {@code maxHeapSize * Integer.BYTES} recorded by {@link #addOffset(int)}.
+  */
+ public int getMaxMergeBufferUsedBytes()
+ {
+   return this.maxMergeBufferUsedBytes;
+ }
+
private void bubbleUp(int pos)
{
if (isEvenLevel(pos)) {
diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ConcurrentGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ConcurrentGrouper.java
index 8242c9d8cf5c..b4b4cb347019 100644
--- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ConcurrentGrouper.java
+++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/ConcurrentGrouper.java
@@ -332,7 +332,7 @@ public void reset()
throw new ISE("Grouper is closed");
}
- groupers.forEach(Grouper::reset);
+ groupers.forEach(SpillingGrouper::reset);
}
@Override
@@ -496,7 +496,7 @@ public void close()
{
if (!closed) {
closed = true;
- groupers.forEach(Grouper::close);
+ groupers.forEach(SpillingGrouper::close);
}
}
diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java
index 0627fba0333d..873dbc776bda 100644
--- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java
+++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java
@@ -458,6 +458,18 @@ public boolean validateBufferCapacity(int bufferCapacity)
}
}
+ /**
+  * Reports the combined peak usage of the hash table (via the superclass) and the offset heap.
+  * Returns zero while this grouper has not been initialized.
+  */
+ @Override
+ public long getMaxMergeBufferUsedBytes()
+ {
+   // Operands are evaluated left to right: hash table usage first, then offset heap usage.
+   return initialized
+          ? super.getMaxMergeBufferUsedBytes() + offsetHeap.getMaxMergeBufferUsedBytes()
+          : 0L;
+ }
+
private class AlternatingByteBufferHashTable extends ByteBufferHashTable
{
// The base buffer is split into two alternating halves, with one sub-buffer in use at a given time.
@@ -509,6 +521,7 @@ public AlternatingByteBufferHashTable(
public void reset()
{
size = 0;
+ updateMaxMergeBufferUsedBytes();
growthCount = 0;
// clear the used bits of the first buffer
for (int i = 0; i < maxBuckets; i++) {
@@ -570,6 +583,7 @@ public void adjustTableWhenFull()
}
size = numCopied;
+ updateMaxMergeBufferUsedBytes();
tableBuffer = newTableBuffer;
growthCount++;
}
diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/SpillingGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/SpillingGrouper.java
index fadcfa02c95d..688c9f065661 100644
--- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/SpillingGrouper.java
+++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/SpillingGrouper.java
@@ -68,7 +68,7 @@ public class SpillingGrouper