Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.druid.query.groupby.epinephelinae;

import javax.annotation.Nullable;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;

/**
* OutputStream that starts buffering in a heap byte array and switches to a disk file via
* {@link LimitedTemporaryStorage} once the written bytes exceed the threshold. This avoids
* the createFile/delete round-trip for small spills while bounding peak extra heap to the
* threshold size.
*/
/**
 * OutputStream that starts buffering in a heap byte array and switches to a disk file via
 * {@link LimitedTemporaryStorage} once the written bytes exceed the threshold. This avoids
 * the createFile/delete round-trip for small spills while bounding peak extra heap to the
 * threshold size.
 *
 * Not thread-safe. After the stream is closed, callers are expected to check
 * {@link #isInMemory()} and then retrieve the data via {@link #toByteArray()} (in-memory case)
 * or {@link #getFile()} (on-disk case).
 */
public class SpillOutputStream extends OutputStream
{
  private static final int INITIAL_BUFFER_SIZE = 4096;

  private final LimitedTemporaryStorage temporaryStorage;
  private final long threshold;

  // Non-null while still buffering in memory; set to null once we switch to disk.
  @Nullable
  private ByteArrayOutputStream memoryBuffer;

  // Non-null once the threshold has been exceeded and the buffered bytes moved to disk.
  @Nullable
  private LimitedTemporaryStorage.LimitedOutputStream fileOut;
  private boolean thresholdExceeded;

  /**
   * @param temporaryStorage storage used to create the backing file if the threshold is exceeded
   * @param threshold        number of bytes beyond which writes spill to disk
   */
  SpillOutputStream(LimitedTemporaryStorage temporaryStorage, long threshold)
  {
    this.temporaryStorage = temporaryStorage;
    this.threshold = threshold;
    // Start small; ByteArrayOutputStream grows on demand, so a large threshold does not
    // pre-allocate a large array.
    this.memoryBuffer = new ByteArrayOutputStream((int) Math.min(threshold, INITIAL_BUFFER_SIZE));
  }

  @Override
  public void write(int b) throws IOException
  {
    checkThreshold(1);
    if (fileOut != null) {
      fileOut.write(b);
    } else {
      memoryBuffer.write(b);
    }
  }

  @Override
  public void write(byte[] b, int off, int len) throws IOException
  {
    checkThreshold(len);
    if (fileOut != null) {
      fileOut.write(b, off, len);
    } else {
      memoryBuffer.write(b, off, len);
    }
  }

  @Override
  public void flush() throws IOException
  {
    // The in-memory buffer needs no flushing; only the file stream does.
    if (fileOut != null) {
      fileOut.flush();
    }
  }

  @Override
  public void close() throws IOException
  {
    // Intentionally keeps memoryBuffer alive: toByteArray() is called after close() in the
    // in-memory case. ByteArrayOutputStream.close() is a no-op anyway.
    if (fileOut != null) {
      fileOut.close();
    }
  }

  /**
   * @return true if all written bytes are still held in the heap buffer, false if they were
   * spilled to a disk file
   */
  boolean isInMemory()
  {
    return fileOut == null;
  }

  /**
   * Returns the buffered bytes. Only valid when {@link #isInMemory()} is true.
   */
  byte[] toByteArray()
  {
    return memoryBuffer.toByteArray();
  }

  /**
   * Returns the backing disk file. Only valid when {@link #isInMemory()} is false.
   */
  File getFile()
  {
    return fileOut.getFile();
  }

  /**
   * Switches to disk before the pending write if it would push the buffered size past the
   * threshold.
   */
  private void checkThreshold(int count) throws IOException
  {
    // Use long arithmetic: "size() + count" in int could overflow for buffers approaching
    // Integer.MAX_VALUE, making the comparison spuriously false and defeating the switch.
    if (!thresholdExceeded && (long) memoryBuffer.size() + count > threshold) {
      thresholdExceeded = true;
      switchToDisk();
    }
  }

  /**
   * Creates the backing file, copies the in-memory buffer into it, and releases the buffer.
   * On copy failure the file stream is closed before rethrowing, so it is not leaked.
   */
  private void switchToDisk() throws IOException
  {
    final LimitedTemporaryStorage.LimitedOutputStream out = temporaryStorage.createFile();
    try {
      memoryBuffer.writeTo(out);
    }
    catch (IOException e) {
      out.close();
      throw e;
    }
    fileOut = out;
    memoryBuffer = null;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.file.Files;
import java.util.ArrayList;
Expand Down Expand Up @@ -374,28 +375,24 @@ public CloseableIterator<Entry<KeyType>> iterator(final boolean sorted)

private void spill() throws IOException
{
// Stream directly to a temp file first, then check the file size. If the file is small
// (serialized size much smaller than the pre-allocated buffer, e.g. HLL sketches in List mode),
// read it back into memory for batching to avoid creating thousands of tiny disk files.
// If the file is already large enough, keep it on disk as-is.
final File file;
final SpillOutputStream spillOut = new SpillOutputStream(temporaryStorage, minSpillFileSize);
try (CloseableIterator<Entry<KeyType>> iterator = grouper.iterator(true)) {
file = spill(iterator);
serializeToStream(iterator, spillOut);
}

pendingDictionaryEntries.addAll(keySerde.getDictionary());
grouper.reset();

final long fileSize = file.length();
if (fileSize < minSpillFileSize) {
pendingSpillRuns.add(Files.readAllBytes(file.toPath()));
pendingSpillBytes += fileSize;
temporaryStorage.delete(file);
if (spillOut.isInMemory()) {
final byte[] bytes = spillOut.toByteArray();
pendingSpillRuns.add(bytes);
pendingSpillBytes += bytes.length;

if (pendingSpillBytes >= minSpillFileSize) {
flushPendingRunsToDisk();
}
} else {
files.add(file);
files.add(spillOut.getFile());
dictionaryFiles.add(spill(pendingDictionaryEntries.iterator()));
pendingDictionaryEntries.clear();
}
Expand Down Expand Up @@ -483,20 +480,24 @@ public Entry<KeyType> apply(Entry<KeyType> entry)
);
}

private <T> File spill(Iterator<T> iterator) throws IOException
private <T> void serializeToStream(Iterator<T> iterator, OutputStream out) throws IOException
{
try (
final LimitedTemporaryStorage.LimitedOutputStream out = temporaryStorage.createFile();
final LZ4BlockOutputStream compressedOut = new LZ4BlockOutputStream(out);
final JsonGenerator jsonGenerator = spillMapper.getFactory().createGenerator(compressedOut)
) {
final SerializerProvider serializers = spillMapper.getSerializerProviderInstance();

while (iterator.hasNext()) {
BaseQuery.checkInterrupted();
JacksonUtils.writeObjectUsingSerializerProvider(jsonGenerator, serializers, iterator.next());
}
}
}

/**
 * Serializes the iterator's entries to a new temporary-storage file and returns that file.
 * The output stream is closed by try-with-resources before the file is handed back.
 *
 * @throws IOException if file creation or serialization fails
 */
private <T> File spill(Iterator<T> iterator) throws IOException
{
try (final LimitedTemporaryStorage.LimitedOutputStream out = temporaryStorage.createFile()) {
serializeToStream(iterator, out);
return out.getFile();
}
}
Expand Down
Loading
Loading