diff --git a/dev/archery/archery/integration/runner.py b/dev/archery/archery/integration/runner.py
index b472b4062b7..87140b21fe9 100644
--- a/dev/archery/archery/integration/runner.py
+++ b/dev/archery/archery/integration/runner.py
@@ -137,10 +137,13 @@ def _gold_tests(self, gold_dir):
skip.add("Go")
skip.add("JS")
skip.add("Rust")
- if name == 'zstd':
- skip.add("Java")
+
+ # See https://github.com/apache/arrow/pull/9822 for how to
+ # disable specific compression type tests.
+
if prefix == '4.0.0-shareddict':
skip.add("Go")
+
yield datagen.File(name, None, None, skip=skip, path=out_path)
def _run_test_cases(self, producer, consumer, case_runner,
diff --git a/docs/source/status.rst b/docs/source/status.rst
index 2e062122ba7..1c6262274e6 100644
--- a/docs/source/status.rst
+++ b/docs/source/status.rst
@@ -126,7 +126,7 @@ IPC Format
+-----------------------------+-------+-------+-------+------------+-------+-------+-------+
| Sparse tensors | ✓ | | | | | | |
+-----------------------------+-------+-------+-------+------------+-------+-------+-------+
-| Buffer compression | ✓ | | | | | | ✓ |
+| Buffer compression | ✓ | ✓ (3) | | | | | ✓ |
+-----------------------------+-------+-------+-------+------------+-------+-------+-------+
| Endianness conversion | ✓ (2) | | | | | | |
+-----------------------------+-------+-------+-------+------------+-------+-------+-------+
@@ -139,6 +139,8 @@ Notes:
* \(2) Data with non-native endianness can be byte-swapped automatically when reading.
+* \(3) LZ4 Codec currently is quite inefficient. ARROW-11901 tracks improving performance.
+
.. seealso::
The :ref:`format-ipc` specification.
@@ -221,7 +223,7 @@ Third-Party Data Formats
+-----------------------------+---------+---------+-------+------------+-------+---------+-------+
| ORC | R | | | | | | |
+-----------------------------+---------+---------+-------+------------+-------+---------+-------+
-| Parquet | R/W | | | | | R/W (1) | |
+| Parquet | R/W | R (2) | | | | R/W (1) | |
+-----------------------------+---------+---------+-------+------------+-------+---------+-------+
Notes:
@@ -231,3 +233,5 @@ Notes:
* *W* = Write supported
* \(1) Nested read/write not supported
+
+* \(2) Through JNI bindings to datasets.
diff --git a/java/compression/pom.xml b/java/compression/pom.xml
index 9a6ab3508ed..dc0a9586539 100644
--- a/java/compression/pom.xml
+++ b/java/compression/pom.xml
@@ -44,8 +44,9 @@
1.20
- io.netty
- netty-common
-
+ com.github.luben
+ zstd-jni
+ 1.4.9-1
+
diff --git a/java/compression/src/main/java/org/apache/arrow/compression/CommonsCompressionFactory.java b/java/compression/src/main/java/org/apache/arrow/compression/CommonsCompressionFactory.java
index 4becbbe78c9..867e9f418b2 100644
--- a/java/compression/src/main/java/org/apache/arrow/compression/CommonsCompressionFactory.java
+++ b/java/compression/src/main/java/org/apache/arrow/compression/CommonsCompressionFactory.java
@@ -21,7 +21,9 @@
import org.apache.arrow.vector.compression.CompressionUtil;
/**
- * A factory implementation based on Apache Commons library.
+ * Default implementation of factory supported LZ4 and ZSTD compression.
+ *
+ * // TODO(ARROW-12115): Rename this class.
*/
public class CommonsCompressionFactory implements CompressionCodec.Factory {
@@ -32,6 +34,8 @@ public CompressionCodec createCodec(CompressionUtil.CodecType codecType) {
switch (codecType) {
case LZ4_FRAME:
return new Lz4CompressionCodec();
+ case ZSTD:
+ return new ZstdCompressionCodec();
default:
throw new IllegalArgumentException("Compression type not supported: " + codecType);
}
diff --git a/java/compression/src/main/java/org/apache/arrow/compression/Lz4CompressionCodec.java b/java/compression/src/main/java/org/apache/arrow/compression/Lz4CompressionCodec.java
index 2d4b48e5ee5..daa35b7e15b 100644
--- a/java/compression/src/main/java/org/apache/arrow/compression/Lz4CompressionCodec.java
+++ b/java/compression/src/main/java/org/apache/arrow/compression/Lz4CompressionCodec.java
@@ -32,8 +32,6 @@
import org.apache.commons.compress.compressors.lz4.FramedLZ4CompressorOutputStream;
import org.apache.commons.compress.utils.IOUtils;
-import io.netty.util.internal.PlatformDependent;
-
/**
* Compression codec for the LZ4 algorithm.
*/
@@ -45,7 +43,7 @@ protected ArrowBuf doCompress(BufferAllocator allocator, ArrowBuf uncompressedBu
"The uncompressed buffer size exceeds the integer limit %s.", Integer.MAX_VALUE);
byte[] inBytes = new byte[(int) uncompressedBuffer.writerIndex()];
- PlatformDependent.copyMemory(uncompressedBuffer.memoryAddress(), inBytes, 0, uncompressedBuffer.writerIndex());
+ uncompressedBuffer.getBytes(/*index=*/0, inBytes);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
try (InputStream in = new ByteArrayInputStream(inBytes);
OutputStream out = new FramedLZ4CompressorOutputStream(baos)) {
@@ -57,8 +55,7 @@ protected ArrowBuf doCompress(BufferAllocator allocator, ArrowBuf uncompressedBu
byte[] outBytes = baos.toByteArray();
ArrowBuf compressedBuffer = allocator.buffer(CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH + outBytes.length);
- PlatformDependent.copyMemory(
- outBytes, 0, compressedBuffer.memoryAddress() + CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH, outBytes.length);
+ compressedBuffer.setBytes(CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH, outBytes);
compressedBuffer.writerIndex(CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH + outBytes.length);
return compressedBuffer;
}
@@ -71,8 +68,7 @@ protected ArrowBuf doDecompress(BufferAllocator allocator, ArrowBuf compressedBu
long decompressedLength = readUncompressedLength(compressedBuffer);
byte[] inBytes = new byte[(int) (compressedBuffer.writerIndex() - CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH)];
- PlatformDependent.copyMemory(
- compressedBuffer.memoryAddress() + CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH, inBytes, 0, inBytes.length);
+ compressedBuffer.getBytes(CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH, inBytes);
ByteArrayOutputStream out = new ByteArrayOutputStream((int) decompressedLength);
try (InputStream in = new FramedLZ4CompressorInputStream(new ByteArrayInputStream(inBytes))) {
IOUtils.copy(in, out);
@@ -82,8 +78,7 @@ protected ArrowBuf doDecompress(BufferAllocator allocator, ArrowBuf compressedBu
byte[] outBytes = out.toByteArray();
ArrowBuf decompressedBuffer = allocator.buffer(outBytes.length);
- PlatformDependent.copyMemory(outBytes, 0, decompressedBuffer.memoryAddress(), outBytes.length);
- decompressedBuffer.writerIndex(decompressedLength);
+ decompressedBuffer.setBytes(/*index=*/0, outBytes);
return decompressedBuffer;
}
diff --git a/java/compression/src/main/java/org/apache/arrow/compression/ZstdCompressionCodec.java b/java/compression/src/main/java/org/apache/arrow/compression/ZstdCompressionCodec.java
new file mode 100644
index 00000000000..38717843ef8
--- /dev/null
+++ b/java/compression/src/main/java/org/apache/arrow/compression/ZstdCompressionCodec.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.compression;
+
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.compression.AbstractCompressionCodec;
+import org.apache.arrow.vector.compression.CompressionUtil;
+
+import com.github.luben.zstd.Zstd;
+
+/**
+ * Compression codec for the ZSTD algorithm.
+ */
+public class ZstdCompressionCodec extends AbstractCompressionCodec {
+
+ @Override
+ protected ArrowBuf doCompress(BufferAllocator allocator, ArrowBuf uncompressedBuffer) {
+ long maxSize = Zstd.compressBound(uncompressedBuffer.writerIndex());
+ long dstSize = CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH + maxSize;
+ ArrowBuf compressedBuffer = allocator.buffer(dstSize);
+ long bytesWritten = Zstd.compressUnsafe(
+ compressedBuffer.memoryAddress() + CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH, dstSize,
+ /*src*/uncompressedBuffer.memoryAddress(), /*srcSize=*/uncompressedBuffer.writerIndex(),
+ /*level=*/3);
+ if (Zstd.isError(bytesWritten)) {
+ compressedBuffer.close();
+ throw new RuntimeException("Error compressing: " + Zstd.getErrorName(bytesWritten));
+ }
+ compressedBuffer.writerIndex(CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH + bytesWritten);
+ return compressedBuffer;
+ }
+
+ @Override
+ protected ArrowBuf doDecompress(BufferAllocator allocator, ArrowBuf compressedBuffer) {
+ long decompressedLength = readUncompressedLength(compressedBuffer);
+ ArrowBuf uncompressedBuffer = allocator.buffer(decompressedLength);
+ long decompressedSize = Zstd.decompressUnsafe(uncompressedBuffer.memoryAddress(), decompressedLength,
+ /*src=*/compressedBuffer.memoryAddress() + CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH,
+ compressedBuffer.writerIndex() - CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH);
+ if (Zstd.isError(decompressedSize)) {
+ uncompressedBuffer.close();
+ throw new RuntimeException("Error decompressing: " + Zstd.getErrorName(decompressedLength));
+ }
+ if (decompressedLength != decompressedSize) {
+ uncompressedBuffer.close();
+ throw new RuntimeException("Expected != actual decompressed length: " +
+ decompressedLength + " != " + decompressedSize);
+ }
+ uncompressedBuffer.writerIndex(decompressedLength);
+ return uncompressedBuffer;
+ }
+
+ @Override
+ public CompressionUtil.CodecType getCodecType() {
+ return CompressionUtil.CodecType.ZSTD;
+ }
+}
diff --git a/java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodec.java b/java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodec.java
index 52f24e20533..1f6d64d4761 100644
--- a/java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodec.java
+++ b/java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodec.java
@@ -80,6 +80,10 @@ public static Collection