From 8ea77fdaa89c7e6c4fa717e99a8b9f8dca1ce5a2 Mon Sep 17 00:00:00 2001 From: Zoltan Ratkai Date: Tue, 7 Mar 2023 18:07:07 +0100 Subject: [PATCH 1/2] ORC-1384 Fix ArrayIndexOutOfBoundsException when reading dictionary stream bigger than the dictionary ### What changes were proposed in this pull request? Avoid ArrayIndexOutOfBoundsException when reading a dictionary stream bigger than the dictionary. Check the size of the dictionary and of the available input, and read only the smaller of the two. ### Why are the changes needed? In Hive, when reading with LLAP, data is read in 4kB blocks, which leads to an ArrayIndexOutOfBoundsException when the dictionary is smaller. ### How was this patch tested? It was tested with Hive's qtest, since the necessary subclasses are not available here. This closes #1384 --- .../java/org/apache/orc/impl/TreeReaderFactory.java | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java index ecc02fb8de..f0849c2b8a 100644 --- a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java +++ b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java @@ -2292,10 +2292,15 @@ private void readDictionaryStream(InStream in) throws IOException { int dictionaryBufferSize = dictionaryOffsets[dictionaryOffsets.length - 1]; dictionaryBuffer = new byte[dictionaryBufferSize]; int pos = 0; - int chunkSize = in.available(); - byte[] chunkBytes = new byte[chunkSize]; + //check if dictionary size is smaller than available stream size + // to avoid ArrayIndexOutOfBoundsException + int readSize = Math.min(in.available(), dictionaryBufferSize); + byte[] chunkBytes = new byte[readSize]; while (pos < dictionaryBufferSize) { - int currentLength = in.read(chunkBytes, 0, chunkSize); + int currentLength = in.read(chunkBytes, 0, readSize); + //check if dictionary size is smaller than available stream size + // to avoid ArrayIndexOutOfBoundsException + 
currentLength = Math.min(currentLength, dictionaryBufferSize - pos); System.arraycopy(chunkBytes, 0, dictionaryBuffer, pos, currentLength); pos += currentLength; } From 86c9a03112a2db184985c4d6cf73c86a18c25c92 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 9 Mar 2023 12:49:31 -0800 Subject: [PATCH 2/2] Update TreeReaderFactory.java --- java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java index f0849c2b8a..2a2adf50d7 100644 --- a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java +++ b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java @@ -2292,13 +2292,13 @@ private void readDictionaryStream(InStream in) throws IOException { int dictionaryBufferSize = dictionaryOffsets[dictionaryOffsets.length - 1]; dictionaryBuffer = new byte[dictionaryBufferSize]; int pos = 0; - //check if dictionary size is smaller than available stream size + // check if dictionary size is smaller than available stream size // to avoid ArrayIndexOutOfBoundsException int readSize = Math.min(in.available(), dictionaryBufferSize); byte[] chunkBytes = new byte[readSize]; while (pos < dictionaryBufferSize) { int currentLength = in.read(chunkBytes, 0, readSize); - //check if dictionary size is smaller than available stream size + // check if dictionary size is smaller than available stream size // to avoid ArrayIndexOutOfBoundsException currentLength = Math.min(currentLength, dictionaryBufferSize - pos); System.arraycopy(chunkBytes, 0, dictionaryBuffer, pos, currentLength);