From bbede2de7af805eba70355c56446750b791ebb5b Mon Sep 17 00:00:00 2001 From: Divjot Arora Date: Tue, 26 May 2026 15:22:29 +0000 Subject: [PATCH] [GH-3587] Fix parquet.thrift.string.size.limit validation --- .../java/org/apache/parquet/format/Util.java | 4 +- .../TestParquetFileReaderMaxMessageSize.java | 48 +++++++++++++++++++ 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/parquet-format-structures/src/main/java/org/apache/parquet/format/Util.java b/parquet-format-structures/src/main/java/org/apache/parquet/format/Util.java index 776fb45576..419d757702 100644 --- a/parquet-format-structures/src/main/java/org/apache/parquet/format/Util.java +++ b/parquet-format-structures/src/main/java/org/apache/parquet/format/Util.java @@ -417,8 +417,8 @@ private static InterningProtocol protocol(TIOStreamTransport t, int configuredMa // Set to default 100 MB maxMessageSize = DEFAULT_MAX_MESSAGE_SIZE; } - if (configuredMaxMessageSize <= 0) { - throw new NumberFormatException("Max message size must be positive: " + configuredMaxMessageSize); + if (maxMessageSize <= 0) { + throw new NumberFormatException("Max message size must be positive: " + maxMessageSize); } TConfiguration config = t.getConfiguration(); diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileReaderMaxMessageSize.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileReaderMaxMessageSize.java index f9f121b998..7b6088e58a 100644 --- a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileReaderMaxMessageSize.java +++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileReaderMaxMessageSize.java @@ -143,4 +143,52 @@ public void testInsufficientMaxMessageSizeError() throws IOException { || e.getCause().getMessage().contains("MaxMessageSize reached")); } } + + /** + * The -1 sentinel must be honored as "use the default max message size", + * not rejected as a non-positive value. 
+   */
+  @Test
+  public void testReadAcceptsMinusOneAsDefaultMaxMessageSize() throws IOException {
+    Configuration readConf = new Configuration();
+    readConf.setInt("parquet.thrift.string.size.limit", -1); // the sentinel value under test
+    ParquetReadOptions options = HadoopReadOptions.builder(readConf).build();
+
+    try (ParquetFileReader reader =
+        ParquetFileReader.open(HadoopInputFile.fromPath(TEST_FILE, readConf), options)) {
+      ParquetMetadata metadata = reader.getFooter();
+      assertNotNull(metadata); // open() succeeded and a footer was returned
+      assertEquals(schema, metadata.getFileMetaData().getSchema());
+    }
+  }
+
+  @Test
+  public void testReadRejectsZeroMaxMessageSize() throws IOException {
+    assertRejectsNonPositiveMaxMessageSize(0); // boundary case: zero is not positive
+  }
+
+  @Test
+  public void testReadRejectsInvalidNegativeMaxMessageSize() throws IOException {
+    assertRejectsNonPositiveMaxMessageSize(-5); // a negative value other than the -1 sentinel
+  }
+
+  private void assertRejectsNonPositiveMaxMessageSize(int maxMessageSize) throws IOException {
+    Configuration readConf = new Configuration();
+    readConf.setInt("parquet.thrift.string.size.limit", maxMessageSize);
+    ParquetReadOptions options = HadoopReadOptions.builder(readConf).build();
+
+    try (ParquetFileReader reader =
+        ParquetFileReader.open(HadoopInputFile.fromPath(TEST_FILE, readConf), options)) {
+      fail("Expected failure for non-positive max message size: " + maxMessageSize);
+    } catch (RuntimeException | IOException e) { // expected text may be on e or on its direct cause
+      String message = e.getMessage() == null ? "" : e.getMessage(); // null-safe for contains()
+      String causeMessage = e.getCause() == null || e.getCause().getMessage() == null
+          ? ""
+          : e.getCause().getMessage();
+      assertTrue(
+          "Expected 'Max message size must be positive' in " + message + " / " + causeMessage,
+          message.contains("Max message size must be positive")
+              || causeMessage.contains("Max message size must be positive"));
+    }
+  }
 }