From bbede2de7af805eba70355c56446750b791ebb5b Mon Sep 17 00:00:00 2001
From: Divjot Arora
Date: Tue, 26 May 2026 15:22:29 +0000
Subject: [PATCH] [GH-3587] Fix parquet.thrift.string.size.limit validation
---
.../java/org/apache/parquet/format/Util.java | 4 +-
.../TestParquetFileReaderMaxMessageSize.java | 48 +++++++++++++++++++
2 files changed, 50 insertions(+), 2 deletions(-)
diff --git a/parquet-format-structures/src/main/java/org/apache/parquet/format/Util.java b/parquet-format-structures/src/main/java/org/apache/parquet/format/Util.java
index 776fb45576..419d757702 100644
--- a/parquet-format-structures/src/main/java/org/apache/parquet/format/Util.java
+++ b/parquet-format-structures/src/main/java/org/apache/parquet/format/Util.java
@@ -417,8 +417,8 @@ private static InterningProtocol protocol(TIOStreamTransport t, int configuredMa
// Set to default 100 MB
maxMessageSize = DEFAULT_MAX_MESSAGE_SIZE;
}
- if (configuredMaxMessageSize <= 0) {
- throw new NumberFormatException("Max message size must be positive: " + configuredMaxMessageSize);
+ if (maxMessageSize <= 0) {
+ throw new NumberFormatException("Max message size must be positive: " + maxMessageSize);
}
TConfiguration config = t.getConfiguration();
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileReaderMaxMessageSize.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileReaderMaxMessageSize.java
index f9f121b998..7b6088e58a 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileReaderMaxMessageSize.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileReaderMaxMessageSize.java
@@ -143,4 +143,52 @@ public void testInsufficientMaxMessageSizeError() throws IOException {
|| e.getCause().getMessage().contains("MaxMessageSize reached"));
}
}
+
+ /**
+ * A limit of -1 is the sentinel for "use the default max message size": opening the
+ * file must succeed and expose the expected schema instead of failing validation.
+ */
+ @Test
+ public void testReadAcceptsMinusOneAsDefaultMaxMessageSize() throws IOException {
+ Configuration conf = new Configuration();
+ conf.setInt("parquet.thrift.string.size.limit", -1);
+ ParquetReadOptions readOptions = HadoopReadOptions.builder(conf).build();
+
+ try (ParquetFileReader fileReader =
+ ParquetFileReader.open(HadoopInputFile.fromPath(TEST_FILE, conf), readOptions)) {
+ ParquetMetadata footer = fileReader.getFooter();
+ assertNotNull(footer);
+ assertEquals(schema, footer.getFileMetaData().getSchema());
+ }
+ }
+
+ @Test
+ public void testReadRejectsZeroMaxMessageSize() throws IOException {
+ assertRejectsNonPositiveMaxMessageSize(0); // zero is non-positive and is not the -1 sentinel
+ }
+
+ @Test
+ public void testReadRejectsInvalidNegativeMaxMessageSize() throws IOException {
+ assertRejectsNonPositiveMaxMessageSize(-5); // negative, but not the -1 "use default" sentinel
+ }
+
+ // Opens TEST_FILE with the given limit; passes only if the open fails with the validation message.
+ private void assertRejectsNonPositiveMaxMessageSize(int maxMessageSize) throws IOException {
+ final String expectedFragment = "Max message size must be positive";
+ Configuration conf = new Configuration();
+ conf.setInt("parquet.thrift.string.size.limit", maxMessageSize);
+ ParquetReadOptions readOptions = HadoopReadOptions.builder(conf).build();
+ try (ParquetFileReader fileReader =
+ ParquetFileReader.open(HadoopInputFile.fromPath(TEST_FILE, conf), readOptions)) {
+ fail("Expected failure for non-positive max message size: " + maxMessageSize);
+ } catch (RuntimeException | IOException ex) {
+ // Look at both the exception and its direct cause for the validation message.
+ Throwable cause = ex.getCause();
+ String message = ex.getMessage() != null ? ex.getMessage() : "";
+ String causeMessage = cause != null && cause.getMessage() != null ? cause.getMessage() : "";
+ assertTrue(
+ "Expected 'Max message size must be positive' in " + message + " / " + causeMessage,
+ message.contains(expectedFragment) || causeMessage.contains(expectedFragment));
+ }
+ }
}