From e9b098de9015240a23d88972a04f760622045a20 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Sun, 31 May 2026 21:46:19 +0200 Subject: [PATCH] [SPARK-57184][SQL] Recurse into CalendarInterval children when appending a null struct ### What changes were proposed in this pull request? Include `CalendarIntervalType` in the recursion guard of `WritableColumnVector.appendStruct(boolean isNull)`, so that appending a NULL parent struct cascades `appendStruct(true)` into a `CalendarInterval` child column and advances all three of its grandchild columns (months/days/microseconds). ### Why are the changes needed? A `CalendarInterval` column is struct-shaped: it is backed by three grandchild primitive columns. The recursion guard only handled `StructType` and `VariantType`, so an interval child took the `appendNull()` branch, advancing only the interval's own cursor and leaving its three grandchild cursors behind. For a struct column with a `CalendarInterval` field, a NULL parent row then caused a subsequent non-null row to write its months/days/microseconds into the wrong grandchild slots, returning a skewed value on read - silent data corruption for the nested struct-of-interval case. ### Does this PR introduce _any_ user-facing change? Yes. Reading back a struct-of-interval column that contains a NULL parent row followed by a non-null row now returns the correct interval value instead of a skewed one. ### How was this patch tested? Added a unit test to `ColumnarBatchSuite` that converts a null parent struct followed by non-null struct-of-interval rows and verifies the interval values are read back correctly. ### Was this patch authored or co-authored using generative AI tooling? 
Generated-by: Claude Opus 4.8 Co-authored-by: Max Gekk --- .../vectorized/WritableColumnVector.java | 3 +- .../vectorized/ColumnarBatchSuite.scala | 31 +++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java index 0f5b23ad85390..a723aea00f9ec 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java @@ -751,7 +751,8 @@ public final int appendStruct(boolean isNull) { putNull(elementsAppended); elementsAppended++; for (WritableColumnVector c: childColumns) { - if (c.type instanceof StructType || c.type instanceof VariantType) { + if (c.type instanceof StructType || c.type instanceof VariantType + || c.type instanceof CalendarIntervalType) { c.appendStruct(true); } else { c.appendNull(); diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala index 40f73450eb21d..110fd67b266ad 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala @@ -1951,6 +1951,37 @@ class ColumnarBatchSuite extends SparkFunSuite { } } + test("SPARK-57184: appendStruct(true) recurses into CalendarInterval child columns") { + // A struct column whose only field is a CalendarInterval. When the parent struct is + // appended as null, the recursion must also advance the interval child's grandchild + // columns (months / days / microseconds). Otherwise a subsequent non-null row's interval + // would be read from skewed grandchild slots. 
+ val schema = new StructType() + .add("s", new StructType().add("cal", CalendarIntervalType)) + val converter = new RowToColumnConverter(schema) + val columns = OnHeapColumnVector.allocateColumns(3, schema) + try { + // row 0: null parent struct. + converter.convert(new GenericInternalRow(Array[Any](null)), columns.toArray) + // row 1: non-null struct holding interval (1, 2, 3). + converter.convert( + new GenericInternalRow(Array[Any]( + new GenericInternalRow(Array[Any](new CalendarInterval(1, 2, 3))))), + columns.toArray) + // row 2: non-null struct holding interval (4, 5, 6). + converter.convert( + new GenericInternalRow(Array[Any]( + new GenericInternalRow(Array[Any](new CalendarInterval(4, 5, 6))))), + columns.toArray) + + assert(columns(0).isNullAt(0)) + assert(columns(0).getStruct(1).getInterval(0) === new CalendarInterval(1, 2, 3)) + assert(columns(0).getStruct(2).getInterval(0) === new CalendarInterval(4, 5, 6)) + } finally { + columns.foreach(_.close()) + } + } + testVector("Decimal API", 4, DecimalType.IntDecimal) { column =>