diff --git a/sql/core/benchmarks/ParquetVectorUpdaterBenchmark-jdk21-results.txt b/sql/core/benchmarks/ParquetVectorUpdaterBenchmark-jdk21-results.txt index 07373d576633d..ed39b7f69c7cf 100644 --- a/sql/core/benchmarks/ParquetVectorUpdaterBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/ParquetVectorUpdaterBenchmark-jdk21-results.txt @@ -2,83 +2,86 @@ Identity Updaters ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1013-azure +OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1018-azure AMD EPYC 7763 64-Core Processor Identity Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -BooleanUpdater 0 0 0 16990.6 0.1 1.0X -ByteUpdater (INT32 -> Byte) 0 0 0 3765.0 0.3 0.2X -ShortUpdater (INT32 -> Short) 1 1 0 1682.9 0.6 0.1X -IntegerUpdater 0 0 0 7756.2 0.1 0.5X -LongUpdater 0 0 0 3870.4 0.3 0.2X -FloatUpdater 0 0 0 7758.5 0.1 0.5X -DoubleUpdater 0 0 0 3875.9 0.3 0.2X -BinaryUpdater 15 15 0 70.4 14.2 0.0X +BooleanUpdater 0 0 0 16982.4 0.1 1.0X +ByteUpdater (INT32 -> Byte) 0 0 0 3744.9 0.3 0.2X +ShortUpdater (INT32 -> Short) 1 1 0 1675.0 0.6 0.1X +IntegerUpdater 0 0 0 10248.0 0.1 0.6X +LongUpdater 0 0 0 5141.4 0.2 0.3X +FloatUpdater 0 0 0 10286.2 0.1 0.6X +DoubleUpdater 0 0 0 5139.3 0.2 0.3X +BinaryUpdater 15 15 0 71.1 14.1 0.0X ================================================================================================ Type-converting Updaters ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1013-azure +OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1018-azure AMD EPYC 7763 64-Core Processor Type-converting Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -IntegerToLongUpdater 0 0 0 5133.8 0.2 1.0X -IntegerToDoubleUpdater 0 0 0 6090.4 0.2 1.2X -FloatToDoubleUpdater 0 0 0 2527.1 0.4 0.5X -DateToTimestampNTZUpdater 1 1 0 934.8 1.1 0.2X -DowncastLongUpdater (INT64 -> Decimal(9,2)) 0 0 0 5108.5 0.2 1.0X +IntegerToLongUpdater 0 0 0 6158.1 0.2 1.0X +IntegerToDoubleUpdater 0 0 0 6228.1 0.2 1.0X +FloatToDoubleUpdater 0 0 0 2525.4 0.4 0.4X +DateToTimestampNTZUpdater 1 1 0 932.9 1.1 0.2X +LongAsNanosUpdater (TimeType) 1 1 0 1228.5 0.8 0.2X +DowncastLongUpdater (INT64 -> Decimal(9,2)) 0 0 0 5861.5 0.2 1.0X ================================================================================================ Rebase Updaters ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1013-azure +OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1018-azure AMD EPYC 7763 64-Core Processor -Rebase Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------- -IntegerWithRebaseUpdater (DATE legacy) 0 0 0 3263.0 0.3 1.0X -LongWithRebaseUpdater (TIMESTAMP_MICROS legacy) 0 0 0 2282.0 0.4 0.7X -LongAsMicrosUpdater (TIMESTAMP_MILLIS) 2 3 0 420.5 2.4 0.1X +Rebase Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +----------------------------------------------------------------------------------------------------------------------------------- +IntegerWithRebaseUpdater (DATE legacy) 0 0 0 3647.5 0.3 1.0X +LongWithRebaseUpdater (TIMESTAMP_MICROS legacy) 0 0 0 2668.9 0.4 0.7X +LongAsMicrosUpdater (TIMESTAMP_MILLIS) 1 1 0 1228.3 0.8 0.3X +DateToTimestampNTZWithRebaseUpdater (DATE legacy) 1 1 0 797.7 1.3 0.2X +LongAsMicrosRebaseUpdater (TIMESTAMP_MILLIS legacy) 1 1 0 1099.3 0.9 0.3X ================================================================================================ Unsigned Updaters ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1013-azure +OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1018-azure AMD EPYC 7763 64-Core Processor Unsigned Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------- -UnsignedIntegerUpdater (UINT32 -> Long) 0 0 0 5112.2 0.2 1.0X -UnsignedLongUpdater (UINT64 -> Decimal(20,0)) 16 17 0 63.9 15.7 0.0X +UnsignedIntegerUpdater (UINT32 -> Long) 0 0 0 5894.2 0.2 1.0X +UnsignedLongUpdater (UINT64 -> Decimal(20,0)) 17 18 1 60.3 16.6 0.0X ================================================================================================ Decimal Updaters ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1013-azure +OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1018-azure AMD EPYC 7763 64-Core Processor Decimal Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -IntegerToDecimalUpdater 0 0 0 7750.4 0.1 1.0X -LongToDecimalUpdater 0 0 0 3866.5 0.3 0.5X -FixedLenByteArrayToDecimalUpdater 21 21 0 50.1 19.9 0.0X +IntegerToDecimalUpdater 0 0 0 10291.3 0.1 1.0X +LongToDecimalUpdater 0 0 0 5139.6 0.2 0.5X +FixedLenByteArrayToDecimalUpdater 21 21 0 49.6 20.2 0.0X ================================================================================================ FixedLenByteArray Updaters ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1013-azure +OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1018-azure AMD EPYC 7763 64-Core Processor FixedLenByteArray Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -FixedLenByteArrayUpdater (len=16 -> Binary) 20 20 0 51.7 19.3 1.0X -FixedLenByteArrayAsIntUpdater (len=4 -> Decimal(9,2)) 7 7 1 160.1 6.2 3.1X -FixedLenByteArrayAsLongUpdater (len=8 -> Decimal(18,4)) 8 8 0 133.2 7.5 2.6X +FixedLenByteArrayUpdater (len=16 -> Binary) 20 21 2 51.9 19.3 1.0X +FixedLenByteArrayAsIntUpdater (len=4 -> Decimal(9,2)) 7 7 0 160.2 6.2 3.1X +FixedLenByteArrayAsLongUpdater (len=8 -> Decimal(18,4)) 8 8 0 133.1 7.5 2.6X diff --git a/sql/core/benchmarks/ParquetVectorUpdaterBenchmark-jdk25-results.txt b/sql/core/benchmarks/ParquetVectorUpdaterBenchmark-jdk25-results.txt index e03dae8c072ac..16ff18ac2e9d1 100644 --- a/sql/core/benchmarks/ParquetVectorUpdaterBenchmark-jdk25-results.txt +++ b/sql/core/benchmarks/ParquetVectorUpdaterBenchmark-jdk25-results.txt @@ -2,57 +2,60 @@ Identity Updaters ================================================================================================ -OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1013-azure +OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1018-azure AMD EPYC 7763 64-Core Processor Identity Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -BooleanUpdater 0 0 0 17171.8 0.1 1.0X -ByteUpdater (INT32 -> Byte) 0 0 0 3679.8 0.3 0.2X -ShortUpdater (INT32 -> Short) 1 1 0 1662.3 0.6 0.1X -IntegerUpdater 0 0 0 10261.9 0.1 0.6X -LongUpdater 0 0 0 5130.7 0.2 0.3X -FloatUpdater 0 0 0 10255.9 0.1 0.6X -DoubleUpdater 0 0 0 5127.2 0.2 0.3X -BinaryUpdater 15 16 0 67.7 14.8 0.0X +BooleanUpdater 0 0 0 17151.8 0.1 1.0X +ByteUpdater (INT32 -> Byte) 0 0 0 3702.7 0.3 0.2X +ShortUpdater (INT32 -> Short) 1 1 0 1662.6 0.6 0.1X +IntegerUpdater 0 0 0 7747.5 0.1 0.5X +LongUpdater 0 0 0 5099.0 0.2 0.3X +FloatUpdater 0 0 0 7751.0 0.1 0.5X +DoubleUpdater 0 0 0 3795.5 0.3 0.2X +BinaryUpdater 16 16 0 66.4 15.1 0.0X ================================================================================================ Type-converting Updaters ================================================================================================ -OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1013-azure +OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1018-azure AMD EPYC 7763 64-Core Processor Type-converting Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -IntegerToLongUpdater 0 0 0 6438.7 0.2 1.0X -IntegerToDoubleUpdater 0 0 0 6441.2 0.2 1.0X -FloatToDoubleUpdater 0 0 0 3199.5 0.3 0.5X -DateToTimestampNTZUpdater 1 1 0 884.9 1.1 0.1X -DowncastLongUpdater (INT64 -> Decimal(9,2)) 0 0 0 6713.8 0.1 1.0X +IntegerToLongUpdater 0 0 0 4994.1 0.2 1.0X +IntegerToDoubleUpdater 0 0 0 6589.1 0.2 1.3X +FloatToDoubleUpdater 0 0 0 3199.0 0.3 0.6X +DateToTimestampNTZUpdater 1 1 0 1213.3 0.8 0.2X +LongAsNanosUpdater (TimeType) 1 1 0 1115.4 0.9 0.2X +DowncastLongUpdater (INT64 -> Decimal(9,2)) 0 0 0 4930.3 0.2 1.0X ================================================================================================ Rebase Updaters ================================================================================================ -OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1013-azure +OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1018-azure AMD EPYC 7763 64-Core Processor -Rebase Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------- -IntegerWithRebaseUpdater (DATE legacy) 0 0 0 3664.5 0.3 1.0X -LongWithRebaseUpdater (TIMESTAMP_MICROS legacy) 0 0 0 2668.7 0.4 0.7X -LongAsMicrosUpdater (TIMESTAMP_MILLIS) 3 3 0 371.3 2.7 0.1X +Rebase Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +----------------------------------------------------------------------------------------------------------------------------------- +IntegerWithRebaseUpdater (DATE legacy) 0 0 0 3665.1 0.3 1.0X +LongWithRebaseUpdater (TIMESTAMP_MICROS legacy) 0 0 0 2667.6 0.4 0.7X +LongAsMicrosUpdater (TIMESTAMP_MILLIS) 1 1 0 1228.5 0.8 0.3X +DateToTimestampNTZWithRebaseUpdater (DATE legacy) 1 1 0 719.8 1.4 0.2X +LongAsMicrosRebaseUpdater (TIMESTAMP_MILLIS legacy) 1 1 0 1092.7 0.9 0.3X ================================================================================================ Unsigned Updaters ================================================================================================ -OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1013-azure +OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1018-azure AMD EPYC 7763 64-Core Processor Unsigned Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------- -UnsignedIntegerUpdater (UINT32 -> Long) 0 0 0 6183.9 0.2 1.0X +UnsignedIntegerUpdater (UINT32 -> Long) 0 0 0 4931.9 0.2 1.0X UnsignedLongUpdater (UINT64 -> Decimal(20,0)) 17 17 0 60.4 16.6 0.0X @@ -60,25 +63,25 @@ UnsignedLongUpdater (UINT64 -> Decimal(20,0)) 17 17 Decimal Updaters ================================================================================================ -OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1013-azure +OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1018-azure AMD EPYC 7763 64-Core Processor Decimal Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -IntegerToDecimalUpdater 0 0 0 10268.1 0.1 1.0X -LongToDecimalUpdater 0 0 0 5122.2 0.2 0.5X -FixedLenByteArrayToDecimalUpdater 21 21 0 50.9 19.7 0.0X +IntegerToDecimalUpdater 0 0 0 7752.7 0.1 1.0X +LongToDecimalUpdater 0 0 0 5144.8 0.2 0.7X +FixedLenByteArrayToDecimalUpdater 21 21 3 50.7 19.7 0.0X ================================================================================================ FixedLenByteArray Updaters ================================================================================================ -OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1013-azure +OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1018-azure AMD EPYC 7763 64-Core Processor FixedLenByteArray Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -FixedLenByteArrayUpdater (len=16 -> Binary) 21 21 1 50.3 19.9 1.0X -FixedLenByteArrayAsIntUpdater (len=4 -> Decimal(9,2)) 7 7 0 152.7 6.6 3.0X -FixedLenByteArrayAsLongUpdater (len=8 -> Decimal(18,4)) 8 8 0 127.7 7.8 2.5X +FixedLenByteArrayUpdater (len=16 -> Binary) 21 21 1 50.2 19.9 1.0X +FixedLenByteArrayAsIntUpdater (len=4 -> Decimal(9,2)) 7 7 0 152.6 6.6 3.0X +FixedLenByteArrayAsLongUpdater (len=8 -> Decimal(18,4)) 8 8 0 127.6 7.8 2.5X diff --git a/sql/core/benchmarks/ParquetVectorUpdaterBenchmark-results.txt b/sql/core/benchmarks/ParquetVectorUpdaterBenchmark-results.txt index 828e685788773..58d3ac10aa97f 100644 --- a/sql/core/benchmarks/ParquetVectorUpdaterBenchmark-results.txt +++ b/sql/core/benchmarks/ParquetVectorUpdaterBenchmark-results.txt @@ -2,83 +2,86 @@ Identity Updaters ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1013-azure +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure AMD EPYC 7763 64-Core Processor Identity Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -BooleanUpdater 0 0 0 14640.0 0.1 1.0X -ByteUpdater (INT32 -> Byte) 0 0 0 3686.8 0.3 0.3X -ShortUpdater (INT32 -> Short) 1 1 0 2054.1 0.5 0.1X -IntegerUpdater 0 0 0 7759.1 0.1 0.5X -LongUpdater 0 0 0 3876.1 0.3 0.3X -FloatUpdater 0 0 0 7762.5 0.1 0.5X -DoubleUpdater 0 0 0 5123.2 0.2 0.3X -BinaryUpdater 15 15 0 70.1 14.3 0.0X +BooleanUpdater 0 0 0 15918.4 0.1 1.0X +ByteUpdater (INT32 -> Byte) 0 0 0 3983.1 0.3 0.3X +ShortUpdater (INT32 -> Short) 0 1 0 2227.2 0.4 0.1X +IntegerUpdater 0 0 0 8412.7 0.1 0.5X +LongUpdater 0 0 0 5077.8 0.2 0.3X +FloatUpdater 0 0 0 8391.8 0.1 0.5X +DoubleUpdater 0 0 0 5568.9 0.2 0.3X +BinaryUpdater 15 16 0 70.8 14.1 0.0X ================================================================================================ Type-converting Updaters ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1013-azure +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure AMD EPYC 7763 64-Core Processor Type-converting Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -IntegerToLongUpdater 1 1 0 1281.0 0.8 1.0X -IntegerToDoubleUpdater 1 1 0 1550.0 0.6 1.2X -FloatToDoubleUpdater 1 1 0 1419.0 0.7 1.1X -DateToTimestampNTZUpdater 2 2 0 605.2 1.7 0.5X -DowncastLongUpdater (INT64 -> Decimal(9,2)) 1 1 0 1285.1 0.8 1.0X +IntegerToLongUpdater 1 1 0 1386.0 0.7 1.0X +IntegerToDoubleUpdater 1 1 0 1554.5 0.6 1.1X +FloatToDoubleUpdater 1 1 0 1537.7 0.7 1.1X +DateToTimestampNTZUpdater 2 2 0 596.9 1.7 0.4X +LongAsNanosUpdater (TimeType) 1 1 0 942.2 1.1 0.7X +DowncastLongUpdater (INT64 -> Decimal(9,2)) 1 1 0 1394.2 0.7 1.0X ================================================================================================ Rebase Updaters ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1013-azure +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure AMD EPYC 7763 64-Core Processor -Rebase Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------- -IntegerWithRebaseUpdater (DATE legacy) 0 0 0 2662.8 0.4 1.0X -LongWithRebaseUpdater (TIMESTAMP_MICROS legacy) 1 1 0 2084.1 0.5 0.8X -LongAsMicrosUpdater (TIMESTAMP_MILLIS) 2 2 0 454.8 2.2 0.2X +Rebase Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +----------------------------------------------------------------------------------------------------------------------------------- +IntegerWithRebaseUpdater (DATE legacy) 0 0 0 2526.8 0.4 1.0X +LongWithRebaseUpdater (TIMESTAMP_MICROS legacy) 1 1 0 1995.7 0.5 0.8X +LongAsMicrosUpdater (TIMESTAMP_MILLIS) 1 1 0 1087.7 0.9 0.4X +DateToTimestampNTZWithRebaseUpdater (DATE legacy) 2 2 0 470.9 2.1 0.2X +LongAsMicrosRebaseUpdater (TIMESTAMP_MILLIS legacy) 1 1 0 961.7 1.0 0.4X ================================================================================================ Unsigned Updaters ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1013-azure +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure AMD EPYC 7763 64-Core Processor Unsigned Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------- -UnsignedIntegerUpdater (UINT32 -> Long) 1 1 0 1094.1 0.9 1.0X -UnsignedLongUpdater (UINT64 -> Decimal(20,0)) 17 17 0 61.0 16.4 0.1X +UnsignedIntegerUpdater (UINT32 -> Long) 1 1 0 1174.9 0.9 1.0X +UnsignedLongUpdater (UINT64 -> Decimal(20,0)) 17 17 0 63.4 15.8 0.1X ================================================================================================ Decimal Updaters ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1013-azure +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure AMD EPYC 7763 64-Core Processor Decimal Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -IntegerToDecimalUpdater 0 0 0 10261.0 0.1 1.0X -LongToDecimalUpdater 0 0 0 5118.9 0.2 0.5X -FixedLenByteArrayToDecimalUpdater 21 21 0 51.0 19.6 0.0X +IntegerToDecimalUpdater 0 0 0 10115.2 0.1 1.0X +LongToDecimalUpdater 0 0 0 5563.6 0.2 0.6X +FixedLenByteArrayToDecimalUpdater 20 20 0 53.6 18.6 0.0X ================================================================================================ FixedLenByteArray Updaters ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1013-azure +OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1018-azure AMD EPYC 7763 64-Core Processor FixedLenByteArray Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -FixedLenByteArrayUpdater (len=16 -> Binary) 19 19 0 55.3 18.1 1.0X -FixedLenByteArrayAsIntUpdater (len=4 -> Decimal(9,2)) 7 7 0 160.2 6.2 2.9X -FixedLenByteArrayAsLongUpdater (len=8 -> Decimal(18,4)) 9 9 0 123.3 8.1 2.2X +FixedLenByteArrayUpdater (len=16 -> Binary) 18 19 1 57.7 17.3 1.0X +FixedLenByteArrayAsIntUpdater (len=4 -> Decimal(9,2)) 6 6 0 173.7 5.8 3.0X +FixedLenByteArrayAsLongUpdater (len=8 -> Decimal(18,4)) 8 8 0 133.5 7.5 2.3X diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorUpdaterFactory.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorUpdaterFactory.java index c088f5f2844be..90e4c3e04f814 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorUpdaterFactory.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorUpdaterFactory.java @@ -429,7 +429,11 @@ public void readValues( int offset, WritableColumnVector values, VectorizedValuesReader valuesReader) { - valuesReader.readIntegersAsTimestampMicros(total, values, offset); + valuesReader.readIntegersAsLongs(total, values, offset); + for (int i = 0; i < total; i++) { + values.putLong(offset + i, + DateTimeUtils.daysToMicros((int) values.getLong(offset + i), ZoneOffset.UTC)); + } } @Override @@ -470,8 +474,10 @@ public void readValues( int offset, WritableColumnVector values, VectorizedValuesReader valuesReader) { - for (int i = 0; i < total; ++i) { - readValue(offset + i, values, valuesReader); + valuesReader.readIntegersAsLongs(total, values, offset); + for (int i = 0; i < total; i++) { + int rebasedDays = rebaseDays((int) values.getLong(offset + i), failIfRebase); + values.putLong(offset + i, DateTimeUtils.daysToMicros(rebasedDays, ZoneOffset.UTC)); } } @@ -796,8 +802,9 @@ public void readValues( int offset, WritableColumnVector values, VectorizedValuesReader valuesReader) { - for (int i = 0; i < total; ++i) { - readValue(offset + i, values, valuesReader); + valuesReader.readLongs(total, values, offset); + for (int i = 0; i < total; i++) { + values.putLong(offset + i, DateTimeUtils.millisToMicros(values.getLong(offset + i))); } } @@ -840,8 +847,10 @@ public void readValues( int offset, WritableColumnVector values, VectorizedValuesReader valuesReader) { - for (int i = 0; i < total; ++i) { - readValue(offset + i, values, valuesReader); + valuesReader.readLongs(total, values, offset); + for (int i = 0; i < total; i++) { + long julianMicros = DateTimeUtils.millisToMicros(values.getLong(offset + i)); + values.putLong(offset + i, rebaseMicros(julianMicros, failIfRebase, timeZone)); } } @@ -878,8 +887,9 @@ public void readValues( int offset, WritableColumnVector values, VectorizedValuesReader valuesReader) { - for (int i = 0; i < total; ++i) { - readValue(offset + i, values, valuesReader); + valuesReader.readLongs(total, values, offset); + for (int i = 0; i < total; i++) { + values.putLong(offset + i, DateTimeUtils.microsToNanos(values.getLong(offset + i))); } } diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedPlainValuesReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedPlainValuesReader.java index 23207e7db3570..9249fab7915ca 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedPlainValuesReader.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedPlainValuesReader.java @@ -19,7 +19,6 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.nio.ByteOrder; -import java.time.ZoneOffset; import org.apache.parquet.bytes.ByteBufferInputStream; import org.apache.parquet.column.values.ValuesReader; @@ -27,7 +26,6 @@ import org.apache.parquet.io.ParquetDecodingException; import org.apache.spark.SparkUnsupportedOperationException; -import org.apache.spark.sql.catalyst.util.DateTimeUtils; import org.apache.spark.sql.catalyst.util.RebaseDateTime; import org.apache.spark.sql.execution.datasources.DataSourceUtils; import org.apache.spark.sql.execution.vectorized.WritableColumnVector; @@ -206,19 +204,6 @@ public final void readLongsAsInts(int total, WritableColumnVector c, int rowId) } } - @Override - public final void readIntegersAsTimestampMicros( - int total, WritableColumnVector c, int rowId) { - int requiredBytes = total * 4; - ByteBuffer buffer = getBuffer(requiredBytes); - // Per-element conversion calls into `DateTimeUtils.daysToMicros`, which is `days * - // MICROS_PER_DAY` for UTC plus an overflow check via `Math.multiplyExact`. No - // `hasArray` bulk-copy path because source and target have different widths. - for (int i = 0; i < total; i += 1) { - c.putLong(rowId + i, DateTimeUtils.daysToMicros(buffer.getInt(), ZoneOffset.UTC)); - } - } - // A fork of `readIntegers` to rebase the date values. For performance reasons, this method // iterates the values twice: check if we need to rebase first, then go to the optimized branch // if rebase is not needed. diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedValuesReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedValuesReader.java index bf6a1c6a03886..c62f7bcec8c35 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedValuesReader.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedValuesReader.java @@ -18,9 +18,7 @@ package org.apache.spark.sql.execution.datasources.parquet; import java.nio.ByteBuffer; -import java.time.ZoneOffset; -import org.apache.spark.sql.catalyst.util.DateTimeUtils; import org.apache.spark.sql.execution.vectorized.WritableColumnVector; import org.apache.parquet.io.api.Binary; @@ -138,32 +136,6 @@ default void readLongsAsInts(int total, WritableColumnVector c, int rowId) { } } - /** - * Reads {@code total} INT32 date-day values (days since 1970-01-01, Proleptic Gregorian), - * converts each to TimestampNTZ micros at UTC via - * {@link DateTimeUtils#daysToMicros(int, java.time.ZoneId)}, and writes them into - * {@code c} starting at {@code c[rowId]}. Used by the type-converting updater that - * reads parquet INT32 DATE columns into Spark {@code TimestampNTZType} targets in - * {@code CORRECTED} datetime-rebase mode. The {@code LEGACY}/{@code EXCEPTION} rebase - * variants are out of scope for this method. - * - *

The default implementation is a per-row loop that calls - * {@code DateTimeUtils.daysToMicros} per element; it is algorithmically equivalent to - * the legacy per-row Updater path but the per-element conversion call dominates the - * loop, so the speedup from overriding this method is more modest than for the pure - * primitive-cast siblings ({@link #readIntegersAsLongs}, {@link #readIntegersAsDoubles}). - * Subclasses backed by contiguous bulk storage (e.g. PLAIN encoding via - * {@link VectorizedPlainValuesReader}) should override to read source bytes once and run - * a tight in-method conversion loop, avoiding {@code total} virtual dispatches on - * {@link #readInteger()}. Readers without an override preserve correctness but gain no - * speedup. - */ - default void readIntegersAsTimestampMicros(int total, WritableColumnVector c, int rowId) { - for (int i = 0; i < total; i += 1) { - c.putLong(rowId + i, DateTimeUtils.daysToMicros(readInteger(), ZoneOffset.UTC)); - } - } - void readBinary(int total, WritableColumnVector c, int rowId); void readGeometry(int total, WritableColumnVector c, int rowId); void readGeography(int total, WritableColumnVector c, int rowId); diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorUpdaterBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorUpdaterBenchmark.scala index a78593096d5c8..a0664fa5780c3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorUpdaterBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorUpdaterBenchmark.scala @@ -45,10 +45,10 @@ import org.apache.spark.sql.types._ * (Boolean, Byte, Short, Integer, Long, Float, Double, Binary). * B. Type-converting Updaters -- per-row read+convert+write loops. * `IntegerToLong`, `IntegerToDouble`, `FloatToDouble`, `DateToTimestampNTZ`, - * `DowncastLong`. + * `DowncastLong`, `LongAsNanos`. * C. Rebase Updaters -- date/timestamp legacy-calendar rebase variants. * `IntegerWithRebase` (DATE), `LongWithRebase` (TIMESTAMP_MICROS), - * `LongAsMicros`. + * `LongAsMicros`, `DateToTimestampNTZWithRebase`, `LongAsMicrosRebase`. * D. Unsigned Updaters -- `UnsignedInteger`, `UnsignedLong`. * E. Decimal Updaters -- `IntegerToDecimal`, `LongToDecimal`, * `BinaryToDecimal`, `FixedLenByteArrayToDecimal`. @@ -264,6 +264,11 @@ object ParquetVectorUpdaterBenchmark extends BenchmarkBase { TimestampNTZType, descriptor(PrimitiveTypeName.INT32, LogicalTypeAnnotation.dateType()), longVec, intBytes) + addReadValuesCase(benchmark, "LongAsNanosUpdater (TimeType)", + TimeType(), + descriptor(PrimitiveTypeName.INT64, + LogicalTypeAnnotation.timeType(false, LogicalTypeAnnotation.TimeUnit.MICROS)), + longVec, longBytes) // 32-bit-decimal target with INT64 source routes via canReadAsLongDecimal + // is32BitDecimalType, both TRUE here, hence DowncastLongUpdater. addReadValuesCase(benchmark, "DowncastLongUpdater (INT64 -> Decimal(9,2))", @@ -303,6 +308,17 @@ object ParquetVectorUpdaterBenchmark extends BenchmarkBase { descriptor(PrimitiveTypeName.INT64, LogicalTypeAnnotation.timestampType(true, LogicalTypeAnnotation.TimeUnit.MILLIS)), longVec, longBytes) + addReadValuesCase(benchmark, "DateToTimestampNTZWithRebaseUpdater (DATE legacy)", + TimestampNTZType, + descriptor(PrimitiveTypeName.INT32, LogicalTypeAnnotation.dateType()), + longVec, intBytes, + datetimeRebaseMode = "LEGACY") + addReadValuesCase(benchmark, "LongAsMicrosRebaseUpdater (TIMESTAMP_MILLIS legacy)", + TimestampType, + descriptor(PrimitiveTypeName.INT64, + LogicalTypeAnnotation.timestampType(true, LogicalTypeAnnotation.TimeUnit.MILLIS)), + longVec, longBytes, + datetimeRebaseMode = "LEGACY") benchmark.run() }