apache · MaxGekk · May 27, 2026 · May 28, 2026 · May 28, 2026 · May 28, 2026
diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json
@@ -3729,6 +3729,12 @@
     ],
     "sqlState" : "42K0N"
   },
+  "INVALID_EXTERNAL_VALUE" : {
+    "message" : [
+      "The value (<other>) of the type (<otherClass>) cannot be converted to the <dataType> type."
+    ],
+    "sqlState" : "42K0N"
+  },
   "INVALID_EXTRACT_BASE_FIELD_TYPE" : {
     "message" : [
       "Can't extract a value from <base>. Need a complex type [STRUCT, ARRAY, MAP] but got <other>."
@@ -11487,11 +11493,6 @@
       "Must be 2 children: <others>"
     ]
   },
-  "_LEGACY_ERROR_TEMP_3219" : {
-    "message" : [
-      "The value (<other>) of the type (<otherClass>) cannot be converted to the <dataType> type."
-    ]
-  },
   "_LEGACY_ERROR_TEMP_3220" : {
     "message" : [
       "The value (<other>) of the type (<otherClass>) cannot be converted to an array of <elementType>"

diff --git a/docs/sql-ref-datatypes.md b/docs/sql-ref-datatypes.md
@@ -54,7 +54,7 @@ Spark SQL and DataFrames support the following data types:
   - `TimestampNTZType`: Timestamp without time zone(TIMESTAMP_NTZ). It represents values comprising values of fields year, month, day,
   hour, minute, and second. All operations are performed without taking any time zone into account.
     - Note: TIMESTAMP in Spark is a user-specified alias associated with one of the TIMESTAMP_LTZ and TIMESTAMP_NTZ variations.  Users can set the default timestamp type as `TIMESTAMP_LTZ`(default value) or `TIMESTAMP_NTZ` via the configuration `spark.sql.timestampType`.
-  - `TimestampNTZNanosType(precision)` / `TimestampLTZNanosType(precision)`: Preview nanosecond-capable variants of `TIMESTAMP_NTZ` and `TIMESTAMP_LTZ` with fractional seconds precision `precision` in `[7, 9]`. Unparameterized `TIMESTAMP`, `TIMESTAMP_NTZ`, and `TIMESTAMP_LTZ` remain microsecond types. Enable the preview feature with `SET spark.sql.timestampNanosTypes.enabled=true;` before using these types in schemas or SQL.
+  - `TimestampNTZNanosType(precision)` / `TimestampLTZNanosType(precision)`: Preview nanosecond-capable variants of `TIMESTAMP_NTZ` and `TIMESTAMP_LTZ` with fractional seconds precision `precision` in `[7, 9]`. Unparameterized `TIMESTAMP`, `TIMESTAMP_NTZ`, and `TIMESTAMP_LTZ` remain microsecond types. In schema-driven Dataset/DataFrame conversion, Spark maps `TimestampNTZNanosType` to `java.time.LocalDateTime` and `TimestampLTZNanosType` to `java.time.Instant`; values with more fractional-second digits than the declared `precision` are truncated toward negative infinity (floored) to that precision. Enable the preview feature with `SET spark.sql.timestampNanosTypes.enabled=true;` before using these types in schemas or SQL.
 
 * Interval types
   - `YearMonthIntervalType(startField, endField)`: Represents a year-month interval which is made up of a contiguous subset of the following fields:

diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/encoders/AgnosticEncoder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/encoders/AgnosticEncoder.scala
@@ -257,6 +257,14 @@ object AgnosticEncoders {
   case class InstantEncoder(override val lenientSerialization: Boolean)
       extends LeafEncoder[Instant](TimestampType)
   case object LocalDateTimeEncoder extends LeafEncoder[LocalDateTime](TimestampNTZType)
+  // Nanosecond-capable counterparts of `LocalDateTimeEncoder` / `InstantEncoder(false)`.
+  // `RowEncoder` selects them when the schema declares a `TimestampNTZNanosType(p)` or
+  // `TimestampLTZNanosType(p)` column, so Dataset create/collect round trips keep the
+  // sub-microsecond digits up to the declared precision `p`. See SPARK-57033.
+  case class LocalDateTimeNanosEncoder(precision: Int)
+      extends LeafEncoder[LocalDateTime](TimestampNTZNanosType(precision))
+  case class InstantNanosEncoder(precision: Int)
+      extends LeafEncoder[Instant](TimestampLTZNanosType(precision))
   case object LocalTimeEncoder extends LeafEncoder[LocalTime](TimeType())
 
   case class SparkDecimalEncoder(dt: DecimalType) extends LeafEncoder[Decimal](dt)

diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala
@@ -21,8 +21,8 @@ import scala.collection.mutable
 import scala.reflect.classTag
 
 import org.apache.spark.sql.{AnalysisException, Row}
-import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.{BinaryEncoder, BoxedBooleanEncoder, BoxedByteEncoder, BoxedDoubleEncoder, BoxedFloatEncoder, BoxedIntEncoder, BoxedLongEncoder, BoxedShortEncoder, CalendarIntervalEncoder, CharEncoder, DateEncoder, DayTimeIntervalEncoder, EncoderField, GeographyEncoder, GeometryEncoder, InstantEncoder, IterableEncoder, JavaDecimalEncoder, LocalDateEncoder, LocalDateTimeEncoder, LocalTimeEncoder, MapEncoder, NullEncoder, RowEncoder => AgnosticRowEncoder, StringEncoder, TimestampEncoder, UDTEncoder, VarcharEncoder, VariantEncoder, YearMonthIntervalEncoder}
-import org.apache.spark.sql.errors.DataTypeErrorsBase
+import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.{BinaryEncoder, BoxedBooleanEncoder, BoxedByteEncoder, BoxedDoubleEncoder, BoxedFloatEncoder, BoxedIntEncoder, BoxedLongEncoder, BoxedShortEncoder, CalendarIntervalEncoder, CharEncoder, DateEncoder, DayTimeIntervalEncoder, EncoderField, GeographyEncoder, GeometryEncoder, InstantEncoder, InstantNanosEncoder, IterableEncoder, JavaDecimalEncoder, LocalDateEncoder, LocalDateTimeEncoder, LocalDateTimeNanosEncoder, LocalTimeEncoder, MapEncoder, NullEncoder, RowEncoder => AgnosticRowEncoder, StringEncoder, TimestampEncoder, UDTEncoder, VarcharEncoder, VariantEncoder, YearMonthIntervalEncoder}
+import org.apache.spark.sql.errors.{DataTypeErrors, DataTypeErrorsBase}
 import org.apache.spark.sql.internal.SqlApiConf
 import org.apache.spark.sql.types._
 import org.apache.spark.sql.types.ops.TypeApiOps
@@ -50,6 +50,8 @@ import org.apache.spark.util.ArrayImplicits._
  *   TimestampType -> java.time.Instant if spark.sql.datetime.java8API.enabled is true
  *
  *   TimestampNTZType -> java.time.LocalDateTime
+ *   TimestampNTZNanosType -> java.time.LocalDateTime
+ *   TimestampLTZNanosType -> java.time.Instant
  *   TimeType -> java.time.LocalTime
  *
  *   DayTimeIntervalType -> java.time.Duration
@@ -97,6 +99,14 @@ object RowEncoder extends DataTypeErrorsBase {
       case TimestampType if SqlApiConf.get.datetimeJava8ApiEnabled => InstantEncoder(lenient)
       case TimestampType => TimestampEncoder(lenient)
       case TimestampNTZType => LocalDateTimeEncoder
+      // Nano timestamp types intentionally do not honor `lenient`: legacy `java.sql.Timestamp` /
+      // `java.sql.Date` external types are out of scope for nanosecond precision (SPARK-57033).
+      case t: TimestampNTZNanosType =>
+        DataTypeErrors.checkTimestampNanosTypesEnabled()
+        LocalDateTimeNanosEncoder(t.precision)
+      case t: TimestampLTZNanosType =>
+        DataTypeErrors.checkTimestampNanosTypesEnabled()
+        InstantNanosEncoder(t.precision)
       case DateType if SqlApiConf.get.datetimeJava8ApiEnabled => LocalDateEncoder(lenient)
       case DateType => DateEncoder(lenient)
       case _: TimeType => LocalTimeEncoder

diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala
@@ -31,7 +31,7 @@ import org.apache.spark.sql.catalyst.util.DateTimeConstants._
 import org.apache.spark.sql.catalyst.util.RebaseDateTime.{rebaseGregorianToJulianDays, rebaseGregorianToJulianMicros, rebaseJulianToGregorianDays, rebaseJulianToGregorianMicros}
 import org.apache.spark.sql.errors.ExecutionErrors
 import org.apache.spark.sql.types.{DateType, TimestampType, TimeType}
-import org.apache.spark.unsafe.types.UTF8String
+import org.apache.spark.unsafe.types.{TimestampNanosVal, UTF8String}
 import org.apache.spark.util.SparkClassUtils
 
 trait SparkDateTimeUtils {
@@ -208,6 +208,82 @@ trait SparkDateTimeUtils {
     instantToMicros(localDateTime.toInstant(ZoneOffset.UTC))
   }
 
+  /**
+   * Truncates the sub-microsecond part of a timestamp to the given `precision` in `[7, 9]`.
+   * Precision 9 keeps all three digits, 8 zeroes the last digit, and 7 zeroes the last two.
+   *
+   * The input is the already-extracted `nanosWithinMicro` component (`0..999`), so truncation is
+   * independent of the epoch sign of the original timestamp value.
+   *
+   * For a `precision` outside `[7, 9]` the input is returned unchanged, because the surrounding
+   * timestamp nanos types validate the bound.
+   */
+  private def truncateNanosWithinMicroToPrecision(nanosWithinMicro: Int, precision: Int): Int = {
+    precision match {
+      case 7 => (nanosWithinMicro / 100) * 100
+      case 8 => (nanosWithinMicro / 10) * 10
+      case _ => nanosWithinMicro
+    }
+  }
+
+  /**
+   * Converts a `java.time.LocalDateTime` into the composite `(epochMicros, nanosWithinMicro)`
+   * pair used by `TimestampNTZNanosType(precision)` (interpreted at UTC). `epochMicros` comes
+   * from [[localDateTimeToMicros]] (which floors the integral micro part toward `-inf`);
+   * the last three decimal digits of `localDateTime.getNano` (`[0, 999]`) become
+   * `nanosWithinMicro` after zeroing the `(9 - precision)` lowest digits.
+   *
+   * Taken together, the result is the original nanosecond value floored toward `-inf` to a
+   * multiple of the precision step (10^(9 - precision) ns). At `precision = 9` the conversion is
+   * lossless within the valid range; at 7 / 8 the lowest 2 / 1 sub-micro digits are zeroed. The
+   * same flooring will be the basis of the future `CAST(... AS TIMESTAMP_NTZ(precision))` rule.
+   */
+  def localDateTimeToTimestampNanos(
+      localDateTime: LocalDateTime,
+      precision: Int): TimestampNanosVal = {
+    val epochMicros = localDateTimeToMicros(localDateTime)
+    val rawNanosWithinMicro = localDateTime.getNano % NANOS_PER_MICROS.toInt
+    val nanosWithinMicro = truncateNanosWithinMicroToPrecision(rawNanosWithinMicro, precision)
+    TimestampNanosVal.fromParts(epochMicros, nanosWithinMicro.toShort)
+  }
+
+  /**
+   * Inverse of [[localDateTimeToTimestampNanos]]: rebuilds a `java.time.LocalDateTime` (at UTC)
+   * by adding `nanosWithinMicro` to the date-time of `epochMicros`. `nanosWithinMicro` is in
+   * `[0, 999]`, so `plusNanos` never crosses the second boundary.
+   */
+  def timestampNanosToLocalDateTime(v: TimestampNanosVal): LocalDateTime = {
+    microsToLocalDateTime(v.epochMicros).plusNanos(v.nanosWithinMicro.toLong)
+  }
+
+  /**
+   * Converts a `java.time.Instant` into the composite `(epochMicros, nanosWithinMicro)` pair used
+   * by `TimestampLTZNanosType(precision)`. `epochMicros` comes from [[instantToMicros]] (which
+   * floors the integral micro part toward `-inf`); the last three decimal digits of
+   * `instant.getNano` (`[0, 999]`) become `nanosWithinMicro` after zeroing the `(9 - precision)`
+   * lowest digits.
+   *
+   * Taken together, the result is the original nanosecond value floored toward `-inf` to a
+   * multiple of the precision step (10^(9 - precision) ns). At `precision = 9` the conversion is
+   * lossless within the valid range; at 7 / 8 the lowest 2 / 1 sub-micro digits are zeroed. The
+   * same flooring will be the basis of the future `CAST(... AS TIMESTAMP_LTZ(precision))` rule.
+   */
+  def instantToTimestampNanos(instant: Instant, precision: Int): TimestampNanosVal = {
+    val epochMicros = instantToMicros(instant)
+    val rawNanosWithinMicro = instant.getNano % NANOS_PER_MICROS.toInt
+    val nanosWithinMicro = truncateNanosWithinMicroToPrecision(rawNanosWithinMicro, precision)
+    TimestampNanosVal.fromParts(epochMicros, nanosWithinMicro.toShort)
+  }
+
+  /**
+   * Inverse of [[instantToTimestampNanos]]: rebuilds a `java.time.Instant` by adding
+   * `nanosWithinMicro` to the instant of `epochMicros`. `nanosWithinMicro` is in `[0, 999]`, so
+   * `plusNanos` never crosses the second boundary.
+   */
+  def timestampNanosToInstant(v: TimestampNanosVal): Instant = {
+    microsToInstant(v.epochMicros).plusNanos(v.nanosWithinMicro.toLong)
+  }
+
   /**
    * Converts the local date to the number of days since 1970-01-01.
    */

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala
@@ -36,7 +36,7 @@ import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 import org.apache.spark.sql.types.DayTimeIntervalType._
 import org.apache.spark.sql.types.YearMonthIntervalType._
-import org.apache.spark.unsafe.types.{GeographyVal, GeometryVal, UTF8String}
+import org.apache.spark.unsafe.types.{GeographyVal, GeometryVal, TimestampNanosVal, UTF8String}
 import org.apache.spark.util.ArrayImplicits._
 import org.apache.spark.util.collection.Utils
 
@@ -88,6 +88,8 @@ object CatalystTypeConverters {
       case TimestampType if SQLConf.get.datetimeJava8ApiEnabled => InstantConverter
       case TimestampType => TimestampConverter
       case TimestampNTZType => TimestampNTZConverter
+      case t: TimestampNTZNanosType => new TimestampNTZNanosConverter(t)
+      case t: TimestampLTZNanosType => new TimestampLTZNanosConverter(t)
       case dt: DecimalType => new DecimalConverter(dt)
       case BooleanType => BooleanConverter
       case ByteType => ByteConverter
@@ -298,7 +300,7 @@ object CatalystTypeConverters {
         }
         new GenericInternalRow(ar)
       case other => throw new SparkIllegalArgumentException(
-        errorClass = "_LEGACY_ERROR_TEMP_3219",
+        errorClass = "INVALID_EXTERNAL_VALUE",
         messageParameters = scala.collection.immutable.Map(
           "other" -> other.toString,
           "otherClass" -> other.getClass.getCanonicalName,
@@ -357,7 +359,7 @@ object CatalystTypeConverters {
       case chr: Char => UTF8String.fromString(chr.toString)
       case ac: Array[Char] => UTF8String.fromString(String.valueOf(ac))
       case other => throw new SparkIllegalArgumentException(
-        errorClass = "_LEGACY_ERROR_TEMP_3219",
+        errorClass = "INVALID_EXTERNAL_VALUE",
         messageParameters = scala.collection.immutable.Map(
           "other" -> other.toString,
           "otherClass" -> other.getClass.getCanonicalName,
@@ -383,7 +385,7 @@ object CatalystTypeConverters {
       case g: org.apache.spark.sql.types.Geometry if SQLConf.get.geospatialEnabled =>
         STUtils.serializeGeomFromWKB(g, dataType)
       case other => throw new SparkIllegalArgumentException(
-        errorClass = "_LEGACY_ERROR_TEMP_3219",
+        errorClass = "INVALID_EXTERNAL_VALUE",
         messageParameters = scala.collection.immutable.Map(
           "other" -> other.toString,
           "otherClass" -> other.getClass.getCanonicalName,
@@ -408,7 +410,7 @@ object CatalystTypeConverters {
       case g: org.apache.spark.sql.types.Geography if SQLConf.get.geospatialEnabled =>
         STUtils.serializeGeogFromWKB(g, dataType)
       case other => throw new SparkIllegalArgumentException(
-        errorClass = "_LEGACY_ERROR_TEMP_3219",
+        errorClass = "INVALID_EXTERNAL_VALUE",
         messageParameters = scala.collection.immutable.Map(
           "other" -> other.toString,
           "otherClass" -> other.getClass.getCanonicalName,
@@ -432,7 +434,7 @@ object CatalystTypeConverters {
       case d: Date => DateTimeUtils.fromJavaDate(d)
       case l: LocalDate => DateTimeUtils.localDateToDays(l)
       case other => throw new SparkIllegalArgumentException(
-        errorClass = "_LEGACY_ERROR_TEMP_3219",
+        errorClass = "INVALID_EXTERNAL_VALUE",
         messageParameters = scala.collection.immutable.Map(
           "other" -> other.toString,
           "otherClass" -> other.getClass.getCanonicalName,
@@ -472,7 +474,7 @@ object CatalystTypeConverters {
       case t: Timestamp => DateTimeUtils.fromJavaTimestamp(t)
       case i: Instant => DateTimeUtils.instantToMicros(i)
       case other => throw new SparkIllegalArgumentException(
-        errorClass = "_LEGACY_ERROR_TEMP_3219",
+        errorClass = "INVALID_EXTERNAL_VALUE",
         messageParameters = scala.collection.immutable.Map(
           "other" -> other.toString,
           "otherClass" -> other.getClass.getCanonicalName,
@@ -500,7 +502,7 @@ object CatalystTypeConverters {
     override def toCatalystImpl(scalaValue: Any): Any = scalaValue match {
       case l: LocalDateTime => DateTimeUtils.localDateTimeToMicros(l)
       case other => throw new SparkIllegalArgumentException(
-        errorClass = "_LEGACY_ERROR_TEMP_3219",
+        errorClass = "INVALID_EXTERNAL_VALUE",
         messageParameters = scala.collection.immutable.Map(
           "other" -> other.toString,
           "otherClass" -> other.getClass.getCanonicalName,
@@ -515,6 +517,50 @@ object CatalystTypeConverters {
       DateTimeUtils.microsToLocalDateTime(row.getLong(column))
   }
 
+  private class TimestampNTZNanosConverter(dataType: TimestampNTZNanosType)
+    extends CatalystTypeConverter[Any, LocalDateTime, TimestampNanosVal] {
+    override def toCatalystImpl(scalaValue: Any): TimestampNanosVal = scalaValue match {
+      case l: LocalDateTime => DateTimeUtils.localDateTimeToTimestampNanos(l, dataType.precision)
+      case other => throw new SparkIllegalArgumentException(
+        errorClass = "INVALID_EXTERNAL_VALUE",
+        messageParameters = scala.collection.immutable.Map(
+          "other" -> other.toString,
+          "otherClass" -> other.getClass.getCanonicalName,
+          "dataType" -> dataType.sql))
+    }
+    // Null-safe direction: a null Catalyst value is returned as a null `LocalDateTime`.
+    override def toScala(catalystValue: TimestampNanosVal): LocalDateTime =
+      if (catalystValue == null) null
+      else DateTimeUtils.timestampNanosToLocalDateTime(catalystValue)
+    // Reads the nanos value at `column` of `row` directly; this method does no null check.
+    override def toScalaImpl(row: InternalRow, column: Int): LocalDateTime =
+      DateTimeUtils.timestampNanosToLocalDateTime(row.getTimestampNTZNanos(column))
+  }
+
+  // Always maps `TimestampLTZNanosType` to `java.time.Instant`. Unlike micro `TimestampType`,
+  // the mapping does not consult `spark.sql.datetime.java8API.enabled`: the nanos LTZ type is
+  // post-Java-8 and the legacy `java.sql.Timestamp` external type is intentionally out of scope
+  // here. See SPARK-57033.
+  private class TimestampLTZNanosConverter(dataType: TimestampLTZNanosType)
+    extends CatalystTypeConverter[Any, Instant, TimestampNanosVal] {
+    override def toCatalystImpl(scalaValue: Any): TimestampNanosVal = scalaValue match {
+      case i: Instant => DateTimeUtils.instantToTimestampNanos(i, dataType.precision)
+      case other => throw new SparkIllegalArgumentException(
+        errorClass = "INVALID_EXTERNAL_VALUE",
+        messageParameters = scala.collection.immutable.Map(
+          "other" -> other.toString,
+          "otherClass" -> other.getClass.getCanonicalName,
+          "dataType" -> dataType.sql))
+    }
+    // Null-safe direction: a null Catalyst value is returned as a null `Instant`.
+    override def toScala(catalystValue: TimestampNanosVal): Instant =
+      if (catalystValue == null) null
+      else DateTimeUtils.timestampNanosToInstant(catalystValue)
+    // Reads the nanos value at `column` of `row` directly; this method does no null check.
+    override def toScalaImpl(row: InternalRow, column: Int): Instant =
+      DateTimeUtils.timestampNanosToInstant(row.getTimestampLTZNanos(column))
+  }
+
   private class DecimalConverter(dataType: DecimalType)
     extends CatalystTypeConverter[Any, JavaBigDecimal, Decimal] {
 
@@ -527,7 +573,7 @@ object CatalystTypeConverters {
         case d: JavaBigInteger => Decimal(d)
         case d: Decimal => d
         case other => throw new SparkIllegalArgumentException(
-          errorClass = "_LEGACY_ERROR_TEMP_3219",
+          errorClass = "INVALID_EXTERNAL_VALUE",
           messageParameters = scala.collection.immutable.Map(
             "other" -> other.toString,
             "otherClass" -> other.getClass.getCanonicalName,
@@ -655,6 +701,9 @@ object CatalystTypeConverters {
     case ld: LocalDate => LocalDateConverter.toCatalyst(ld)
     case t: LocalTime => TimeConverter.toCatalyst(t)
     case t: Timestamp => TimestampConverter.toCatalyst(t)
+    // SPARK-57033: schema-less convertToCatalyst keeps bare `Instant` / `LocalDateTime` on the
+    // microsecond converters. The nanosecond path is schema-driven only - users opt in via an
+    // explicit `TimestampLTZNanosType` / `TimestampNTZNanosType` column in the schema.
     case i: Instant => InstantConverter.toCatalyst(i)
     case l: LocalDateTime => TimestampNTZConverter.toCatalyst(l)
     case d: BigDecimal =>