From f124d170abcd051fc666464b4c9b0b89453514f9 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Tue, 16 Jun 2026 22:33:53 +0000 Subject: [PATCH 1/2] [SPARK-57496][SQL][BUILD] Keep the Types Framework ops and UDF worker packages out of the published API Move the client-side Types Framework ops (TypeApiOps, TimeTypeApiOps, TimestampNanosTypeApiOps) from org.apache.spark.sql.types.ops to org.apache.spark.sql.catalyst.types.ops. They are internal plumbing (parallel to the server-side TypeOps) but sat inside the public org.apache.spark.sql.types package, leaking into the published API. The catalyst package is already excluded from both the generated docs (ignoreUndocumentedPackages) and MiMa (MimaExcludes), so co-locating the client ops there with the server-side TypeOps keeps them out of the public surface with no new build/MiMa entries. Also exclude org.apache.spark.udf.worker from the generated docs in SparkBuild.scala: it is UDF-worker infrastructure (mostly protobuf-generated *OrBuilder Java plus worker internals) that surfaced as public API. 
Co-authored-by: Isaac --- project/SparkBuild.scala | 1 + sql/api/src/main/scala/org/apache/spark/sql/Row.scala | 2 +- .../org/apache/spark/sql/catalyst/encoders/RowEncoder.scala | 2 +- .../spark/sql/{ => catalyst}/types/ops/TimeTypeApiOps.scala | 2 +- .../sql/{ => catalyst}/types/ops/TimestampNanosTypeApiOps.scala | 2 +- .../apache/spark/sql/{ => catalyst}/types/ops/TypeApiOps.scala | 2 +- .../src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala | 2 +- .../apache/spark/sql/catalyst/expressions/ToStringBase.scala | 2 +- .../org/apache/spark/sql/catalyst/types/ops/TimeTypeOps.scala | 1 - .../spark/sql/catalyst/types/ops/TimestampNanosTypeOps.scala | 1 - .../sql/catalyst/types/ops/TimestampNanosTypeOpsSuite.scala | 1 - .../main/scala/org/apache/spark/sql/execution/HiveResult.scala | 2 +- .../org/apache/spark/sql/execution/python/EvaluatePython.scala | 2 +- .../sql/hive/thriftserver/SparkExecuteStatementOperation.scala | 2 +- 14 files changed, 11 insertions(+), 13 deletions(-) rename sql/api/src/main/scala/org/apache/spark/sql/{ => catalyst}/types/ops/TimeTypeApiOps.scala (98%) rename sql/api/src/main/scala/org/apache/spark/sql/{ => catalyst}/types/ops/TimestampNanosTypeApiOps.scala (99%) rename sql/api/src/main/scala/org/apache/spark/sql/{ => catalyst}/types/ops/TypeApiOps.scala (99%) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index e55e8c2cea02b..7f0582de06742 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -1657,6 +1657,7 @@ object Unidoc { .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/kafka010"))) .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/types/variant"))) .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/ui/flamegraph"))) + .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/udf/worker"))) .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/util/collection"))) .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/util/io"))) 
.map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/util/kvstore"))) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/Row.scala b/sql/api/src/main/scala/org/apache/spark/sql/Row.scala index b6b5c7da96445..091d2300a186a 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/Row.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/Row.scala @@ -32,12 +32,12 @@ import org.json4s.jackson.JsonMethods.{compact, pretty, render} import org.apache.spark.SparkIllegalArgumentException import org.apache.spark.annotation.{Stable, Unstable} import org.apache.spark.sql.catalyst.expressions.GenericRow +import org.apache.spark.sql.catalyst.types.ops.TypeApiOps import org.apache.spark.sql.catalyst.util.{DateFormatter, SparkDateTimeUtils, TimeFormatter, TimestampFormatter, UDTUtils} import org.apache.spark.sql.errors.DataTypeErrors import org.apache.spark.sql.errors.DataTypeErrors.{toSQLType, toSQLValue} import org.apache.spark.sql.internal.SqlApiConf import org.apache.spark.sql.types._ -import org.apache.spark.sql.types.ops.TypeApiOps import org.apache.spark.unsafe.types.CalendarInterval import org.apache.spark.util.ArrayImplicits._ diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala index 2ad579b2cca56..bb33c15ed7230 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala @@ -22,10 +22,10 @@ import scala.reflect.classTag import org.apache.spark.sql.{AnalysisException, Row} import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.{BinaryEncoder, BoxedBooleanEncoder, BoxedByteEncoder, BoxedDoubleEncoder, BoxedFloatEncoder, BoxedIntEncoder, BoxedLongEncoder, BoxedShortEncoder, CalendarIntervalEncoder, CharEncoder, DateEncoder, DayTimeIntervalEncoder, EncoderField, GeographyEncoder, GeometryEncoder, InstantEncoder, 
IterableEncoder, JavaDecimalEncoder, LocalDateEncoder, LocalDateTimeEncoder, LocalTimeEncoder, MapEncoder, NullEncoder, RowEncoder => AgnosticRowEncoder, StringEncoder, TimestampEncoder, UDTEncoder, VarcharEncoder, VariantEncoder, YearMonthIntervalEncoder} +import org.apache.spark.sql.catalyst.types.ops.TypeApiOps import org.apache.spark.sql.errors.DataTypeErrorsBase import org.apache.spark.sql.internal.SqlApiConf import org.apache.spark.sql.types._ -import org.apache.spark.sql.types.ops.TypeApiOps import org.apache.spark.util.ArrayImplicits._ /** diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/ops/TimeTypeApiOps.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/types/ops/TimeTypeApiOps.scala similarity index 98% rename from sql/api/src/main/scala/org/apache/spark/sql/types/ops/TimeTypeApiOps.scala rename to sql/api/src/main/scala/org/apache/spark/sql/catalyst/types/ops/TimeTypeApiOps.scala index 7b55961092903..090aa66b74bb4 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/ops/TimeTypeApiOps.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/types/ops/TimeTypeApiOps.scala @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.spark.sql.types.ops +package org.apache.spark.sql.catalyst.types.ops import java.time.LocalTime diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/ops/TimestampNanosTypeApiOps.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/types/ops/TimestampNanosTypeApiOps.scala similarity index 99% rename from sql/api/src/main/scala/org/apache/spark/sql/types/ops/TimestampNanosTypeApiOps.scala rename to sql/api/src/main/scala/org/apache/spark/sql/catalyst/types/ops/TimestampNanosTypeApiOps.scala index 4e050b69471ab..c114cab437596 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/ops/TimestampNanosTypeApiOps.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/types/ops/TimestampNanosTypeApiOps.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.spark.sql.types.ops +package org.apache.spark.sql.catalyst.types.ops import java.time.{Instant, LocalDateTime, ZoneId, ZoneOffset} diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/ops/TypeApiOps.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/types/ops/TypeApiOps.scala similarity index 99% rename from sql/api/src/main/scala/org/apache/spark/sql/types/ops/TypeApiOps.scala rename to sql/api/src/main/scala/org/apache/spark/sql/catalyst/types/ops/TypeApiOps.scala index c76551aacd84d..728c6ae40cd4b 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/ops/TypeApiOps.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/types/ops/TypeApiOps.scala @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.spark.sql.types.ops +package org.apache.spark.sql.catalyst.types.ops import java.time.ZoneId diff --git a/sql/api/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala b/sql/api/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala index 1c1024fc0152e..f695c079ade40 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala @@ -27,9 +27,9 @@ import org.apache.arrow.vector.types.{DateUnit, FloatingPointPrecision, Interval import org.apache.arrow.vector.types.pojo.{ArrowType, Field, FieldType, Schema} import org.apache.spark.SparkException +import org.apache.spark.sql.catalyst.types.ops.TypeApiOps import org.apache.spark.sql.errors.ExecutionErrors import org.apache.spark.sql.types._ -import org.apache.spark.sql.types.ops.TypeApiOps import org.apache.spark.util.ArrayImplicits._ private[sql] object ArrowUtils { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala index 65e0b125c2255..4b4a310f7f026 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala @@ -22,12 +22,12 @@ import java.time.ZoneOffset import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ +import org.apache.spark.sql.catalyst.types.ops.TypeApiOps import org.apache.spark.sql.catalyst.util.{ArrayData, CharVarcharCodegenUtils, DateFormatter, FractionTimeFormatter, IntervalStringStyles, IntervalUtils, MapData, TimestampFormatter} import org.apache.spark.sql.catalyst.util.IntervalStringStyles.ANSI_STYLE import org.apache.spark.sql.internal.SQLConf import 
org.apache.spark.sql.internal.SQLConf.BinaryOutputStyle import org.apache.spark.sql.types._ -import org.apache.spark.sql.types.ops.TypeApiOps import org.apache.spark.unsafe.UTF8StringBuilder import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} import org.apache.spark.util.ArrayImplicits._ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/ops/TimeTypeOps.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/ops/TimeTypeOps.scala index f4c4b6c5a543c..d1700aad05cf7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/ops/TimeTypeOps.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/ops/TimeTypeOps.scala @@ -30,7 +30,6 @@ import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.arrow.{ArrowFieldWriter, TimeWriter} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{ObjectType, TimeType} -import org.apache.spark.sql.types.ops.TimeTypeApiOps /** * Server-side (catalyst) operations for TimeType. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/ops/TimestampNanosTypeOps.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/ops/TimestampNanosTypeOps.scala index 96fe32999b93b..7a264f19573de 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/ops/TimestampNanosTypeOps.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/ops/TimestampNanosTypeOps.scala @@ -26,7 +26,6 @@ import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke import org.apache.spark.sql.catalyst.types.{PhysicalDataType, PhysicalTimestampLTZNanosType, PhysicalTimestampNTZNanosType} import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.types.{ObjectType, TimestampLTZNanosType, TimestampNTZNanosType} -import org.apache.spark.sql.types.ops.{TimestampLTZNanosTypeApiOps, TimestampNTZNanosTypeApiOps} import org.apache.spark.unsafe.types.TimestampNanosVal /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/types/ops/TimestampNanosTypeOpsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/types/ops/TimestampNanosTypeOpsSuite.scala index 6633416a06454..ae9b1e7b373e8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/types/ops/TimestampNanosTypeOpsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/types/ops/TimestampNanosTypeOpsSuite.scala @@ -30,7 +30,6 @@ import org.apache.spark.sql.catalyst.types.{PhysicalDataType, PhysicalTimestampL import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{DataType, TimestampLTZNanosType, TimestampNTZNanosType} -import org.apache.spark.sql.types.ops.{TimestampLTZNanosTypeApiOps, TypeApiOps} import org.apache.spark.unsafe.types.{TimestampNanosVal, UTF8String} /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala index 6714510874351..9fb5b960dbf8a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala @@ -23,6 +23,7 @@ import java.time._ import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.SQLConfHelper import org.apache.spark.sql.catalyst.expressions.ToStringBase +import org.apache.spark.sql.catalyst.types.ops.TypeApiOps import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, FractionTimeFormatter, STUtils, TimeFormatter, TimestampFormatter} import org.apache.spark.sql.catalyst.util.IntervalStringStyles.HIVE_STYLE import org.apache.spark.sql.catalyst.util.IntervalUtils.{durationToMicros, periodToMonths, toDayTimeIntervalString, toYearMonthIntervalString} @@ -31,7 +32,6 @@ import org.apache.spark.sql.execution.datasources.v2.{DescribeTableExec, ShowTab import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.BinaryOutputStyle import org.apache.spark.sql.types._ -import org.apache.spark.sql.types.ops.TypeApiOps import org.apache.spark.unsafe.types.{CalendarInterval, VariantVal} import org.apache.spark.util.ArrayImplicits._ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvaluatePython.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvaluatePython.scala index adee0b2ea19a1..ab5ad5d1270b7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvaluatePython.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvaluatePython.scala @@ -29,9 +29,9 @@ import org.apache.spark.api.python.SerDeUtil import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.types.ops.TypeApiOps import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, 
ArrayData, GenericArrayData, MapData, STUtils} import org.apache.spark.sql.types._ -import org.apache.spark.sql.types.ops.TypeApiOps import org.apache.spark.unsafe.types.{BinaryView, UTF8String, VariantVal} object EvaluatePython { diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala index 46302b316b757..591c2727c498e 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala @@ -33,11 +33,11 @@ import org.apache.hive.service.rpc.thrift.{TCLIServiceConstants, TColumnDesc, TP import org.apache.spark.internal.{Logging, LogKeys} import org.apache.spark.internal.LogKeys._ import org.apache.spark.sql.{DataFrame, Row, SparkSession} +import org.apache.spark.sql.catalyst.types.ops.TypeApiOps import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.catalyst.util.DateTimeConstants.MILLIS_PER_SECOND import org.apache.spark.sql.internal.{SQLConf, VariableSubstitution} import org.apache.spark.sql.types._ -import org.apache.spark.sql.types.ops.TypeApiOps import org.apache.spark.util.{Utils => SparkUtils} private[hive] class SparkExecuteStatementOperation( From 816f1e6c601d192bbd89f4434e800f13b9329cf0 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 17 Jun 2026 22:08:58 +0000 Subject: [PATCH 2/2] fix: remove stale TypeApiOps import in TypeOps TypeApiOps was moved into the same package (catalyst.types.ops), so the old org.apache.spark.sql.types.ops import is now broken and redundant. 
Co-authored-by: Isaac --- .../scala/org/apache/spark/sql/catalyst/types/ops/TypeOps.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/ops/TypeOps.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/ops/TypeOps.scala index b09feb0fb1cf1..ab58336b8ef28 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/ops/TypeOps.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/ops/TypeOps.scala @@ -27,7 +27,6 @@ import org.apache.spark.sql.catalyst.expressions.{Expression, Literal, MutableVa import org.apache.spark.sql.catalyst.types.PhysicalDataType import org.apache.spark.sql.execution.arrow.ArrowFieldWriter import org.apache.spark.sql.types.{DataType, TimestampLTZNanosType, TimestampNTZNanosType, TimeType} -import org.apache.spark.sql.types.ops.TypeApiOps /** * Server-side (catalyst) type operations for the Types Framework.