From 8fe15d29bac6586452618371be8fd1348607467d Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Tue, 16 Jun 2026 22:33:53 +0000 Subject: [PATCH] [SPARK-57496][SQL][BUILD] Keep the Types Framework ops and UDF worker packages out of the published API Move the client-side Types Framework ops (TypeApiOps, TimeTypeApiOps, TimestampNanosTypeApiOps) from org.apache.spark.sql.types.ops to org.apache.spark.sql.catalyst.types.ops. They are internal plumbing (parallel to the server-side TypeOps) but sat inside the public org.apache.spark.sql.types package, leaking into the published API. The catalyst package is already excluded from both the generated docs (ignoreUndocumentedPackages) and MiMa (MimaExcludes), so co-locating the client ops there with the server-side TypeOps keeps them out of the public surface with no new build/MiMa entries. Also exclude org.apache.spark.udf.worker from the generated docs in SparkBuild.scala: it is UDF-worker infrastructure (mostly protobuf-generated *OrBuilder Java plus worker internals) that surfaced as public API. 
Co-authored-by: Isaac --- project/SparkBuild.scala | 1 + sql/api/src/main/scala/org/apache/spark/sql/Row.scala | 2 +- .../org/apache/spark/sql/catalyst/encoders/RowEncoder.scala | 2 +- .../spark/sql/{ => catalyst}/types/ops/TimeTypeApiOps.scala | 2 +- .../apache/spark/sql/{ => catalyst}/types/ops/TypeApiOps.scala | 2 +- .../src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala | 2 +- .../apache/spark/sql/catalyst/expressions/ToStringBase.scala | 2 +- .../org/apache/spark/sql/catalyst/types/ops/TimeTypeOps.scala | 1 - .../main/scala/org/apache/spark/sql/execution/HiveResult.scala | 2 +- .../org/apache/spark/sql/execution/python/EvaluatePython.scala | 2 +- .../sql/hive/thriftserver/SparkExecuteStatementOperation.scala | 2 +- 11 files changed, 10 insertions(+), 10 deletions(-) rename sql/api/src/main/scala/org/apache/spark/sql/{ => catalyst}/types/ops/TimeTypeApiOps.scala (98%) rename sql/api/src/main/scala/org/apache/spark/sql/{ => catalyst}/types/ops/TypeApiOps.scala (99%) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 729364a31e54c..7c5e49f3a69ec 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -1632,6 +1632,7 @@ object Unidoc { .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/kafka010"))) .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/types/variant"))) .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/ui/flamegraph"))) + .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/udf/worker"))) .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/util/collection"))) .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/util/io"))) .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/util/kvstore"))) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/Row.scala b/sql/api/src/main/scala/org/apache/spark/sql/Row.scala index 3a0e4d45f937c..89859c0326b29 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/Row.scala +++ 
b/sql/api/src/main/scala/org/apache/spark/sql/Row.scala @@ -32,12 +32,12 @@ import org.json4s.jackson.JsonMethods.{compact, pretty, render} import org.apache.spark.SparkIllegalArgumentException import org.apache.spark.annotation.{Stable, Unstable} import org.apache.spark.sql.catalyst.expressions.GenericRow +import org.apache.spark.sql.catalyst.types.ops.TypeApiOps import org.apache.spark.sql.catalyst.util.{DateFormatter, SparkDateTimeUtils, TimeFormatter, TimestampFormatter, UDTUtils} import org.apache.spark.sql.errors.DataTypeErrors import org.apache.spark.sql.errors.DataTypeErrors.{toSQLType, toSQLValue} import org.apache.spark.sql.internal.SqlApiConf import org.apache.spark.sql.types._ -import org.apache.spark.sql.types.ops.TypeApiOps import org.apache.spark.unsafe.types.CalendarInterval import org.apache.spark.util.ArrayImplicits._ diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala index bad673672188c..2dfdce7c10e6a 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala @@ -22,10 +22,10 @@ import scala.reflect.classTag import org.apache.spark.sql.{AnalysisException, Row} import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.{BinaryEncoder, BoxedBooleanEncoder, BoxedByteEncoder, BoxedDoubleEncoder, BoxedFloatEncoder, BoxedIntEncoder, BoxedLongEncoder, BoxedShortEncoder, CalendarIntervalEncoder, CharEncoder, DateEncoder, DayTimeIntervalEncoder, EncoderField, GeographyEncoder, GeometryEncoder, InstantEncoder, IterableEncoder, JavaDecimalEncoder, LocalDateEncoder, LocalDateTimeEncoder, LocalTimeEncoder, MapEncoder, NullEncoder, RowEncoder => AgnosticRowEncoder, StringEncoder, TimestampEncoder, UDTEncoder, VarcharEncoder, VariantEncoder, YearMonthIntervalEncoder} +import 
org.apache.spark.sql.catalyst.types.ops.TypeApiOps import org.apache.spark.sql.errors.DataTypeErrorsBase import org.apache.spark.sql.internal.SqlApiConf import org.apache.spark.sql.types._ -import org.apache.spark.sql.types.ops.TypeApiOps import org.apache.spark.util.ArrayImplicits._ /** diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/ops/TimeTypeApiOps.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/types/ops/TimeTypeApiOps.scala similarity index 98% rename from sql/api/src/main/scala/org/apache/spark/sql/types/ops/TimeTypeApiOps.scala rename to sql/api/src/main/scala/org/apache/spark/sql/catalyst/types/ops/TimeTypeApiOps.scala index dd8f0398aba9c..fbe942c65eb17 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/ops/TimeTypeApiOps.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/types/ops/TimeTypeApiOps.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.spark.sql.types.ops +package org.apache.spark.sql.catalyst.types.ops import java.time.LocalTime diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/ops/TypeApiOps.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/types/ops/TypeApiOps.scala similarity index 99% rename from sql/api/src/main/scala/org/apache/spark/sql/types/ops/TypeApiOps.scala rename to sql/api/src/main/scala/org/apache/spark/sql/catalyst/types/ops/TypeApiOps.scala index fff5b8b6a022e..ac3347efc1abc 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/ops/TypeApiOps.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/types/ops/TypeApiOps.scala @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.spark.sql.types.ops +package org.apache.spark.sql.catalyst.types.ops import org.apache.arrow.vector.types.pojo.ArrowType diff --git a/sql/api/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala b/sql/api/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala index 1c1024fc0152e..f695c079ade40 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala @@ -27,9 +27,9 @@ import org.apache.arrow.vector.types.{DateUnit, FloatingPointPrecision, Interval import org.apache.arrow.vector.types.pojo.{ArrowType, Field, FieldType, Schema} import org.apache.spark.SparkException +import org.apache.spark.sql.catalyst.types.ops.TypeApiOps import org.apache.spark.sql.errors.ExecutionErrors import org.apache.spark.sql.types._ -import org.apache.spark.sql.types.ops.TypeApiOps import org.apache.spark.util.ArrayImplicits._ private[sql] object ArrowUtils { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala index 04052dafb61ae..d0c7231d63604 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala @@ -22,12 +22,12 @@ import java.time.ZoneOffset import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ +import org.apache.spark.sql.catalyst.types.ops.TypeApiOps import org.apache.spark.sql.catalyst.util.{ArrayData, CharVarcharCodegenUtils, DateFormatter, FractionTimeFormatter, IntervalStringStyles, IntervalUtils, MapData, TimestampFormatter} import org.apache.spark.sql.catalyst.util.IntervalStringStyles.ANSI_STYLE import org.apache.spark.sql.internal.SQLConf import 
org.apache.spark.sql.internal.SQLConf.BinaryOutputStyle import org.apache.spark.sql.types._ -import org.apache.spark.sql.types.ops.TypeApiOps import org.apache.spark.unsafe.UTF8StringBuilder import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} import org.apache.spark.util.ArrayImplicits._ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/ops/TimeTypeOps.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/ops/TimeTypeOps.scala index 0cf152079c520..a69581ddd201d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/ops/TimeTypeOps.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/ops/TimeTypeOps.scala @@ -28,7 +28,6 @@ import org.apache.spark.sql.catalyst.types.{PhysicalDataType, PhysicalLongType} import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.execution.arrow.{ArrowFieldWriter, TimeWriter} import org.apache.spark.sql.types.{ObjectType, TimeType} -import org.apache.spark.sql.types.ops.TimeTypeApiOps /** * Server-side (catalyst) operations for TimeType. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala index 6714510874351..9fb5b960dbf8a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala @@ -23,6 +23,7 @@ import java.time._ import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.SQLConfHelper import org.apache.spark.sql.catalyst.expressions.ToStringBase +import org.apache.spark.sql.catalyst.types.ops.TypeApiOps import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, FractionTimeFormatter, STUtils, TimeFormatter, TimestampFormatter} import org.apache.spark.sql.catalyst.util.IntervalStringStyles.HIVE_STYLE import org.apache.spark.sql.catalyst.util.IntervalUtils.{durationToMicros, periodToMonths, toDayTimeIntervalString, toYearMonthIntervalString} @@ -31,7 +32,6 @@ import org.apache.spark.sql.execution.datasources.v2.{DescribeTableExec, ShowTab import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.BinaryOutputStyle import org.apache.spark.sql.types._ -import org.apache.spark.sql.types.ops.TypeApiOps import org.apache.spark.unsafe.types.{CalendarInterval, VariantVal} import org.apache.spark.util.ArrayImplicits._ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvaluatePython.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvaluatePython.scala index adee0b2ea19a1..ab5ad5d1270b7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvaluatePython.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvaluatePython.scala @@ -29,9 +29,9 @@ import org.apache.spark.api.python.SerDeUtil import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ +import 
org.apache.spark.sql.catalyst.types.ops.TypeApiOps import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, ArrayData, GenericArrayData, MapData, STUtils} import org.apache.spark.sql.types._ -import org.apache.spark.sql.types.ops.TypeApiOps import org.apache.spark.unsafe.types.{BinaryView, UTF8String, VariantVal} object EvaluatePython { diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala index 46302b316b757..591c2727c498e 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala @@ -33,11 +33,11 @@ import org.apache.hive.service.rpc.thrift.{TCLIServiceConstants, TColumnDesc, TP import org.apache.spark.internal.{Logging, LogKeys} import org.apache.spark.internal.LogKeys._ import org.apache.spark.sql.{DataFrame, Row, SparkSession} +import org.apache.spark.sql.catalyst.types.ops.TypeApiOps import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.catalyst.util.DateTimeConstants.MILLIS_PER_SECOND import org.apache.spark.sql.internal.{SQLConf, VariableSubstitution} import org.apache.spark.sql.types._ -import org.apache.spark.sql.types.ops.TypeApiOps import org.apache.spark.util.{Utils => SparkUtils} private[hive] class SparkExecuteStatementOperation(