From cc738db500003702b1786725fde119cf7ad543de Mon Sep 17 00:00:00 2001 From: Sandro Speh Date: Mon, 20 Apr 2026 12:08:38 +0000 Subject: [PATCH 1/5] [SPARK-55668][SQL] Add post-processing for CDC changelog tables Add analyzer rule and execution primitives for Spark-side CDC post-processing: - `ResolveChangelogTable` analyzer rule: injects carry-over removal and/or update-detection plans above a resolved `DataSourceV2Relation(ChangelogTable)`. - `CarryOverRemoval` logical node + `CarryOverRemovalExec` physical node + `CarryOverIterator`: sort-based removal of identical delete+insert CoW artifacts, keyed by rowId+rowVersion. - Update detection: Window-based relabeling of delete+insert pairs within the same rowId+rowVersion partition to `update_preimage`/`update_postimage`. - `ChangelogTable.validateSchema`: fail-fast validation of required CDC metadata columns, column types, and row-identity requirements. - New error class `INVALID_CHANGELOG_SCHEMA` with four sub-classes (MISSING_COLUMN, INVALID_COLUMN_TYPE, MISSING_ROW_ID, NESTED_ROW_ID). - `InMemoryChangelogCatalog` extended with `ChangelogProperties` to configure post-processing scenarios in tests. - Tests: `ChangelogResolutionSuite` schema-validation cases and `ResolveChangelogTablePostProcessingSuite` end-to-end SQL coverage. --- .../resources/error/error-conditions.json | 28 + .../sql/catalyst/analysis/Analyzer.scala | 1 + .../analysis/ResolveChangelogTable.scala | 242 ++++++ .../plans/logical/CarryOverRemoval.scala | 58 ++ .../sql/errors/QueryCompilationErrors.scala | 38 + .../datasources/v2/ChangelogTable.scala | 48 +- .../catalog/InMemoryChangelogCatalog.scala | 60 +- .../sql/execution/CarryOverIterator.scala | 238 ++++++ .../sql/execution/CarryOverRemovalExec.scala | 85 ++ .../spark/sql/execution/SparkStrategies.scala | 4 + .../connector/ChangelogResolutionSuite.scala | 153 +++- ...lveChangelogTablePostProcessingSuite.scala | 791 ++++++++++++++++++ 12 files changed, 1738 insertions(+), 8 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveChangelogTable.scala create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/CarryOverRemoval.scala create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/CarryOverIterator.scala create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/CarryOverRemovalExec.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/connector/ResolveChangelogTablePostProcessingSuite.scala diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index f62c22b196483..f1e493bf8d704 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -3276,6 +3276,34 @@ }, "sqlState" : "42K03" }, + "INVALID_CHANGELOG_SCHEMA" : { + "message" : [ + "The Change Data Capture (CDC) schema returned by connector is invalid." + ], + "subClass" : { + "INVALID_COLUMN_TYPE" : { + "message" : [ + "Column `` has type , expected ." + ] + }, + "MISSING_COLUMN" : { + "message" : [ + "Required column `` is missing." + ] + }, + "MISSING_ROW_ID" : { + "message" : [ + "Connector advertises one or more post-processing properties (`containsCarryoverRows`, `representsUpdateAsDeleteAndInsert`, `containsIntermediateChanges`) that require row identity, but `Changelog.rowId()` returned an empty array. Either set all three to `false`, or return at least one row-id `NamedReference`." + ] + }, + "NESTED_ROW_ID" : { + "message" : [ + "rowId `` is nested. Spark currently requires row identity columns to be top-level columns of the changelog schema. Project the nested field to a top-level column in `Changelog.columns()` and point `Changelog.rowId()` at that column." + ] + } + }, + "sqlState" : "42K03" + }, "INVALID_CLONE_SESSION_REQUEST" : { "message" : [ "Invalid session clone request." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 1224b8ba18a3d..32713b082db77 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -445,6 +445,7 @@ class Analyzer( new ResolveCatalogs(catalogManager) :: ResolveInsertInto :: ResolveRelations :: + ResolveChangelogTable :: ResolvePartitionSpec :: ResolveFieldNameAndPosition :: AddMetadataColumns :: diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveChangelogTable.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveChangelogTable.scala new file mode 100644 index 0000000000000..91e0e63304be7 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveChangelogTable.scala @@ -0,0 +1,242 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis + +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.aggregate.Count +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.trees.TreeNodeTag +import org.apache.spark.sql.connector.catalog.{Changelog, ChangelogInfo} +import org.apache.spark.sql.execution.datasources.v2.{ChangelogTable, DataSourceV2Relation} +import org.apache.spark.sql.types.IntegerType + +/** + * Post-processes a resolved [[DataSourceV2Relation]] backed by a [[ChangelogTable]] to inject + * carry-over removal and/or update detection plans. + * + * Fires after [[ResolveRelations]] has wrapped the connector's [[Changelog]] in a + * [[DataSourceV2Relation]]. + * + * Transformation order: carry-over removal BEFORE update detection. + */ +object ResolveChangelogTable extends Rule[LogicalPlan] { + + private val CHANGELOG_TRANSFORMED_TAG = + TreeNodeTag[Boolean]("changelog_transformed") + + override def apply(plan: LogicalPlan): LogicalPlan = { + if (isAlreadyTransformed(plan)) return plan + var updatedPlan = plan + updatedPlan = plan.resolveOperatorsUp { + case rel @ DataSourceV2Relation(table: ChangelogTable, _, _, _, _, _) => + val changelog = table.changelog + val options = table.changelogInfo + var updatedRel: LogicalPlan = rel + // Transformations in order: 1. carry-over removal, 2. update detection, 3. net changes. + if (options.deduplicationMode() != ChangelogInfo.DeduplicationMode.NONE && + changelog.containsCarryoverRows()) { + updatedRel = injectCarryoverRemoval(rel, changelog) + } + if (options.computeUpdates() && changelog.representsUpdateAsDeleteAndInsert()) { + updatedRel = injectUpdateDetection(updatedRel, changelog) + } + if (options.deduplicationMode() == ChangelogInfo.DeduplicationMode.NET_CHANGES && + changelog.containsIntermediateChanges()) { + updatedRel = injectNetChangeComputation(updatedRel, changelog) + } + updatedRel + } + if (updatedPlan ne plan) { + updatedPlan.setTagValue(CHANGELOG_TRANSFORMED_TAG, true) + } + updatedPlan + } + + // --------------------------------------------------------------------------- + // Guard + // --------------------------------------------------------------------------- + + /** + * Returns true if this plan has already been transformed by this rule. + * Uses a plan-level tag to prevent re-processing on subsequent rule executor iterations. + */ + private def isAlreadyTransformed(plan: LogicalPlan): Boolean = { + plan.getTagValue(CHANGELOG_TRANSFORMED_TAG).getOrElse(false) + } + + // --------------------------------------------------------------------------- + // Update Detection (window-based) + // --------------------------------------------------------------------------- + + /** + * Injects a window-based update detection plan above the given plan. + * + * Adds a Window node partitioned by (rowId, rowVersion) that counts deletes and inserts + * per partition, then rewrites _change_type: + * - insert becomes update_postimage (when both delete and insert exist in same partition) + * - delete becomes update_preimage (when both delete and insert exist in same partition) + * - otherwise keeps original _change_type + * + * Plan shape: + * Project (drop _ins_cnt, _del_cnt) + * |-- Project (rewrite _change_type via CASE WHEN) + * |-- Window (partition by rowId+rowVersion, compute _del_cnt/_ins_cnt) + * |-- [input plan] + * + * @param plan the input plan (DataSourceV2Relation or already-transformed plan) + * @param cl the Changelog providing rowId() and rowVersion() + * @return plan with update detection injected + */ + private def injectUpdateDetection( + plan: LogicalPlan, + cl: Changelog): LogicalPlan = { + val changeTypeAttr = plan.output.find(_.name == "_change_type").get + val rowIdExprs = V2ExpressionUtils.resolveRefs[NamedExpression](cl.rowId().toSeq, plan) + val rowVersionExpr = V2ExpressionUtils.resolveRef[NamedExpression](cl.rowVersion(), plan) + // Layer 1: Window with _del_cnt, _ins_cnt + val insertIf = If( + EqualTo(changeTypeAttr, Literal("insert")), Literal(1), Literal(null, IntegerType)) + val deleteIf = If( + EqualTo(changeTypeAttr, Literal("delete")), Literal(1), Literal(null, IntegerType)) + + val partitionByCols = rowIdExprs ++ Seq(rowVersionExpr) + val windowSpec = WindowSpecDefinition(partitionByCols, Nil, UnspecifiedFrame) + + val insCntAlias = Alias( + WindowExpression(Count(Seq(insertIf)).toAggregateExpression(), windowSpec), "_ins_cnt")() + val delCntAlias = Alias( + WindowExpression(Count(Seq(deleteIf)).toAggregateExpression(), windowSpec), "_del_cnt")() + + val windowNode = Window( + plan.output ++ Seq(delCntAlias, insCntAlias), partitionByCols, Nil, plan) + + // Layer 2: Project rewriting _change_type (preserve exprId!) + val insCntRef = insCntAlias.toAttribute + val delCntRef = delCntAlias.toAttribute + + val isUpdate = And( + GreaterThanOrEqual(delCntRef, Literal(1L)), + GreaterThanOrEqual(insCntRef, Literal(1L)) + ) + + val updateType = If( + EqualTo(changeTypeAttr, Literal("insert")), + Literal("update_postimage"), + Literal("update_preimage") + ) + + val caseExpr = CaseWhen(Seq((isUpdate, updateType)), changeTypeAttr) + val newChangeTypeAlias = Alias(caseExpr, "_change_type")() + val projectList = windowNode.output.map { attr => + if (attr.name == "_change_type") newChangeTypeAlias else attr + } + val layer2 = Project(projectList, windowNode) + + // Layer 3: Project dropping _del_cnt, _ins_cnt + val layer3 = Project( + layer2.output.filter(a => a.name != "_ins_cnt" && a.name != "_del_cnt"), layer2) + layer3 + } + + // --------------------------------------------------------------------------- + // Carry-Over Removal (sort-based) + // --------------------------------------------------------------------------- + + /** + * Injects carry-over removal above the given plan. + * + * Adds Repartition + Sort + CarryOverRemoval that compares consecutive delete+insert + * pairs field-by-field and drops identical pairs. + * + * Plan shape: + * CarryOverRemoval (custom logical node; CarryOverRemovalExec physical node) + * |-- Sort [rowId ASC, rowVersion ASC, _change_type ASC] + * |-- RepartitionByExpression (rowId, rowVersion) + * |-- [input plan] + * + * @param plan the input plan + * @param cl the Changelog providing rowId() and rowVersion() + * @return plan with carry-over removal injected + */ + private def injectCarryoverRemoval( + plan: LogicalPlan, + cl: Changelog): LogicalPlan = { + // 1. Resolve rowId/rowVersion attributes + val changeTypeAttr = plan.output.find(_.name == "_change_type").get + val rowIdExprs = + V2ExpressionUtils.resolveRefs[NamedExpression](cl.rowId().toSeq, plan) + val rowVersionExpr = + V2ExpressionUtils.resolveRef[NamedExpression](cl.rowVersion(), plan) + // 2. RepartitionByExpression + val partitionExpr = rowIdExprs ++ Seq(rowVersionExpr) + val repartitioned = RepartitionByExpression(partitionExpr, plan, None) + // 3. Sort (rowId, rowVersion, _change_type ASC; "delete" sorts before "insert") + val sortOrder = (rowIdExprs ++ Seq(rowVersionExpr, changeTypeAttr)) + .map(e => SortOrder(e, Ascending)) + val sorted = Sort(sortOrder, global = false, repartitioned) + // 4. Build CarryOverRemoval with column NAMES, not ordinals. + // + // TODO(review discussion): We pass column names and resolve them to ordinals at execution + // time in CarryOverRemovalExec.doExecute(). Ordinals computed at analysis time were tried + // first but broke: ColumnPruning / projection rewrites between analysis and physical + // planning can reorder or re-number columns, so analysis-time ordinals pointed at the + // wrong fields by the time the exec node ran. Resolving names against child.output at + // execute time dodges that. Flagging this as a discussion point. @Johan: is + // name-resolution the right idiom here, or is there a cleaner way to pin ordinals + // through the optimizer? + // + // TODO(Sandro): Support nested rowId paths (e.g. Seq("payload", "id")). Spark's + // NamedReference API admits them, and prior commits on this branch (see git log before + // this commit) implemented struct-field extraction in CarryOverRemovalExec plus a + // "keep the parent struct in data comparison" special case in the dataColumnNames + // computation below. Restore from that history when the first real connector needs it. + // The top-level-only restriction is enforced in ChangelogTable.validateSchema, so here + // we can take `fieldNames()(0)` directly. + val rowIdColumnNames = cl.rowId().map(_.fieldNames()(0)).toSeq + val rowVersionColumnName = cl.rowVersion().fieldNames()(0) // e.g. "_commit_version" + + val metadataNames = Set("_change_type", "_commit_version", "_commit_timestamp") + val excluded = metadataNames ++ rowIdColumnNames.toSet + val dataColumnNames = plan.output.map(_.name).filterNot(excluded.contains) + + // TODO(review discussion): pinning every relation attribute via requiredAttributes is a + // blunt instrument: it disables ColumnPruning for the entire subtree rooted at + // CarryOverRemoval. @Johan, is there a more surgical way to keep just the data columns + // alive through the optimizer (e.g. a dedicated trait, or marking the logical node as + // requires-all-output)? For now this is defensive. + val requiredAttrs = plan.output + + CarryOverRemoval(sorted, rowIdColumnNames, rowVersionColumnName, dataColumnNames, requiredAttrs) + } + + // --------------------------------------------------------------------------- + // Net Change Computation TODO + // --------------------------------------------------------------------------- + + /** + * Injects net change computation above the given plan. + * Collapses multiple changes per row identity into the net effect. + */ + private def injectNetChangeComputation( + plan: LogicalPlan, + cl: Changelog): LogicalPlan = { + throw new UnsupportedOperationException( + "Net change computation is not yet supported") + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/CarryOverRemoval.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/CarryOverRemoval.scala new file mode 100644 index 0000000000000..deced45bf6f07 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/CarryOverRemoval.scala @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.plans.logical + +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeSet} + +/** + * Logical plan node for CDC carry-over removal. + * + * Expects its child to be pre-sorted by (rowId, rowVersion, _change_type ASC) within each + * partition, with data repartitioned by (rowId, rowVersion). + * + * Column references are stored as names (not ordinals) because the optimizer may insert + * projections that change column positions between analysis and execution. + * + * @param child the pre-sorted, repartitioned child plan + * @param rowIdColumnNames top-level names of row identity columns, e.g. Seq("id") or + * Seq("pk1", "pk2") for composite keys. Nested rowId paths are + * rejected at analysis time; see the TODO in + * [[org.apache.spark.sql.catalyst.analysis.ResolveChangelogTable]]. + * @param rowVersionColumnName name of the row version column (e.g. "_commit_version") + * @param dataColumnNames names of data columns for field-by-field comparison + * @param requiredAttributes attributes the optimizer must not project away + */ +case class CarryOverRemoval( + child: LogicalPlan, + rowIdColumnNames: Seq[String], + rowVersionColumnName: String, + dataColumnNames: Seq[String], + requiredAttributes: Seq[Attribute] = Seq.empty) extends UnaryNode { + + override def output: Seq[Attribute] = child.output + + // Tell the optimizer we still need every data column at execution time so ColumnPruning + // doesn't drop columns that CarryOverRemovalExec compares field-by-field. + override def references: AttributeSet = + AttributeSet(requiredAttributes) ++ super.references + + override def maxRows: Option[Long] = child.maxRows + + override protected def withNewChildInternal(newChild: LogicalPlan): CarryOverRemoval = + copy(child = newChild) +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 8464a96ef26c6..19025f4078a7f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -3847,6 +3847,44 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat messageParameters = Map("catalogName" -> catalogName)) } + def changelogMissingColumnError( + changelogName: String, columnName: String): AnalysisException = { + new AnalysisException( + errorClass = "INVALID_CHANGELOG_SCHEMA.MISSING_COLUMN", + messageParameters = Map( + "changelogName" -> changelogName, + "columnName" -> columnName)) + } + + def changelogInvalidColumnTypeError( + changelogName: String, + columnName: String, + expectedType: String, + actualType: String): AnalysisException = { + new AnalysisException( + errorClass = "INVALID_CHANGELOG_SCHEMA.INVALID_COLUMN_TYPE", + messageParameters = Map( + "changelogName" -> changelogName, + "columnName" -> columnName, + "expectedType" -> expectedType, + "actualType" -> actualType)) + } + + def changelogNestedRowIdError( + changelogName: String, rowId: String): AnalysisException = { + new AnalysisException( + errorClass = "INVALID_CHANGELOG_SCHEMA.NESTED_ROW_ID", + messageParameters = Map( + "changelogName" -> changelogName, + "rowId" -> rowId)) + } + + def changelogMissingRowIdError(changelogName: String): AnalysisException = { + new AnalysisException( + errorClass = "INVALID_CHANGELOG_SCHEMA.MISSING_ROW_ID", + messageParameters = Map("changelogName" -> changelogName)) + } + def invalidCdcOptionConflictingRangeTypes(): Throwable = { new AnalysisException( errorClass = "INVALID_CDC_OPTION.CONFLICTING_RANGE_TYPES", diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ChangelogTable.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ChangelogTable.scala index 8521df3db2ff0..82cab863ae9b4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ChangelogTable.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ChangelogTable.scala @@ -19,9 +19,12 @@ package org.apache.spark.sql.execution.datasources.v2 import java.util.{EnumSet => JEnumSet, Set => JSet} -import org.apache.spark.sql.connector.catalog.{Changelog, ChangelogInfo, Column, SupportsRead, Table, TableCapability} +import org.apache.spark.sql.connector.catalog.{ + Changelog, ChangelogInfo, Column, SupportsRead, Table, TableCapability} import org.apache.spark.sql.connector.catalog.TableCapability.{BATCH_READ, MICRO_BATCH_READ} import org.apache.spark.sql.connector.read.ScanBuilder +import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.types.{DataType, StringType, TimestampType} import org.apache.spark.sql.util.CaseInsensitiveStringMap /** @@ -35,6 +38,11 @@ case class ChangelogTable( changelog: Changelog, changelogInfo: ChangelogInfo) extends Table with SupportsRead { + // Validate the connector returned a schema with the required CDC metadata columns + // and correct types. `_commit_version` is connector-defined per the Changelog contract, + // so its type is not checked. + ChangelogTable.validateSchema(changelog) + override def name: String = changelog.name override def columns: Array[Column] = changelog.columns @@ -45,3 +53,41 @@ case class ChangelogTable( override def capabilities: JSet[TableCapability] = JEnumSet.of(BATCH_READ, MICRO_BATCH_READ) } + +object ChangelogTable { + + def validateSchema(cl: Changelog): Unit = { + val byName = cl.columns.map(c => c.name -> c).toMap + def check(name: String, expected: DataType*): Unit = { + val col = byName.getOrElse(name, + throw QueryCompilationErrors.changelogMissingColumnError(cl.name, name)) + if (expected.nonEmpty && col.dataType != expected.head) { + throw QueryCompilationErrors.changelogInvalidColumnTypeError( + cl.name, name, expected.head.sql, col.dataType.sql) + } + } + check("_change_type", StringType) + check("_commit_version") // connector-defined, any type accepted + check("_commit_timestamp", TimestampType) + // If the connector advertises any property that requires row identity for Spark-side + // post-processing, rowId() must be non-empty. Checked here so bad connectors fail at + // relation construction rather than silently having post-processing skipped. + val needsRowId = + cl.containsCarryoverRows || + cl.representsUpdateAsDeleteAndInsert || + cl.containsIntermediateChanges + if (needsRowId && cl.rowId().isEmpty) { + throw QueryCompilationErrors.changelogMissingRowIdError(cl.name) + } + // TODO(Sandro): relax when nested rowId support is re-added (see ResolveChangelogTable + // for the reasoning and prior-commit pointer). For now, every rowId must resolve to a + // top-level column of the changelog schema. + cl.rowId().foreach { ref => + val parts = ref.fieldNames() + if (parts.length != 1) { + throw QueryCompilationErrors.changelogNestedRowIdError( + cl.name, parts.mkString(".")) + } + } + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryChangelogCatalog.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryChangelogCatalog.scala index c47ed2668e3b4..b31715d2aa762 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryChangelogCatalog.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryChangelogCatalog.scala @@ -23,6 +23,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.connector.catalog.ChangelogRange.{TimestampRange, UnboundedRange, VersionRange} +import org.apache.spark.sql.connector.expressions.{FieldReference, NamedReference} import org.apache.spark.sql.connector.read._ import org.apache.spark.sql.connector.read.streaming.{MicroBatchStream, Offset} import org.apache.spark.sql.types._ @@ -44,6 +45,22 @@ class InMemoryChangelogCatalog extends InMemoryCatalog { private var _lastChangelogInfo: Option[ChangelogInfo] = None def lastChangelogInfo: Option[ChangelogInfo] = _lastChangelogInfo + // Per-table overrides for Changelog properties (carry-over rows, intermediate changes, + // update representation, row identity). Tests can set these to exercise post-processing. + private val changelogProperties: mutable.Map[String, ChangelogProperties] = + mutable.Map.empty + + /** + * Override the [[Changelog]] properties returned for a given table. + * Defaults are: containsCarryoverRows=false, containsIntermediateChanges=false, + * representsUpdateAsDeleteAndInsert=false, no rowId, no rowVersion. + */ + def setChangelogProperties( + ident: Identifier, + properties: ChangelogProperties): Unit = { + changelogProperties(ident.toString) = properties + } + override def loadChangelog( ident: Identifier, changelogInfo: ChangelogInfo): Changelog = { @@ -58,8 +75,9 @@ class InMemoryChangelogCatalog extends InMemoryCatalog { // _commit_version is at index numDataCols + 1 (after _change_type) val commitVersionIdx = numDataCols + 1 val filtered = filterByRange(allRows.toSeq, commitVersionIdx, changelogInfo.range()) + val props = changelogProperties.getOrElse(ident.toString, ChangelogProperties()) new InMemoryChangelog( - table.name + "_changelog", table.columns, filtered) + table.name + "_changelog", table.columns, filtered, props) } /** @@ -109,15 +127,35 @@ class InMemoryChangelogCatalog extends InMemoryCatalog { } } +/** + * Configurable properties for [[InMemoryChangelog]] that test cases can use to exercise + * Spark's post-processing (carry-over removal, update detection, net changes). + * + * @param containsCarryoverRows whether the change stream may contain identical CoW pairs + * @param containsIntermediateChanges whether multiple changes per row may exist + * @param representsUpdateAsDeleteAndInsert whether updates appear as raw delete+insert + * @param rowIdNames optional row identity columns as top-level names (e.g. Seq("id")) + * @param rowVersionName optional row version column (e.g. Some("_commit_version")) + */ +case class ChangelogProperties( + containsCarryoverRows: Boolean = false, + containsIntermediateChanges: Boolean = false, + representsUpdateAsDeleteAndInsert: Boolean = false, + rowIdNames: Seq[String] = Seq.empty, + rowVersionName: Option[String] = None) + /** * A test [[Changelog]] that returns pre-populated change rows. * - * Reports `containsCarryoverRows = false` so Spark skips carry-over removal. + * Properties (carry-over presence, update representation, row identity) are configurable + * via the [[ChangelogProperties]] parameter so tests can exercise different code paths + * in Spark's post-processing analyzer rule. */ class InMemoryChangelog( tableName: String, dataColumns: Array[Column], - changeRows: Seq[InternalRow]) extends Changelog { + changeRows: Seq[InternalRow], + properties: ChangelogProperties = ChangelogProperties()) extends Changelog { private val cdcColumns: Array[Column] = dataColumns ++ Array( Column.create("_change_type", StringType), @@ -128,11 +166,21 @@ class InMemoryChangelog( override def columns(): Array[Column] = cdcColumns - override def containsCarryoverRows(): Boolean = false + override def containsCarryoverRows(): Boolean = properties.containsCarryoverRows + + override def containsIntermediateChanges(): Boolean = properties.containsIntermediateChanges - override def containsIntermediateChanges(): Boolean = false + override def representsUpdateAsDeleteAndInsert(): Boolean = + properties.representsUpdateAsDeleteAndInsert - override def representsUpdateAsDeleteAndInsert(): Boolean = false + override def rowId(): Array[NamedReference] = { + properties.rowIdNames.map(name => FieldReference.column(name): NamedReference).toArray + } + + override def rowVersion(): NamedReference = properties.rowVersionName match { + case Some(name) => FieldReference.column(name) + case None => super.rowVersion() + } override def newScanBuilder( options: CaseInsensitiveStringMap): ScanBuilder = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CarryOverIterator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CarryOverIterator.scala new file mode 100644 index 0000000000000..632c502896717 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CarryOverIterator.scala @@ -0,0 +1,238 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.util.TypeUtils +import org.apache.spark.sql.types.{DataType, StructType} + +/** + * Streaming iterator for CDC carry-over removal. + * + * Runs a three-buffer state machine (`pendingDelete`, `pendingEmit`, `pendingNext`; see field + * docs below) over a pre-sorted iterator (sorted by rowId, rowVersion, _change_type ASC), + * comparing consecutive delete+insert pairs with the same (rowId, rowVersion): + * - If all data columns are identical -> carry-over, drop both rows + * - If data columns differ -> real change, emit both rows + * - Unpaired delete or insert -> emit as-is + * + * TODO(@Johan): Input rows may be mutable/reused by the upstream iterator. Any row buffered + * for comparison against the next row is defensively copied via `.copy()`. This is the + * standard DSv2 pattern, but the per-row copy cost is non-trivial on large scans. Confirm + * whether the upstream stage here guarantees non-reused rows (in which case we could drop the + * copy), or whether we should keep the defensive copy. + * + * @param input pre-sorted iterator of InternalRow + * @param rowIdOrdinals column indices for row identity columns (supports composite keys) + * @param rowVersionOrdinal column index for row version comparison + * @param changeTypeOrdinal column index for _change_type (String: "delete" or "insert") + * @param dataOrdinals column indices for data column comparison (field-by-field equality) + * @param schema the output schema for generic data column comparison + */ +class CarryOverIterator( + input: Iterator[InternalRow], + rowIdOrdinals: Array[Int], + rowVersionOrdinal: Int, + changeTypeOrdinal: Int, + dataOrdinals: Array[Int], + schema: StructType) extends Iterator[InternalRow] { + + // State-machine buffers. + // + // pendingDelete: a delete row that has NOT yet decided its fate. Set when we read a delete + // from input; cleared when we either emit it (no paired insert / paired + // with different data) or drop it (paired with identical insert = carry-over). + // + // pendingEmit: the row to return on the next next()/hasNext() call. Serves as the + // "ready" signal: advance() runs until pendingEmit is non-null or input + // is drained. Cleared by next() after emission. + // + // pendingNext: a row read from input that could not be consumed in the current pass and + // must be re-examined on the next advance() tick. Set when the peeked-at + // row turns out to be cross-group (new rowId) or a stray delete. + // + // Invariants at every iteration boundary (hasNext -> advance -> next): + // - At most one of pendingDelete / pendingEmit is set on entry to advance(). + // - pendingNext may be set independently of the other two. + // - advance() exits only when pendingEmit != null OR input is drained AND pendingDelete + // is null AND pendingNext is null. + private var pendingDelete: InternalRow = null + private var pendingEmit: InternalRow = null + private var pendingNext: InternalRow = null + + // Cached at construction: types (for generic InternalRow.get) and type-aware orderings for + // data columns. Orderings are needed because `==` on Array[Byte] / ArrayData / InternalRow + // is reference equality. + private val rowIdTypes: Array[DataType] = + rowIdOrdinals.map(i => schema(i).dataType) + private val rowVersionType: DataType = schema(rowVersionOrdinal).dataType + private val dataColumnTypes: Array[DataType] = + dataOrdinals.map(i => schema(i).dataType) + private val dataColumnOrderings: Array[Ordering[Any]] = + dataColumnTypes.map(TypeUtils.getInterpretedOrdering) + + override def hasNext: Boolean = { + if (pendingEmit != null) return true + advance() + pendingEmit != null + } + + override def next(): InternalRow = { + if (!hasNext) throw new NoSuchElementException + val result = pendingEmit + pendingEmit = null + result + } + + /** + * Drives the carry-over removal state machine forward until `pendingEmit` holds a row to + * return or the input is drained with nothing left to emit. + * + * Each tick handles one of four input states, resolved in this priority order: + * + * (1) `pendingNext` non-null. A row re-queued by a previous tick (cross-group or stray + * delete). If it's a delete, becomes `pendingDelete` and we loop; otherwise it's the + * next thing to emit and we're done. + * + * (2) No `pendingDelete` and input empty. Nothing left to do; exit. + * + * (3) `pendingDelete` non-null and input empty. The buffered delete has no partner and + * must be emitted as-is (real change). + * + * (4) No `pendingDelete` and input has more. Read the next row. If it's a delete, buffer + * it (needs the next row to classify); otherwise emit directly. + * + * (5) `pendingDelete` non-null and input has more. Read the next row and decide: + * - same (rowId, rowVersion) AND `_change_type == insert` AND all data columns equal + * -> CoW carry-over: drop both. + * - same group AND insert AND data differs: real UPDATE-as-delete-insert; emit the + * delete now, re-queue the insert as `pendingNext` for emission next tick. + * - different group OR not an insert: the buffered delete was unpaired; emit it and + * re-queue the peeked row. + * + * Invariant: sort order is (rowId, rowVersion, `_change_type ASC`) so `delete` always + * precedes `insert` within a group; this lets us hold at most one unmatched delete at a + * time without multi-row lookahead. + * + * Termination: every iteration either (a) consumes one row from `input`, (b) resolves a + * buffered state by clearing `pendingNext` or `pendingDelete`, or (c) sets `pendingEmit` + * and exits. No path leaves all three buffers unchanged while `input` still has rows, so + * no infinite loop is possible. + */ + private def advance(): Unit = { + while (pendingEmit == null) { + if (pendingNext != null) { + val row = pendingNext + pendingNext = null + if (getChangeType(row) == "delete") { + pendingDelete = row + } else { + pendingEmit = row + return + } + } + if (pendingDelete == null && !input.hasNext) { + return + } else if (pendingDelete != null && !input.hasNext) { + pendingEmit = pendingDelete + pendingDelete = null + } else if (pendingDelete == null && input.hasNext) { + val row = input.next().copy() + if (getChangeType(row) == "delete") { + pendingDelete = row + } else { + pendingEmit = row + } + } else { + // pendingDelete != null && input.hasNext + val nextRow = input.next().copy() + if (getChangeType(nextRow) == "insert" && sameGroup(pendingDelete, nextRow)) { + if (dataColumnsEqual(pendingDelete, nextRow)) { + // carry-over: drop both + pendingDelete = null + } else { + pendingEmit = pendingDelete + pendingDelete = null + pendingNext = nextRow + } + } else { + // no partner + pendingEmit = pendingDelete + pendingDelete = null + pendingNext = nextRow + } + } + } + } + + /** + * Checks whether two rows have the same rowId and rowVersion values. + * rowId columns are always top-level in `plan.output`; connectors with nested identity + * fields currently have to project them to the top level (see + * [[org.apache.spark.sql.catalyst.analysis.ResolveChangelogTable]] for the restriction + * point and the TODO that tracks re-adding nested support). + */ + private def sameGroup(a: InternalRow, b: InternalRow): Boolean = { + val n = rowIdOrdinals.length + var k = 0 + while (k < n) { + val i = rowIdOrdinals(k) + val aNull = a.isNullAt(i) + val bNull = b.isNullAt(i) + if (aNull != bNull) return false + if (!aNull && a.get(i, rowIdTypes(k)) != b.get(i, rowIdTypes(k))) return false + k += 1 + } + val aNull = a.isNullAt(rowVersionOrdinal) + val bNull = b.isNullAt(rowVersionOrdinal) + if (aNull != bNull) return false + if (aNull) return true + a.get(rowVersionOrdinal, rowVersionType) == b.get(rowVersionOrdinal, rowVersionType) + } + + /** Checks whether two rows have identical data column values via type-aware orderings. */ + private def dataColumnsEqual(a: InternalRow, b: InternalRow): Boolean = { + val n = dataOrdinals.length + var k = 0 + while (k < n) { + val i = dataOrdinals(k) + val aNull = a.isNullAt(i) + val bNull = b.isNullAt(i) + if (aNull != bNull) return false + if (!aNull) { + val dt = dataColumnTypes(k) + if (dataColumnOrderings(k).compare(a.get(i, dt), b.get(i, dt)) != 0) return false + } + k += 1 + } + true + } + + /** + * Returns the _change_type string value from a row. Fails with a descriptive error if + * the column is null; a null _change_type indicates a misbehaving connector. + */ + private def getChangeType(row: InternalRow): String = { + if (row.isNullAt(changeTypeOrdinal)) { + throw new IllegalStateException( + "Connector emitted a change row with null _change_type; " + + "expected 'insert', 'delete', 'update_preimage', or 'update_postimage'.") + } + row.getUTF8String(changeTypeOrdinal).toString + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CarryOverRemovalExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CarryOverRemovalExec.scala new file mode 100644 index 0000000000000..d7c084d83d848 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CarryOverRemovalExec.scala @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution + +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.Attribute + +/** + * Physical execution node for CDC carry-over removal. + * + * Expects input pre-sorted by (rowId, rowVersion, _change_type ASC) within each partition. + * Uses [[CarryOverIterator]] to compare consecutive delete+insert pairs and drop identical + * pairs (CoW carry-over artifacts). + * + * All ordinals are resolved dynamically from the actual child output at execution time, + * because the optimizer may insert projections that change column positions. + * + * rowId columns are restricted to top-level columns in the child output. Nested rowId paths + * (e.g. `payload.id`) are rejected at analysis time by + * [[org.apache.spark.sql.catalyst.analysis.ResolveChangelogTable]]. If a future connector + * needs nested row identity, see the TODO there for the strategy and prior-commit pointer. + * + * @param child the pre-sorted child plan + * @param rowIdColumnNames top-level names of row identity columns (supports composite keys) + * @param rowVersionColumnName name of the row version column + * @param dataColumnNames names of data columns for comparison + */ +case class CarryOverRemovalExec( + child: SparkPlan, + rowIdColumnNames: Seq[String], + rowVersionColumnName: String, + dataColumnNames: Seq[String]) extends UnaryExecNode { + + override def output: Seq[Attribute] = child.output + + override protected def doExecute(): RDD[InternalRow] = { + val outputSchema = child.schema + val outputNames = child.output.map(_.name) + + val changeTypeOrdinal = resolveOrdinal(outputNames, "_change_type") + val rowVersionOrdinal = resolveOrdinal(outputNames, rowVersionColumnName) + val rowIdOrdinals = rowIdColumnNames.map(resolveOrdinal(outputNames, _)).toArray + val dataOrdinals = dataColumnNames.map(resolveOrdinal(outputNames, _)).toArray + + val rdd = child.execute() + rdd.mapPartitionsInternal { iter => + new CarryOverIterator( + iter, + rowIdOrdinals, + rowVersionOrdinal, + changeTypeOrdinal, + dataOrdinals, + outputSchema + ) + } + } + + /** Resolves a column name to its ordinal index, throwing a clear error if not found. */ + private def resolveOrdinal(outputNames: Seq[String], name: String): Int = { + val idx = outputNames.indexOf(name) + require(idx >= 0, + s"Column '$name' not found in CarryOverRemovalExec child output: " + + s"${outputNames.mkString(", ")}") + idx + } + + override protected def withNewChildInternal(newChild: SparkPlan): CarryOverRemovalExec = + copy(child = newChild) +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index 919c1b97fd0f7..94719293299c6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -941,6 +941,10 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { execution.DeserializeToObjectExec(deserializer, objAttr, planLater(child)) :: Nil case logical.SerializeFromObject(serializer, child) => execution.SerializeFromObjectExec(serializer, planLater(child)) :: Nil + case logical.CarryOverRemoval(child, rowIdColumnNames, rowVersionColumnName, + dataColumnNames, _) => + execution.CarryOverRemovalExec( + planLater(child), rowIdColumnNames, rowVersionColumnName, dataColumnNames) :: Nil case logical.MapPartitions(f, objAttr, child) => execution.MapPartitionsExec(f, objAttr, planLater(child)) :: Nil case logical.MapPartitionsInR(f, p, b, is, os, objAttr, child) => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/ChangelogResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/ChangelogResolutionSuite.scala index 143ca3fe1a09e..a8ee47f8d0581 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/ChangelogResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/ChangelogResolutionSuite.scala @@ -24,9 +24,12 @@ import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2 import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.connector.catalog.ChangelogRange +import org.apache.spark.sql.connector.expressions.{FieldReference, NamedReference} +import org.apache.spark.sql.connector.read.ScanBuilder import org.apache.spark.sql.execution.datasources.v2.{ChangelogTable, DataSourceV2Relation} import org.apache.spark.sql.test.SharedSparkSession -import org.apache.spark.sql.types.{LongType, StringType} +import org.apache.spark.sql.types.{IntegerType, LongType, StringType, TimestampType} +import org.apache.spark.sql.util.CaseInsensitiveStringMap /** * Tests for the CDC (Change Data Capture) analyzer resolution path: @@ -203,4 +206,152 @@ class ChangelogResolutionSuite extends QueryTest with SharedSparkSession { assert(range.startingVersion() == "1") assert(range.endingVersion().get() == "5") } + + // =========================================================================== + // Generic changelog schema validation (SPARK-55951 completion) + // =========================================================================== + + private def stubInfo(): ChangelogInfo = new ChangelogInfo( + new ChangelogRange.VersionRange("1", java.util.Optional.of("2"), true, true), + ChangelogInfo.DeduplicationMode.DROP_CARRYOVERS, + false) + + private def cl(name: String, cols: (String, org.apache.spark.sql.types.DataType)*) + : BrokenChangelog = { + new BrokenChangelog(name, cols.map { case (n, t) => Column.create(n, t) }.toArray) + } + + private def missing(columnName: String): Map[String, String] = + Map("changelogName" -> "bad_cl", "columnName" -> columnName) + + private def wrongType(columnName: String, expected: String, actual: String) + : Map[String, String] = Map( + "changelogName" -> "bad_cl", + "columnName" -> columnName, + "expectedType" -> expected, + "actualType" -> actual) + + // Valid metadata tuples; tests swap one of these out to create broken schemas. + private val validChangeType = "_change_type" -> StringType + private val validVersion = "_commit_version" -> LongType + private val validTimestamp = "_commit_timestamp" -> TimestampType + + test("ChangelogTable - missing _change_type column throws") { + checkError( + intercept[AnalysisException] { + ChangelogTable(cl("bad_cl", validVersion, validTimestamp), stubInfo()) + }, + condition = "INVALID_CHANGELOG_SCHEMA.MISSING_COLUMN", + parameters = missing("_change_type")) + } + + test("ChangelogTable - missing _commit_version column throws") { + checkError( + intercept[AnalysisException] { + ChangelogTable(cl("bad_cl", validChangeType, validTimestamp), stubInfo()) + }, + condition = "INVALID_CHANGELOG_SCHEMA.MISSING_COLUMN", + parameters = missing("_commit_version")) + } + + test("ChangelogTable - missing _commit_timestamp column throws") { + checkError( + intercept[AnalysisException] { + ChangelogTable(cl("bad_cl", validChangeType, validVersion), stubInfo()) + }, + condition = "INVALID_CHANGELOG_SCHEMA.MISSING_COLUMN", + parameters = missing("_commit_timestamp")) + } + + test("ChangelogTable - wrong _change_type data type throws") { + checkError( + intercept[AnalysisException] { + ChangelogTable( + cl("bad_cl", "_change_type" -> IntegerType, validVersion, validTimestamp), + stubInfo()) + }, + condition = "INVALID_CHANGELOG_SCHEMA.INVALID_COLUMN_TYPE", + parameters = wrongType("_change_type", "STRING", "INT")) + } + + test("ChangelogTable - wrong _commit_timestamp data type throws") { + checkError( + intercept[AnalysisException] { + ChangelogTable( + cl("bad_cl", validChangeType, validVersion, "_commit_timestamp" -> LongType), + stubInfo()) + }, + condition = "INVALID_CHANGELOG_SCHEMA.INVALID_COLUMN_TYPE", + parameters = wrongType("_commit_timestamp", "TIMESTAMP", "BIGINT")) + } + + test("ChangelogTable - _commit_version type is connector-defined (any type accepted)") { + Seq(IntegerType, LongType, StringType).foreach { versionType => + ChangelogTable( + cl("any_cl", validChangeType, "_commit_version" -> versionType, validTimestamp), + stubInfo()) + } + } + + test("ChangelogTable - valid schema with data columns passes") { + ChangelogTable( + cl("good_cl", "id" -> LongType, "name" -> StringType, + validChangeType, validVersion, validTimestamp), + stubInfo()) + } + + test("ChangelogTable - post-processing flag with empty rowId throws") { + // Connector advertises containsCarryoverRows=true but returns no rowId(). Spark cannot + // post-process without row identity, so relation construction must fail fast. + val bad = new BrokenChangelog( + "no_rowid_cl", + Array( + Column.create("id", LongType), + Column.create("_change_type", StringType), + Column.create("_commit_version", LongType), + Column.create("_commit_timestamp", TimestampType))) { + override def containsCarryoverRows(): Boolean = true + override def rowId(): Array[NamedReference] = Array.empty + } + checkError( + intercept[AnalysisException] { ChangelogTable(bad, stubInfo()) }, + condition = "INVALID_CHANGELOG_SCHEMA.MISSING_ROW_ID", + parameters = Map("changelogName" -> "no_rowid_cl")) + } + + test("ChangelogTable - nested rowId (multi-segment NamedReference) throws") { + // Nested rowId paths (e.g. Seq("payload", "id")) are intentionally rejected for now. + // See the TODO in ResolveChangelogTable for the re-enable plan. + val nested = new BrokenChangelog( + "nested_cl", + Array( + Column.create("payload", LongType), + Column.create("_change_type", StringType), + Column.create("_commit_version", LongType), + Column.create("_commit_timestamp", TimestampType))) { + override def rowId(): Array[NamedReference] = Array(FieldReference(Seq("payload", "id"))) + } + checkError( + intercept[AnalysisException] { ChangelogTable(nested, stubInfo()) }, + condition = "INVALID_CHANGELOG_SCHEMA.NESTED_ROW_ID", + parameters = Map("changelogName" -> "nested_cl", "rowId" -> "payload.id")) + } +} + +/** + * Test-only [[Changelog]] implementation that returns a hand-crafted schema. Used to + * exercise [[ChangelogTable]]'s schema validation without going through a real catalog. + */ +private class BrokenChangelog( + nameArg: String, + cols: Array[Column]) extends Changelog { + override def name(): String = nameArg + override def columns(): Array[Column] = cols + override def containsCarryoverRows(): Boolean = false + override def containsIntermediateChanges(): Boolean = false + override def representsUpdateAsDeleteAndInsert(): Boolean = false + override def rowId(): Array[NamedReference] = Array.empty + override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = { + throw new UnsupportedOperationException("not needed for schema validation tests") + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/ResolveChangelogTablePostProcessingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/ResolveChangelogTablePostProcessingSuite.scala new file mode 100644 index 0000000000000..19e550328b51e --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/ResolveChangelogTablePostProcessingSuite.scala @@ -0,0 +1,791 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector + +import java.util.Collections + +import org.scalatest.BeforeAndAfterEach + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.connector.catalog.{ + ChangelogProperties, Column, Identifier, InMemoryChangelogCatalog} +import org.apache.spark.sql.connector.expressions.Transform +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{ + BinaryType, BooleanType, DoubleType, LongType, StringType} +import org.apache.spark.unsafe.types.UTF8String + +/** + * Tests for [[org.apache.spark.sql.catalyst.analysis.ResolveChangelogTable]] using the + * in-memory changelog catalog. These tests don't depend on Delta or any specific connector; + * they directly control what the connector "returns" by populating the in-memory changelog + * with hand-crafted change rows. + * + * Each test sets up [[ChangelogProperties]] on the catalog to enable specific post-processing + * paths (carry-over removal, update detection) and then verifies that Spark's analyzer rule + * correctly transforms the plan and produces the expected output. + */ +class ResolveChangelogTablePostProcessingSuite + extends QueryTest + with SharedSparkSession + with BeforeAndAfterEach { + + private val catalogName = "cdc_test_catalog" + private val testTableName = "events" + + override def beforeAll(): Unit = { + super.beforeAll() + spark.conf.set( + s"spark.sql.catalog.$catalogName", + classOf[InMemoryChangelogCatalog].getName) + } + + override def beforeEach(): Unit = { + super.beforeEach() + val cat = catalog + val ident = Identifier.of(Array.empty, testTableName) + if (cat.tableExists(ident)) cat.dropTable(ident) + cat.clearChangeRows(ident) + cat.setChangelogProperties(ident, ChangelogProperties()) + cat.createTable( + ident, + Array( + Column.create("id", LongType), + Column.create("name", StringType)), + Array.empty[Transform], + Collections.emptyMap[String, String]()) + } + + private def catalog: InMemoryChangelogCatalog = { + spark.sessionState.catalogManager + .catalog(catalogName) + .asInstanceOf[InMemoryChangelogCatalog] + } + + private def ident = Identifier.of(Array.empty, testTableName) + + /** + * Helper to create a change row matching schema + * (id, name, _change_type, _commit_version, _commit_timestamp). + */ + private def changeRow( + id: Long, + name: String, + changeType: String, + commitVersion: Long, + commitTimestamp: Long = 0L): InternalRow = { + InternalRow( + id, + UTF8String.fromString(name), + UTF8String.fromString(changeType), + commitVersion, + commitTimestamp) + } + + // =========================================================================== + // Carry-Over Removal + // =========================================================================== + + test("carry-over removal drops identical delete+insert pairs") { + catalog.setChangelogProperties(ident, ChangelogProperties( + containsCarryoverRows = true, + rowIdNames = Seq("id"), + rowVersionName = Some("_commit_version"))) + + // v1: insert Alice and Bob + // v2: real delete Alice, carry-over for Bob (delete + insert with identical data) + catalog.addChangeRows(ident, Seq( + changeRow(1L, "Alice", "insert", 1L), + changeRow(2L, "Bob", "insert", 1L), + changeRow(1L, "Alice", "delete", 2L), + changeRow(2L, "Bob", "delete", 2L), // carry-over + changeRow(2L, "Bob", "insert", 2L))) // carry-over + + val rows = sql( + s"SELECT id, name, _change_type, _commit_version " + + s"FROM $catalogName.$testTableName CHANGES FROM VERSION 1 TO VERSION 2") + .orderBy("_commit_version", "id", "_change_type") + .collect() + + val descs = rows.map(r => + s"${r.getLong(0)}:${r.getString(1)}:${r.getString(2)}:v${r.getLong(3)}") + + // v1 inserts kept + assert(descs.contains("1:Alice:insert:v1")) + assert(descs.contains("2:Bob:insert:v1")) + // Real Alice delete kept + assert(descs.contains("1:Alice:delete:v2")) + // Bob carry-over pair removed + assert(!descs.contains("2:Bob:delete:v2"), + s"Bob delete should be dropped. Got: ${descs.mkString(",")}") + assert(!descs.contains("2:Bob:insert:v2"), + s"Bob insert should be dropped. Got: ${descs.mkString(",")}") + } + + test("deduplicationMode=none keeps all carry-over rows") { + catalog.setChangelogProperties(ident, ChangelogProperties( + containsCarryoverRows = true, + rowIdNames = Seq("id"), + rowVersionName = Some("_commit_version"))) + + catalog.addChangeRows(ident, Seq( + changeRow(1L, "Alice", "insert", 1L), + changeRow(2L, "Bob", "delete", 2L), + changeRow(2L, "Bob", "insert", 2L))) + + val rows = sql( + s"SELECT id FROM $catalogName.$testTableName " + + s"CHANGES FROM VERSION 1 TO VERSION 2 WITH (deduplicationMode = 'none')") + .collect() + + assert(rows.length == 3, "Without dedup, all 3 raw rows should be returned") + } + + // =========================================================================== + // Update Detection + // =========================================================================== + + test("update detection relabels delete+insert with different data as update") { + catalog.setChangelogProperties(ident, ChangelogProperties( + containsCarryoverRows = false, // no carry-overs in this test + representsUpdateAsDeleteAndInsert = true, + rowIdNames = Seq("id"), + rowVersionName = Some("_commit_version"))) + + catalog.addChangeRows(ident, Seq( + changeRow(1L, "Alice", "insert", 1L), + // v2: Alice -> Robert (delete old, insert new) + changeRow(1L, "Alice", "delete", 2L), + changeRow(1L, "Robert", "insert", 2L))) + + val rows = sql( + s"SELECT id, name, _change_type, _commit_version " + + s"FROM $catalogName.$testTableName " + + s"CHANGES FROM VERSION 1 TO VERSION 2 WITH (computeUpdates = 'true')") + .orderBy("_commit_version", "_change_type") + .collect() + + val descs = rows.map(r => + s"${r.getLong(0)}:${r.getString(1)}:${r.getString(2)}") + + assert(descs.contains("1:Alice:insert"), s"v1 insert. Got: ${descs.mkString(",")}") + assert(descs.contains("1:Alice:update_preimage")) + assert(descs.contains("1:Robert:update_postimage")) + // No raw delete/insert at v2 + assert(!descs.contains("1:Alice:delete")) + assert(!descs.contains("1:Robert:insert")) + } + + test("delete and insert in different versions are NOT labeled as update") { + catalog.setChangelogProperties(ident, ChangelogProperties( + representsUpdateAsDeleteAndInsert = true, + rowIdNames = Seq("id"), + rowVersionName = Some("_commit_version"))) + + catalog.addChangeRows(ident, Seq( + changeRow(1L, "Alice", "insert", 1L), + changeRow(1L, "Alice", "delete", 2L), + changeRow(1L, "Alice", "insert", 3L))) + + val rows = sql( + s"SELECT _change_type, _commit_version FROM $catalogName.$testTableName " + + s"CHANGES FROM VERSION 1 TO VERSION 3 " + + s"WITH (computeUpdates = 'true', deduplicationMode = 'none')") + .collect() + + assert(!rows.exists(_.getString(0).contains("update_")), + "Delete and insert in different versions should not be labeled as update") + } + + // =========================================================================== + // No row identity: post-processing skipped + // =========================================================================== + + test("empty rowId skips post-processing in plan") { + // Default ChangelogProperties has no rowId; post-processing must not be injected + catalog.addChangeRows(ident, Seq( + changeRow(1L, "Alice", "insert", 1L), + changeRow(2L, "Bob", "delete", 2L), + changeRow(2L, "Bob", "insert", 2L))) + + val df = sql( + s"SELECT * FROM $catalogName.$testTableName " + + s"CHANGES FROM VERSION 1 TO VERSION 2 WITH (computeUpdates = 'true')") + + val plan = df.queryExecution.analyzed.treeString + assert(!plan.contains("CarryOverRemoval"), + s"Plan should NOT contain CarryOverRemoval without rowId. Plan:\n$plan") + assert(!plan.contains("_ins_cnt"), + s"Plan should NOT contain update detection window without rowId. Plan:\n$plan") + } + + // =========================================================================== + // Combined + // =========================================================================== + + test("carry-over removal and update detection combined") { + catalog.setChangelogProperties(ident, ChangelogProperties( + containsCarryoverRows = true, + representsUpdateAsDeleteAndInsert = true, + rowIdNames = Seq("id"), + rowVersionName = Some("_commit_version"))) + + // v1: insert Alice, Bob + // v2: Alice carry-over (CoW), Bob real update (Bob -> Robert) + catalog.addChangeRows(ident, Seq( + changeRow(1L, "Alice", "insert", 1L), + changeRow(2L, "Bob", "insert", 1L), + changeRow(1L, "Alice", "delete", 2L), // carry-over + changeRow(1L, "Alice", "insert", 2L), // carry-over + changeRow(2L, "Bob", "delete", 2L), // update preimage + changeRow(2L, "Robert", "insert", 2L))) // update postimage + + val rows = sql( + s"SELECT id, name, _change_type FROM $catalogName.$testTableName " + + s"CHANGES FROM VERSION 1 TO VERSION 2 WITH (computeUpdates = 'true')") + .orderBy("_commit_version", "id", "_change_type") + .collect() + + val descs = rows.map(r => + s"${r.getLong(0)}:${r.getString(1)}:${r.getString(2)}").toSet + + // v1 inserts + assert(descs.contains("1:Alice:insert")) + assert(descs.contains("2:Bob:insert")) + // Alice carry-over dropped + assert(!descs.contains("1:Alice:delete")) + // Bob -> Robert as update + assert(descs.contains("2:Bob:update_preimage")) + assert(descs.contains("2:Robert:update_postimage")) + // Should be exactly 4 rows + assert(rows.length == 4, s"Expected 4 rows, got ${rows.length}: ${descs.mkString(",")}") + } + + // =========================================================================== + // computeUpdates default (false) keeps raw delete+insert + // =========================================================================== + + test("without computeUpdates, delete+insert with different data stays raw") { + catalog.setChangelogProperties(ident, ChangelogProperties( + containsCarryoverRows = true, + representsUpdateAsDeleteAndInsert = true, + rowIdNames = Seq("id"), + rowVersionName = Some("_commit_version"))) + + catalog.addChangeRows(ident, Seq( + changeRow(1L, "Alice", "insert", 1L), + changeRow(2L, "Bob", "insert", 1L), + changeRow(1L, "Alice", "delete", 2L), // carry-over (CoW) + changeRow(1L, "Alice", "insert", 2L), // carry-over (CoW) + changeRow(2L, "Bob", "delete", 2L), // real change pre + changeRow(2L, "Robert", "insert", 2L))) // real change post + + // Default computeUpdates=false: do NOT relabel, but DO drop carry-overs + val rows = sql( + s"SELECT id, name, _change_type FROM $catalogName.$testTableName " + + s"CHANGES FROM VERSION 1 TO VERSION 2") + .orderBy("_commit_version", "id", "_change_type") + .collect() + + val descs = rows.map(r => + s"${r.getLong(0)}:${r.getString(1)}:${r.getString(2)}") + + assert(descs.contains("2:Bob:delete"), s"Bob delete remains raw. Got: ${descs.mkString(",")}") + assert(descs.contains("2:Robert:insert"), "Robert insert remains raw") + assert(!descs.exists(_.contains("update_")), "No update_* without computeUpdates") + assert(!descs.contains("1:Alice:delete"), "Alice carry-over removed") + } + + test("update detection on pure inserts leaves them as inserts") { + catalog.setChangelogProperties(ident, ChangelogProperties( + representsUpdateAsDeleteAndInsert = true, + rowIdNames = Seq("id"), + rowVersionName = Some("_commit_version"))) + + catalog.addChangeRows(ident, Seq( + changeRow(1L, "Alice", "insert", 1L), + changeRow(2L, "Bob", "insert", 2L))) + + val rows = sql( + s"SELECT id, _change_type FROM $catalogName.$testTableName " + + s"CHANGES FROM VERSION 1 TO VERSION 2 WITH (computeUpdates = 'true')") + .collect() + + assert(rows.length == 2) + assert(rows.forall(_.getString(1) == "insert"), + s"Pure inserts must stay 'insert'. Got: ${rows.map(_.getString(1)).mkString(",")}") + } + + // Pins the CASE-WHEN default branch in injectUpdateDetection: when a (rowId, rowVersion) + // partition has _del_cnt=1 but _ins_cnt=0, isUpdate is false and _change_type stays 'delete'. + test("update detection keeps delete as-is when partition has no paired insert") { + catalog.setChangelogProperties(ident, ChangelogProperties( + representsUpdateAsDeleteAndInsert = true, + rowIdNames = Seq("id"), + rowVersionName = Some("_commit_version"))) + + catalog.addChangeRows(ident, Seq( + changeRow(1L, "Alice", "insert", 1L), + changeRow(1L, "Alice", "delete", 2L))) // unpaired delete at v2 + + val rows = sql( + s"SELECT id, name, _change_type, _commit_version FROM $catalogName.$testTableName " + + s"CHANGES FROM VERSION 1 TO VERSION 2 WITH (computeUpdates = 'true')") + .collect() + + val descs = rows.map(r => + s"${r.getLong(0)}:${r.getString(1)}:${r.getString(2)}:v${r.getLong(3)}") + + assert(descs.contains("1:Alice:insert:v1")) + assert(descs.contains("1:Alice:delete:v2"), + s"Unpaired delete must stay 'delete', not become update_preimage. " + + s"Got: ${descs.mkString(",")}") + assert(!descs.exists(_.contains("update_")), + "No update_* labels when no insert partner exists in the partition") + } + + // =========================================================================== + // Range edge cases + // =========================================================================== + + test("single-version range FROM VERSION X TO VERSION X") { + catalog.addChangeRows(ident, Seq( + changeRow(1L, "Alice", "insert", 1L), + changeRow(2L, "Bob", "insert", 1L), + changeRow(3L, "Charlie", "insert", 2L))) + + val rows = sql( + s"SELECT id, _change_type, _commit_version FROM $catalogName.$testTableName " + + s"CHANGES FROM VERSION 2 TO VERSION 2 WITH (deduplicationMode = 'none')") + .collect() + + assert(rows.length == 1, s"Single version: 1 row. Got ${rows.length}") + assert(rows(0).getLong(0) == 3L) + assert(rows(0).getString(1) == "insert") + } + + test("multiple operations across versions") { + catalog.setChangelogProperties(ident, ChangelogProperties( + containsCarryoverRows = true, + representsUpdateAsDeleteAndInsert = true, + rowIdNames = Seq("id"), + rowVersionName = Some("_commit_version"))) + + catalog.addChangeRows(ident, Seq( + // v1: insert 3 rows + changeRow(1L, "Alice", "insert", 1L), + changeRow(2L, "Bob", "insert", 1L), + changeRow(3L, "Charlie", "insert", 1L), + // v2: delete Alice; CoW carry-overs for Bob/Charlie + changeRow(1L, "Alice", "delete", 2L), + changeRow(2L, "Bob", "delete", 2L), + changeRow(2L, "Bob", "insert", 2L), + changeRow(3L, "Charlie", "delete", 2L), + changeRow(3L, "Charlie", "insert", 2L), + // v3: update Bob -> Robert; CoW carry-over for Charlie + changeRow(2L, "Bob", "delete", 3L), + changeRow(2L, "Robert", "insert", 3L), + changeRow(3L, "Charlie", "delete", 3L), + changeRow(3L, "Charlie", "insert", 3L), + // v4: insert Diana + changeRow(4L, "Diana", "insert", 4L))) + + val rows = sql( + s"SELECT id, name, _change_type, _commit_version FROM $catalogName.$testTableName " + + s"CHANGES FROM VERSION 1 TO VERSION 4 WITH (computeUpdates = 'true')") + .orderBy("_commit_version", "id", "_change_type") + .collect() + + val descs = rows.map(r => + s"${r.getLong(0)}:${r.getString(1)}:${r.getString(2)}:v${r.getLong(3)}").toSet + + // v1 + assert(descs.contains("1:Alice:insert:v1")) + assert(descs.contains("2:Bob:insert:v1")) + assert(descs.contains("3:Charlie:insert:v1")) + // v2 + assert(descs.contains("1:Alice:delete:v2")) + assert(!descs.contains("2:Bob:delete:v2"), "Bob carry-over dropped") + assert(!descs.contains("3:Charlie:delete:v2"), "Charlie carry-over dropped") + // v3 + assert(descs.contains("2:Bob:update_preimage:v3")) + assert(descs.contains("2:Robert:update_postimage:v3")) + assert(!descs.contains("3:Charlie:delete:v3"), "Charlie carry-over dropped in v3") + // v4 + assert(descs.contains("4:Diana:insert:v4")) + } + + test("EXCLUSIVE start bound skips the start version") { + catalog.addChangeRows(ident, Seq( + changeRow(1L, "Alice", "insert", 1L), + changeRow(2L, "Bob", "insert", 2L), + changeRow(3L, "Charlie", "insert", 3L))) + + val rows = sql( + s"SELECT id, _commit_version FROM $catalogName.$testTableName " + + s"CHANGES FROM VERSION 1 EXCLUSIVE TO VERSION 3 " + + s"WITH (deduplicationMode = 'none')") + .orderBy("_commit_version") + .collect() + + assert(!rows.exists(_.getLong(1) == 1L), "v1 must be excluded") + assert(rows.exists(_.getLong(0) == 2L), "Bob (v2) included") + assert(rows.exists(_.getLong(0) == 3L), "Charlie (v3) included") + } + + test("open-ended range (no TO clause) reads to latest") { + catalog.addChangeRows(ident, Seq( + changeRow(1L, "Alice", "insert", 1L), + changeRow(2L, "Bob", "insert", 2L), + changeRow(3L, "Charlie", "insert", 3L))) + + val rows = sql( + s"SELECT id, _commit_version FROM $catalogName.$testTableName " + + s"CHANGES FROM VERSION 1 WITH (deduplicationMode = 'none')") + .orderBy("_commit_version", "id") + .collect() + + assert(rows.length == 3, s"Open-ended range should see all 3. Got ${rows.length}") + assert(rows.exists(r => r.getLong(0) == 3L && r.getLong(1) == 3L)) + } + + test("DELETE all rows: no carry-over inserts at v2") { + catalog.setChangelogProperties(ident, ChangelogProperties( + containsCarryoverRows = true, + rowIdNames = Seq("id"), + rowVersionName = Some("_commit_version"))) + + catalog.addChangeRows(ident, Seq( + changeRow(1L, "Alice", "insert", 1L), + changeRow(2L, "Bob", "insert", 1L), + changeRow(1L, "Alice", "delete", 2L), + changeRow(2L, "Bob", "delete", 2L))) + + val rows = sql( + s"SELECT id, name, _change_type, _commit_version FROM $catalogName.$testTableName " + + s"CHANGES FROM VERSION 1 TO VERSION 2") + .orderBy("_commit_version", "id") + .collect() + + val descs = rows.map(r => + s"${r.getLong(0)}:${r.getString(1)}:${r.getString(2)}:v${r.getLong(3)}") + + assert(descs.contains("1:Alice:insert:v1")) + assert(descs.contains("2:Bob:insert:v1")) + assert(descs.contains("1:Alice:delete:v2")) + assert(descs.contains("2:Bob:delete:v2")) + assert(!descs.exists(_.contains("insert:v2")), "No inserts at v2") + } + + test("UPDATE all rows: every row gets update_pre/postimage, no carry-overs") { + catalog.setChangelogProperties(ident, ChangelogProperties( + containsCarryoverRows = true, + representsUpdateAsDeleteAndInsert = true, + rowIdNames = Seq("id"), + rowVersionName = Some("_commit_version"))) + + catalog.addChangeRows(ident, Seq( + changeRow(1L, "Alice", "insert", 1L), + changeRow(2L, "Bob", "insert", 1L), + changeRow(1L, "Alice", "delete", 2L), + changeRow(1L, "Alice_updated", "insert", 2L), + changeRow(2L, "Bob", "delete", 2L), + changeRow(2L, "Bob_updated", "insert", 2L))) + + val rows = sql( + s"SELECT id, name, _change_type, _commit_version FROM $catalogName.$testTableName " + + s"CHANGES FROM VERSION 1 TO VERSION 2 WITH (computeUpdates = 'true')") + .orderBy("_commit_version", "id", "_change_type") + .collect() + + val descs = rows.map(r => + s"${r.getLong(0)}:${r.getString(1)}:${r.getString(2)}:v${r.getLong(3)}").toSet + + assert(descs.contains("1:Alice:update_preimage:v2")) + assert(descs.contains("1:Alice_updated:update_postimage:v2")) + assert(descs.contains("2:Bob:update_preimage:v2")) + assert(descs.contains("2:Bob_updated:update_postimage:v2")) + assert(rows.length == 6, s"Expected 2 inserts + 2 pre + 2 post. Got ${rows.length}") + } + + test("carry-over removal with many rows: only real change remains") { + catalog.setChangelogProperties(ident, ChangelogProperties( + containsCarryoverRows = true, + rowIdNames = Seq("id"), + rowVersionName = Some("_commit_version"))) + + // 10 inserts at v1, then at v2: delete row 5; CoW writes 9 carry-over pairs + 1 real delete + val v1Inserts = (1 to 10).map(i => + changeRow(i.toLong, ('A' + i - 1).toChar.toString, "insert", 1L)) + val v2Carryovers = (1 to 10).filter(_ != 5).flatMap { i => + val name = ('A' + i - 1).toChar.toString + Seq( + changeRow(i.toLong, name, "delete", 2L), + changeRow(i.toLong, name, "insert", 2L)) + } + val v2RealDelete = Seq(changeRow(5L, "E", "delete", 2L)) + catalog.addChangeRows(ident, v1Inserts ++ v2Carryovers ++ v2RealDelete) + + val rows = sql( + s"SELECT id, name, _change_type FROM $catalogName.$testTableName " + + s"CHANGES FROM VERSION 2 TO VERSION 2") + .collect() + + val descs = rows.map(r => + s"${r.getLong(0)}:${r.getString(1)}:${r.getString(2)}") + assert(rows.length == 1, + s"Only 1 real change should remain (9 carry-overs dropped). Got: ${descs.mkString(",")}") + assert(descs.contains("5:E:delete")) + } + + test("carry-over removal with mixed types (DOUBLE, BOOLEAN)") { + val mixedTable = "events_mixed" + val mixedIdent = Identifier.of(Array.empty, mixedTable) + val cat = catalog + if (cat.tableExists(mixedIdent)) cat.dropTable(mixedIdent) + cat.clearChangeRows(mixedIdent) + cat.createTable( + mixedIdent, + Array( + Column.create("id", LongType), + Column.create("name", StringType), + Column.create("score", DoubleType), + Column.create("active", BooleanType)), + Array.empty[Transform], + Collections.emptyMap[String, String]()) + cat.setChangelogProperties(mixedIdent, ChangelogProperties( + containsCarryoverRows = true, + representsUpdateAsDeleteAndInsert = true, + rowIdNames = Seq("id"), + rowVersionName = Some("_commit_version"))) + + def mixedRow( + id: Long, name: String, score: Double, active: Boolean, + ct: String, v: Long): InternalRow = { + InternalRow( + id, UTF8String.fromString(name), score, active, + UTF8String.fromString(ct), v, 0L) + } + + cat.addChangeRows(mixedIdent, Seq( + mixedRow(1L, "Alice", 95.5, true, "insert", 1L), + mixedRow(2L, "Bob", 87.3, false, "insert", 1L), + // v2: update Alice's score; Bob is carry-over + mixedRow(1L, "Alice", 95.5, true, "delete", 2L), + mixedRow(1L, "Alice", 99.0, true, "insert", 2L), + mixedRow(2L, "Bob", 87.3, false, "delete", 2L), // carry-over + mixedRow(2L, "Bob", 87.3, false, "insert", 2L))) // carry-over + + val rows = sql( + s"SELECT id, name, score, active, _change_type FROM $catalogName.$mixedTable " + + s"CHANGES FROM VERSION 1 TO VERSION 2 WITH (computeUpdates = 'true')") + .orderBy("_commit_version", "id", "_change_type") + .collect() + + val descs = rows.map(r => s"${r.getLong(0)}:${r.getString(4)}") + assert(descs.contains("1:update_preimage")) + assert(descs.contains("1:update_postimage")) + assert(!descs.contains("2:delete"), + s"Bob carry-over must be dropped despite DOUBLE/BOOLEAN. Got: ${descs.mkString(",")}") + + // Verify the score values + val pre = rows.find(r => r.getLong(0) == 1L && r.getString(4) == "update_preimage").get + val post = rows.find(r => r.getLong(0) == 1L && r.getString(4) == "update_postimage").get + assert(pre.getDouble(2) == 95.5) + assert(post.getDouble(2) == 99.0) + } + + // =========================================================================== + // Iterator correctness: state-machine paths + null handling in dataColumnsEqual + // =========================================================================== + + // Pins the "pendingDelete always null, input is all inserts" path of CarryOverIterator. + // Even with containsCarryoverRows=true, a stream of only inserts must pass through + // unchanged; the iterator should never enter the delete/insert pairing branch. + test("carry-over iterator passes append-only stream through unchanged") { + catalog.setChangelogProperties(ident, ChangelogProperties( + containsCarryoverRows = true, + rowIdNames = Seq("id"), + rowVersionName = Some("_commit_version"))) + + catalog.addChangeRows(ident, Seq( + changeRow(1L, "Alice", "insert", 1L), + changeRow(2L, "Bob", "insert", 1L), + changeRow(3L, "Charlie", "insert", 1L))) + + val rows = sql( + s"SELECT id, name, _change_type FROM $catalogName.$testTableName " + + s"CHANGES FROM VERSION 1 TO VERSION 1") + .orderBy("id") + .collect() + + assert(rows.length == 3, s"All 3 inserts must survive, got ${rows.length}") + assert(rows.forall(_.getString(2) == "insert"), + s"Every row must stay 'insert'. Got: ${rows.map(_.getString(2)).mkString(",")}") + } + + // Pins null handling in CarryOverIterator.dataColumnsEqual: + // - both-null -> equal -> carry-over dropped + // - one-null -> not equal -> both rows survive + test("carry-over removal handles null data values correctly") { + catalog.setChangelogProperties(ident, ChangelogProperties( + containsCarryoverRows = true, + rowIdNames = Seq("id"), + rowVersionName = Some("_commit_version"))) + + // id=1: data null throughout (both-null carry-over, must be dropped) + // id=2: real change, "Bob" -> null (one-null, must survive as both rows) + catalog.addChangeRows(ident, Seq( + changeRow(1L, null, "insert", 1L), + changeRow(2L, "Bob", "insert", 1L), + changeRow(1L, null, "delete", 2L), // both-null carry-over delete + changeRow(1L, null, "insert", 2L), // both-null carry-over insert (drop pair) + changeRow(2L, "Bob", "delete", 2L), // one-null vs value: real change pre + changeRow(2L, null, "insert", 2L))) // one-null vs value: real change post + + val rows = sql( + s"SELECT id, name, _change_type, _commit_version FROM $catalogName.$testTableName " + + s"CHANGES FROM VERSION 1 TO VERSION 2") + .orderBy("_commit_version", "id", "_change_type") + .collect() + + // Use a string that makes null visible in the description + val descs = rows.map { r => + val nameStr = if (r.isNullAt(1)) "" else r.getString(1) + s"${r.getLong(0)}:$nameStr:${r.getString(2)}:v${r.getLong(3)}" + } + + // v1 inserts preserved + assert(descs.contains("1::insert:v1"), s"Got: ${descs.mkString(",")}") + assert(descs.contains("2:Bob:insert:v1")) + // id=1 both-null pair dropped as carry-over + assert(!descs.contains("1::delete:v2"), + s"both-null pair must be dropped as carry-over. Got: ${descs.mkString(",")}") + assert(!descs.contains("1::insert:v2"), + s"both-null pair must be dropped as carry-over. Got: ${descs.mkString(",")}") + // id=2 value-vs-null real change survives + assert(descs.contains("2:Bob:delete:v2"), + s"value-vs-null pair must survive. Got: ${descs.mkString(",")}") + assert(descs.contains("2::insert:v2"), + s"value-vs-null pair must survive. Got: ${descs.mkString(",")}") + assert(rows.length == 4, + s"Expected 4 rows (2 v1 inserts + 2 id=2 v2 real change). Got ${rows.length}") + } + + // =========================================================================== + // Regression: BinaryType carry-over (reference-equality bug) + // =========================================================================== + + // Two carry-over rows with equal-by-content but reference-unequal byte arrays. + // Expectation: Bob's delete+insert pair is identical data and must be dropped. + // If `dataColumnsEqual` uses `==` (reference equality) on `Array[Byte]`, this test fails + // because the two payloads are different JVM objects. + test("carry-over removal with BinaryType column drops content-equal pair") { + val binTable = "events_binary" + val binIdent = Identifier.of(Array.empty, binTable) + val cat = catalog + if (cat.tableExists(binIdent)) cat.dropTable(binIdent) + cat.clearChangeRows(binIdent) + cat.createTable( + binIdent, + Array( + Column.create("id", LongType), + Column.create("payload", BinaryType)), + Array.empty[Transform], + Collections.emptyMap[String, String]()) + cat.setChangelogProperties(binIdent, ChangelogProperties( + containsCarryoverRows = true, + rowIdNames = Seq("id"), + rowVersionName = Some("_commit_version"))) + + def binRow(id: Long, payload: Array[Byte], ct: String, v: Long): InternalRow = { + InternalRow(id, payload, UTF8String.fromString(ct), v, 0L) + } + + // Bob's carry-over: two distinct byte[] instances holding identical bytes. + val bobPayloadA = Array[Byte](1, 2, 3, 4) + val bobPayloadB = Array[Byte](1, 2, 3, 4) + assert(bobPayloadA ne bobPayloadB, "test precondition: distinct array instances") + + cat.addChangeRows(binIdent, Seq( + binRow(1L, Array[Byte](9, 9), "insert", 1L), + binRow(2L, bobPayloadA, "insert", 1L), + // v2: real delete for Alice; Bob carry-over with distinct-but-equal byte arrays + binRow(1L, Array[Byte](9, 9), "delete", 2L), + binRow(2L, bobPayloadA, "delete", 2L), // carry-over (same instance as v1 insert) + binRow(2L, bobPayloadB, "insert", 2L))) // carry-over (distinct instance, equal bytes) + + val rows = sql( + s"SELECT id, _change_type, _commit_version FROM $catalogName.$binTable " + + s"CHANGES FROM VERSION 1 TO VERSION 2") + .orderBy("_commit_version", "id", "_change_type") + .collect() + + val descs = rows.map(r => s"${r.getLong(0)}:${r.getString(1)}:v${r.getLong(2)}") + + // v1 inserts retained + assert(descs.contains("1:insert:v1"), s"v1 insert for id=1. Got: ${descs.mkString(",")}") + assert(descs.contains("2:insert:v1"), s"v1 insert for id=2. Got: ${descs.mkString(",")}") + // v2 real delete retained + assert(descs.contains("1:delete:v2"), "Real delete for id=1 retained") + // v2 Bob carry-over must be dropped despite distinct byte[] instances + assert(!descs.contains("2:delete:v2"), + s"Bob BinaryType carry-over delete must be dropped. Got: ${descs.mkString(",")}") + assert(!descs.contains("2:insert:v2"), + s"Bob BinaryType carry-over insert must be dropped. Got: ${descs.mkString(",")}") + } + + // =========================================================================== + // Behavior divergence from getChangeRows: no-op UPDATE + // =========================================================================== + + test("no-op UPDATE (unchanged data) is dropped as carry-over") { + // SPIP B.6: connector-agnostic carry-over removal compares data columns. A no-op + // UPDATE produces byte-identical delete+insert, indistinguishable from a CoW + // carry-over, and is therefore dropped. Legacy getChangeRows would have emitted + // update_preimage/update_postimage here. + catalog.setChangelogProperties(ident, ChangelogProperties( + containsCarryoverRows = true, + representsUpdateAsDeleteAndInsert = true, + rowIdNames = Seq("id"), + rowVersionName = Some("_commit_version"))) + + catalog.addChangeRows(ident, Seq( + changeRow(1L, "Alice", "insert", 1L), + // v2 no-op update: identical delete + insert + changeRow(1L, "Alice", "delete", 2L), + changeRow(1L, "Alice", "insert", 2L))) + + val rows = sql( + s"SELECT id, name, _change_type, _commit_version FROM $catalogName.$testTableName " + + s"CHANGES FROM VERSION 1 TO VERSION 2 WITH (computeUpdates = 'true')") + .orderBy("_commit_version", "_change_type") + .collect() + + val descs = rows.map(r => + s"${r.getLong(0)}:${r.getString(1)}:${r.getString(2)}:v${r.getLong(3)}") + + assert(descs.contains("1:Alice:insert:v1")) + assert(!descs.exists(_.contains("update_")), + "No-op UPDATE indistinguishable from carry-over, so it is dropped") + assert(!descs.contains("1:Alice:delete:v2")) + assert(!descs.contains("1:Alice:insert:v2")) + assert(rows.length == 1, "Only v1 insert remains; v2 no-op UPDATE silently dropped") + } +} From 91136e1c0b0566396afae9b6f5e6ea977c11b2d9 Mon Sep 17 00:00:00 2001 From: Sandro Speh Date: Mon, 20 Apr 2026 13:08:42 +0000 Subject: [PATCH 2/5] Polish --- .../analysis/ResolveChangelogTable.scala | 28 +++---------------- .../datasources/v2/ChangelogTable.scala | 4 +-- .../sql/execution/CarryOverIterator.scala | 16 ++++------- .../sql/execution/CarryOverRemovalExec.scala | 5 ---- .../connector/ChangelogResolutionSuite.scala | 2 +- ...lveChangelogTablePostProcessingSuite.scala | 2 +- 6 files changed, 12 insertions(+), 45 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveChangelogTable.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveChangelogTable.scala index 91e0e63304be7..f5e04ccfd1fd0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveChangelogTable.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveChangelogTable.scala @@ -109,6 +109,7 @@ object ResolveChangelogTable extends Rule[LogicalPlan] { val changeTypeAttr = plan.output.find(_.name == "_change_type").get val rowIdExprs = V2ExpressionUtils.resolveRefs[NamedExpression](cl.rowId().toSeq, plan) val rowVersionExpr = V2ExpressionUtils.resolveRef[NamedExpression](cl.rowVersion(), plan) + // Layer 1: Window with _del_cnt, _ins_cnt val insertIf = If( EqualTo(changeTypeAttr, Literal("insert")), Literal(1), Literal(null, IntegerType)) @@ -190,24 +191,8 @@ object ResolveChangelogTable extends Rule[LogicalPlan] { val sortOrder = (rowIdExprs ++ Seq(rowVersionExpr, changeTypeAttr)) .map(e => SortOrder(e, Ascending)) val sorted = Sort(sortOrder, global = false, repartitioned) - // 4. Build CarryOverRemoval with column NAMES, not ordinals. - // - // TODO(review discussion): We pass column names and resolve them to ordinals at execution - // time in CarryOverRemovalExec.doExecute(). Ordinals computed at analysis time were tried - // first but broke: ColumnPruning / projection rewrites between analysis and physical - // planning can reorder or re-number columns, so analysis-time ordinals pointed at the - // wrong fields by the time the exec node ran. Resolving names against child.output at - // execute time dodges that. Flagging this as a discussion point. @Johan: is - // name-resolution the right idiom here, or is there a cleaner way to pin ordinals - // through the optimizer? - // - // TODO(Sandro): Support nested rowId paths (e.g. Seq("payload", "id")). Spark's - // NamedReference API admits them, and prior commits on this branch (see git log before - // this commit) implemented struct-field extraction in CarryOverRemovalExec plus a - // "keep the parent struct in data comparison" special case in the dataColumnNames - // computation below. Restore from that history when the first real connector needs it. - // The top-level-only restriction is enforced in ChangelogTable.validateSchema, so here - // we can take `fieldNames()(0)` directly. + // 4. Build CarryOverRemoval keyed by column names (ordinals would break under + // ColumnPruning); nested rowId paths are rejected in ChangelogTable.validateSchema. val rowIdColumnNames = cl.rowId().map(_.fieldNames()(0)).toSeq val rowVersionColumnName = cl.rowVersion().fieldNames()(0) // e.g. "_commit_version" @@ -215,18 +200,13 @@ object ResolveChangelogTable extends Rule[LogicalPlan] { val excluded = metadataNames ++ rowIdColumnNames.toSet val dataColumnNames = plan.output.map(_.name).filterNot(excluded.contains) - // TODO(review discussion): pinning every relation attribute via requiredAttributes is a - // blunt instrument: it disables ColumnPruning for the entire subtree rooted at - // CarryOverRemoval. @Johan, is there a more surgical way to keep just the data columns - // alive through the optimizer (e.g. a dedicated trait, or marking the logical node as - // requires-all-output)? For now this is defensive. val requiredAttrs = plan.output CarryOverRemoval(sorted, rowIdColumnNames, rowVersionColumnName, dataColumnNames, requiredAttrs) } // --------------------------------------------------------------------------- - // Net Change Computation TODO + // Net Change Computation // --------------------------------------------------------------------------- /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ChangelogTable.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ChangelogTable.scala index 82cab863ae9b4..3ad5cbf7a005d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ChangelogTable.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ChangelogTable.scala @@ -79,9 +79,7 @@ object ChangelogTable { if (needsRowId && cl.rowId().isEmpty) { throw QueryCompilationErrors.changelogMissingRowIdError(cl.name) } - // TODO(Sandro): relax when nested rowId support is re-added (see ResolveChangelogTable - // for the reasoning and prior-commit pointer). For now, every rowId must resolve to a - // top-level column of the changelog schema. + // Assumption: every rowId is a top-level column (nested paths rejected until supported) cl.rowId().foreach { ref => val parts = ref.fieldNames() if (parts.length != 1) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CarryOverIterator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CarryOverIterator.scala index 632c502896717..3f9f09b5d66ab 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CarryOverIterator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CarryOverIterator.scala @@ -31,12 +31,6 @@ import org.apache.spark.sql.types.{DataType, StructType} * - If data columns differ -> real change, emit both rows * - Unpaired delete or insert -> emit as-is * - * TODO(@Johan): Input rows may be mutable/reused by the upstream iterator. Any row buffered - * for comparison against the next row is defensively copied via `.copy()`. This is the - * standard DSv2 pattern, but the per-row copy cost is non-trivial on large scans. Confirm - * whether the upstream stage here guarantees non-reused rows (in which case we could drop the - * copy), or whether we should keep the defensive copy. - * * @param input pre-sorted iterator of InternalRow * @param rowIdOrdinals column indices for row identity columns (supports composite keys) * @param rowVersionOrdinal column index for row version comparison @@ -106,27 +100,27 @@ class CarryOverIterator( * Each tick handles one of four input states, resolved in this priority order: * * (1) `pendingNext` non-null. A row re-queued by a previous tick (cross-group or stray - * delete). If it's a delete, becomes `pendingDelete` and we loop; otherwise it's the + * delete). If it's a delete, becomes `pendingDelete` and we loop. Otherwise it's the * next thing to emit and we're done. * - * (2) No `pendingDelete` and input empty. Nothing left to do; exit. + * (2) No `pendingDelete` and input empty. Nothing left to do -> exit. * * (3) `pendingDelete` non-null and input empty. The buffered delete has no partner and * must be emitted as-is (real change). * * (4) No `pendingDelete` and input has more. Read the next row. If it's a delete, buffer - * it (needs the next row to classify); otherwise emit directly. + * it (needs the next row to classify). Otherwise emit directly. * * (5) `pendingDelete` non-null and input has more. Read the next row and decide: * - same (rowId, rowVersion) AND `_change_type == insert` AND all data columns equal * -> CoW carry-over: drop both. - * - same group AND insert AND data differs: real UPDATE-as-delete-insert; emit the + * - same group AND insert AND data differs: real UPDATE-as-delete-insert -> emit the * delete now, re-queue the insert as `pendingNext` for emission next tick. * - different group OR not an insert: the buffered delete was unpaired; emit it and * re-queue the peeked row. * * Invariant: sort order is (rowId, rowVersion, `_change_type ASC`) so `delete` always - * precedes `insert` within a group; this lets us hold at most one unmatched delete at a + * precedes `insert` within a group. This lets us hold at most one unmatched delete at a * time without multi-row lookahead. * * Termination: every iteration either (a) consumes one row from `input`, (b) resolves a diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CarryOverRemovalExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CarryOverRemovalExec.scala index d7c084d83d848..e0815b14d48ab 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CarryOverRemovalExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CarryOverRemovalExec.scala @@ -31,11 +31,6 @@ import org.apache.spark.sql.catalyst.expressions.Attribute * All ordinals are resolved dynamically from the actual child output at execution time, * because the optimizer may insert projections that change column positions. * - * rowId columns are restricted to top-level columns in the child output. Nested rowId paths - * (e.g. `payload.id`) are rejected at analysis time by - * [[org.apache.spark.sql.catalyst.analysis.ResolveChangelogTable]]. If a future connector - * needs nested row identity, see the TODO there for the strategy and prior-commit pointer. - * * @param child the pre-sorted child plan * @param rowIdColumnNames top-level names of row identity columns (supports composite keys) * @param rowVersionColumnName name of the row version column diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/ChangelogResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/ChangelogResolutionSuite.scala index a8ee47f8d0581..b00ba46ac56ef 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/ChangelogResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/ChangelogResolutionSuite.scala @@ -208,7 +208,7 @@ class ChangelogResolutionSuite extends QueryTest with SharedSparkSession { } // =========================================================================== - // Generic changelog schema validation (SPARK-55951 completion) + // Generic changelog schema validation // =========================================================================== private def stubInfo(): ChangelogInfo = new ChangelogInfo( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/ResolveChangelogTablePostProcessingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/ResolveChangelogTablePostProcessingSuite.scala index 19e550328b51e..c7c93c74f8ff5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/ResolveChangelogTablePostProcessingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/ResolveChangelogTablePostProcessingSuite.scala @@ -756,7 +756,7 @@ class ResolveChangelogTablePostProcessingSuite // =========================================================================== test("no-op UPDATE (unchanged data) is dropped as carry-over") { - // SPIP B.6: connector-agnostic carry-over removal compares data columns. A no-op + // Connector-agnostic carry-over removal compares data columns. A no-op // UPDATE produces byte-identical delete+insert, indistinguishable from a CoW // carry-over, and is therefore dropped. Legacy getChangeRows would have emitted // update_preimage/update_postimage here. From 0865a0c0bbec4fe50a2e40260f4621ae947934a5 Mon Sep 17 00:00:00 2001 From: Sandro Speh Date: Tue, 21 Apr 2026 16:05:08 +0000 Subject: [PATCH 3/5] Simplify carry-over removal by changing rowVersion semantics --- .../analysis/ResolveChangelogTable.scala | 303 +++++++------ .../plans/logical/CarryOverRemoval.scala | 58 --- .../sql/errors/QueryCompilationErrors.scala | 15 - .../datasources/v2/ChangelogTable.scala | 21 +- .../catalog/InMemoryChangelogCatalog.scala | 9 +- .../sql/execution/CarryOverIterator.scala | 232 ---------- .../sql/execution/CarryOverRemovalExec.scala | 80 ---- .../spark/sql/execution/SparkStrategies.scala | 4 - .../connector/ChangelogResolutionSuite.scala | 49 +-- ...lveChangelogTablePostProcessingSuite.scala | 407 +++++++++--------- 10 files changed, 401 insertions(+), 777 deletions(-) delete mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/CarryOverRemoval.scala delete mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/CarryOverIterator.scala delete mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/CarryOverRemovalExec.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveChangelogTable.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveChangelogTable.scala index f5e04ccfd1fd0..9cc60b5578aa7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveChangelogTable.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveChangelogTable.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.aggregate.Count +import org.apache.spark.sql.catalyst.expressions.aggregate.{Count, Max, Min} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.TreeNodeTag @@ -31,9 +31,14 @@ import org.apache.spark.sql.types.IntegerType * carry-over removal and/or update detection plans. * * Fires after [[ResolveRelations]] has wrapped the connector's [[Changelog]] in a - * [[DataSourceV2Relation]]. + * [[DataSourceV2Relation]]. Skipped entirely when [[Changelog.rowId]] is empty (no row identity + * available; raw delete/insert rows are returned as-is). * - * Transformation order: carry-over removal BEFORE update detection. + * Carry-over removal and update detection are fused into a single pass over a + * (rowId, _commit_version)-partitioned Window: the Filter drops CoW carry-over pairs + * (same rowVersion on both sides) and the subsequent Project relabels real delete+insert + * pairs as update_preimage / update_postimage. Net change computation runs afterwards + * as a separate rule step. */ object ResolveChangelogTable extends Rule[LogicalPlan] { @@ -44,20 +49,29 @@ object ResolveChangelogTable extends Rule[LogicalPlan] { if (isAlreadyTransformed(plan)) return plan var updatedPlan = plan updatedPlan = plan.resolveOperatorsUp { - case rel @ DataSourceV2Relation(table: ChangelogTable, _, _, _, _, _) => + // Guard: without row identity, carry-over removal and update detection cannot + // correctly group rows. A match-miss leaves the relation unchanged -- exactly what + // we want for connectors that surface an empty rowId(). + case rel @ DataSourceV2Relation(table: ChangelogTable, _, _, _, _, _) + if table.changelog.rowId().nonEmpty => val changelog = table.changelog val options = table.changelogInfo + + val requiresCarryOverRemoval = + options.deduplicationMode() != ChangelogInfo.DeduplicationMode.NONE && + changelog.containsCarryoverRows() + val requiresUpdateDetection = + options.computeUpdates() && changelog.representsUpdateAsDeleteAndInsert() + val requiresNetChanges = + options.deduplicationMode() == ChangelogInfo.DeduplicationMode.NET_CHANGES && + changelog.containsIntermediateChanges() + var updatedRel: LogicalPlan = rel - // Transformations in order: 1. carry-over removal, 2. update detection, 3. net changes. - if (options.deduplicationMode() != ChangelogInfo.DeduplicationMode.NONE && - changelog.containsCarryoverRows()) { - updatedRel = injectCarryoverRemoval(rel, changelog) + if (requiresCarryOverRemoval || requiresUpdateDetection) { + updatedRel = addRowLevelPostProcessing( + rel, changelog, requiresCarryOverRemoval, requiresUpdateDetection) } - if (options.computeUpdates() && changelog.representsUpdateAsDeleteAndInsert()) { - updatedRel = injectUpdateDetection(updatedRel, changelog) - } - if (options.deduplicationMode() == ChangelogInfo.DeduplicationMode.NET_CHANGES && - changelog.containsIntermediateChanges()) { + if (requiresNetChanges) { updatedRel = injectNetChangeComputation(updatedRel, changelog) } updatedRel @@ -69,154 +83,191 @@ object ResolveChangelogTable extends Rule[LogicalPlan] { } // --------------------------------------------------------------------------- - // Guard + // Row Level Post Processing (Update Detection & Carry-over Removal) // --------------------------------------------------------------------------- /** - * Returns true if this plan has already been transformed by this rule. - * Uses a plan-level tag to prevent re-processing on subsequent rule executor iterations. + * Adds row-level post-processing (carry-over removal and/or update detection) on top of + * the given plan: + * - both active -> Window(counts + rv bounds) -> Filter -> Project(relabel) -> Drop helpers + * - carry-over only -> Window(counts + rv bounds) -> Filter -> Drop helpers + * - update only -> Window(counts only) -> Project(relabel) -> Drop helpers + * - neither -> plan is returned unchanged (caller guards this case) */ - private def isAlreadyTransformed(plan: LogicalPlan): Boolean = { - plan.getTagValue(CHANGELOG_TRANSFORMED_TAG).getOrElse(false) + private def addRowLevelPostProcessing( + plan: LogicalPlan, + cl: Changelog, + requiresCarryOverRemoval: Boolean, + requiresUpdateDetection: Boolean): LogicalPlan = { + if (requiresCarryOverRemoval && requiresUpdateDetection) { + val planWithWindow = addWindowWithInsertAndDeleteCountsAndRowVersionBounds(plan, cl) + val planWithFilter = addCarryOverPairFilter(planWithWindow) + val planWithProjection = addUpdateRelabelProjection(planWithFilter) + removeHelperColumns(planWithProjection) + } else if (requiresCarryOverRemoval) { + val planWithWindow = addWindowWithInsertAndDeleteCountsAndRowVersionBounds(plan, cl) + val planWithFilter = addCarryOverPairFilter(planWithWindow) + removeHelperColumns(planWithFilter) + } else if (requiresUpdateDetection) { + val planWithWindow = addWindowWithInsertAndDeleteCountsOnly(plan, cl) + val planWithProjection = addUpdateRelabelProjection(planWithWindow) + removeHelperColumns(planWithProjection) + } else { + plan + } } - // --------------------------------------------------------------------------- - // Update Detection (window-based) - // --------------------------------------------------------------------------- - /** - * Injects a window-based update detection plan above the given plan. - * - * Adds a Window node partitioned by (rowId, rowVersion) that counts deletes and inserts - * per partition, then rewrites _change_type: - * - insert becomes update_postimage (when both delete and insert exist in same partition) - * - delete becomes update_preimage (when both delete and insert exist in same partition) - * - otherwise keeps original _change_type - * - * Plan shape: - * Project (drop _ins_cnt, _del_cnt) - * |-- Project (rewrite _change_type via CASE WHEN) - * |-- Window (partition by rowId+rowVersion, compute _del_cnt/_ins_cnt) - * |-- [input plan] - * - * @param plan the input plan (DataSourceV2Relation or already-transformed plan) - * @param cl the Changelog providing rowId() and rowVersion() - * @return plan with update detection injected + * Adds a Window node partitioned by (rowId, _commit_version) that computes + * `_del_cnt` (number of `delete` rows in the partition) and `_ins_cnt` (number of + * `insert` rows in the partition). These counts drive update detection: a partition + * with `_del_cnt >= 1 AND _ins_cnt >= 1` is relabeled as update_preimage / + * update_postimage. */ - private def injectUpdateDetection( - plan: LogicalPlan, - cl: Changelog): LogicalPlan = { - val changeTypeAttr = plan.output.find(_.name == "_change_type").get + private def addWindowWithInsertAndDeleteCountsOnly( + plan: LogicalPlan, cl: Changelog): LogicalPlan = { + val changeTypeAttr = getAttribute(plan, "_change_type") val rowIdExprs = V2ExpressionUtils.resolveRefs[NamedExpression](cl.rowId().toSeq, plan) - val rowVersionExpr = V2ExpressionUtils.resolveRef[NamedExpression](cl.rowVersion(), plan) + val commitVersionAttr = getAttribute(plan, "_commit_version") + val partitionByCols = rowIdExprs ++ Seq(commitVersionAttr) + val windowSpec = WindowSpecDefinition(partitionByCols, Nil, UnspecifiedFrame) - // Layer 1: Window with _del_cnt, _ins_cnt - val insertIf = If( - EqualTo(changeTypeAttr, Literal("insert")), Literal(1), Literal(null, IntegerType)) - val deleteIf = If( - EqualTo(changeTypeAttr, Literal("delete")), Literal(1), Literal(null, IntegerType)) + val insertIf = If(EqualTo(changeTypeAttr, Literal("insert")), + Literal(1), Literal(null, IntegerType)) + val deleteIf = If(EqualTo(changeTypeAttr, Literal("delete")), + Literal(1), Literal(null, IntegerType)) - val partitionByCols = rowIdExprs ++ Seq(rowVersionExpr) + val insCntAlias = Alias(WindowExpression( + Count(Seq(insertIf)).toAggregateExpression(), windowSpec), "_ins_cnt")() + val delCntAlias = Alias(WindowExpression( + Count(Seq(deleteIf)).toAggregateExpression(), windowSpec), "_del_cnt")() + + Window(Seq(delCntAlias, insCntAlias), partitionByCols, Nil, plan) + } + + /** + * Adds a Window node partitioned by (rowId, _commit_version) that computes + * `_del_cnt` (number of `delete` rows in the partition), `_ins_cnt` (number of `insert` + * rows in the partition), `_min_rv` (minimum of the connector's `rowVersion()` + * expression in the partition) and `_max_rv` (maximum of `rowVersion()` in the + * partition). Used whenever carry-over detection is needed: within a delete+insert + * pair, `_min_rv == _max_rv` signals that both sides share the same rowVersion and + * the pair is a CoW carry-over. + */ + private def addWindowWithInsertAndDeleteCountsAndRowVersionBounds( + plan: LogicalPlan, cl: Changelog): LogicalPlan = { + val changeTypeAttr = getAttribute(plan, "_change_type") + val rowIdExprs = V2ExpressionUtils.resolveRefs[NamedExpression](cl.rowId().toSeq, plan) + val commitVersionAttr = getAttribute(plan, "_commit_version") + val rowVersionExpr = V2ExpressionUtils.resolveRef[NamedExpression](cl.rowVersion(), plan) + val partitionByCols = rowIdExprs ++ Seq(commitVersionAttr) val windowSpec = WindowSpecDefinition(partitionByCols, Nil, UnspecifiedFrame) - val insCntAlias = Alias( - WindowExpression(Count(Seq(insertIf)).toAggregateExpression(), windowSpec), "_ins_cnt")() - val delCntAlias = Alias( - WindowExpression(Count(Seq(deleteIf)).toAggregateExpression(), windowSpec), "_del_cnt")() + val insertIf = If(EqualTo(changeTypeAttr, Literal("insert")), + Literal(1), Literal(null, IntegerType)) + val deleteIf = If(EqualTo(changeTypeAttr, Literal("delete")), + Literal(1), Literal(null, IntegerType)) - val windowNode = Window( - plan.output ++ Seq(delCntAlias, insCntAlias), partitionByCols, Nil, plan) + val insCntAlias = Alias(WindowExpression( + Count(Seq(insertIf)).toAggregateExpression(), windowSpec), "_ins_cnt")() + val delCntAlias = Alias(WindowExpression( + Count(Seq(deleteIf)).toAggregateExpression(), windowSpec), "_del_cnt")() + val minRvAlias = Alias(WindowExpression( + Min(rowVersionExpr).toAggregateExpression(), windowSpec), "_min_rv")() + val maxRvAlias = Alias(WindowExpression( + Max(rowVersionExpr).toAggregateExpression(), windowSpec), "_max_rv")() - // Layer 2: Project rewriting _change_type (preserve exprId!) - val insCntRef = insCntAlias.toAttribute - val delCntRef = delCntAlias.toAttribute + Window(Seq(delCntAlias, insCntAlias, minRvAlias, maxRvAlias), + partitionByCols, Nil, plan) + } - val isUpdate = And( - GreaterThanOrEqual(delCntRef, Literal(1L)), - GreaterThanOrEqual(insCntRef, Literal(1L)) - ) + /** + * Adds a Filter node that drops rows belonging to a CoW carry-over pair. + * Expects the input to expose `_del_cnt`, `_ins_cnt`, `_min_rv`, `_max_rv`. + * A pair is a carry-over iff `_del_cnt = 1 AND _ins_cnt = 1 AND _min_rv = _max_rv`. + */ + private def addCarryOverPairFilter(input: LogicalPlan): LogicalPlan = { + val delCnt = getAttribute(input, "_del_cnt") + val insCnt = getAttribute(input, "_ins_cnt") + val minRv = getAttribute(input, "_min_rv") + val maxRv = getAttribute(input, "_max_rv") - val updateType = If( - EqualTo(changeTypeAttr, Literal("insert")), - Literal("update_postimage"), - Literal("update_preimage") - ) + val isCarryoverPair = And( + And(EqualTo(delCnt, Literal(1L)), EqualTo(insCnt, Literal(1L))), + EqualTo(minRv, maxRv)) + Filter(Not(isCarryoverPair), input) + } + + /** + * Adds a Project node that rewrites `_change_type` to `update_preimage` / + * `update_postimage` whenever a delete+insert pair is present in the partition. + * Expects the input to expose `_del_cnt` and `_ins_cnt`. + */ + private def addUpdateRelabelProjection(input: LogicalPlan): LogicalPlan = { + val changeTypeAttr = getAttribute(input, "_change_type") + val delCnt = getAttribute(input, "_del_cnt") + val insCnt = getAttribute(input, "_ins_cnt") + val isUpdate = And( + GreaterThanOrEqual(delCnt, Literal(1L)), + GreaterThanOrEqual(insCnt, Literal(1L))) + val updateType = If(EqualTo(changeTypeAttr, Literal("insert")), + Literal("update_postimage"), Literal("update_preimage")) val caseExpr = CaseWhen(Seq((isUpdate, updateType)), changeTypeAttr) - val newChangeTypeAlias = Alias(caseExpr, "_change_type")() - val projectList = windowNode.output.map { attr => - if (attr.name == "_change_type") newChangeTypeAlias else attr - } - val layer2 = Project(projectList, windowNode) - // Layer 3: Project dropping _del_cnt, _ins_cnt - val layer3 = Project( - layer2.output.filter(a => a.name != "_ins_cnt" && a.name != "_del_cnt"), layer2) - layer3 + val projectList = input.output.map { attr => + if (attr.name == "_change_type") Alias(caseExpr, "_change_type")() + else attr + } + Project(projectList, input) } // --------------------------------------------------------------------------- - // Carry-Over Removal (sort-based) + // Net Change Computation (future) // --------------------------------------------------------------------------- /** - * Injects carry-over removal above the given plan. - * - * Adds Repartition + Sort + CarryOverRemoval that compares consecutive delete+insert - * pairs field-by-field and drops identical pairs. - * - * Plan shape: - * CarryOverRemoval (custom logical node; CarryOverRemovalExec physical node) - * |-- Sort [rowId ASC, rowVersion ASC, _change_type ASC] - * |-- RepartitionByExpression (rowId, rowVersion) - * |-- [input plan] - * - * @param plan the input plan - * @param cl the Changelog providing rowId() and rowVersion() - * @return plan with carry-over removal injected + * Collapses multiple changes per row identity into the net effect. Not in + * scope for this implementation. */ - private def injectCarryoverRemoval( + private def injectNetChangeComputation( plan: LogicalPlan, cl: Changelog): LogicalPlan = { - // 1. Resolve rowId/rowVersion attributes - val changeTypeAttr = plan.output.find(_.name == "_change_type").get - val rowIdExprs = - V2ExpressionUtils.resolveRefs[NamedExpression](cl.rowId().toSeq, plan) - val rowVersionExpr = - V2ExpressionUtils.resolveRef[NamedExpression](cl.rowVersion(), plan) - // 2. RepartitionByExpression - val partitionExpr = rowIdExprs ++ Seq(rowVersionExpr) - val repartitioned = RepartitionByExpression(partitionExpr, plan, None) - // 3. Sort (rowId, rowVersion, _change_type ASC; "delete" sorts before "insert") - val sortOrder = (rowIdExprs ++ Seq(rowVersionExpr, changeTypeAttr)) - .map(e => SortOrder(e, Ascending)) - val sorted = Sort(sortOrder, global = false, repartitioned) - // 4. Build CarryOverRemoval keyed by column names (ordinals would break under - // ColumnPruning); nested rowId paths are rejected in ChangelogTable.validateSchema. - val rowIdColumnNames = cl.rowId().map(_.fieldNames()(0)).toSeq - val rowVersionColumnName = cl.rowVersion().fieldNames()(0) // e.g. "_commit_version" - - val metadataNames = Set("_change_type", "_commit_version", "_commit_timestamp") - val excluded = metadataNames ++ rowIdColumnNames.toSet - val dataColumnNames = plan.output.map(_.name).filterNot(excluded.contains) - - val requiredAttrs = plan.output - - CarryOverRemoval(sorted, rowIdColumnNames, rowVersionColumnName, dataColumnNames, requiredAttrs) + throw new UnsupportedOperationException( + "Net change computation is not yet supported") } // --------------------------------------------------------------------------- - // Net Change Computation + // Helpers // --------------------------------------------------------------------------- /** - * Injects net change computation above the given plan. - * Collapses multiple changes per row identity into the net effect. + * Returns true if this plan has already been transformed by this rule. + * Uses a plan-level tag to prevent re-processing on subsequent rule executor iterations. */ - private def injectNetChangeComputation( - plan: LogicalPlan, - cl: Changelog): LogicalPlan = { - throw new UnsupportedOperationException( - "Net change computation is not yet supported") + private def isAlreadyTransformed(plan: LogicalPlan): Boolean = { + plan.getTagValue(CHANGELOG_TRANSFORMED_TAG).getOrElse(false) } + + /** + * Removes any helper columns (`_del_cnt`, `_ins_cnt`, `_min_rv`, `_max_rv`) that + * earlier steps added to the plan. Helper columns not present in the input are + * silently ignored, so the same method works for all three branches in + * [[addRowLevelPostProcessing]]. + */ + private def removeHelperColumns(input: LogicalPlan): LogicalPlan = { + val helperNames = Set("_del_cnt", "_ins_cnt", "_min_rv", "_max_rv") + Project(input.output.filterNot(a => helperNames.contains(a.name)), input) + } + + /** + * Looks up an attribute by name in a plan's output. Throws a clear error if missing -- + * used for required columns like `_change_type` / `_commit_version` / helper columns + * added by earlier steps; a missing column is always a programming error. + */ + private def getAttribute(plan: LogicalPlan, name: String): Attribute = + plan.output.find(_.name == name).getOrElse( + throw new IllegalStateException( + s"Required column '$name' not found in plan output: " + + plan.output.map(_.name).mkString(", "))) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/CarryOverRemoval.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/CarryOverRemoval.scala deleted file mode 100644 index deced45bf6f07..0000000000000 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/CarryOverRemoval.scala +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.catalyst.plans.logical - -import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeSet} - -/** - * Logical plan node for CDC carry-over removal. - * - * Expects its child to be pre-sorted by (rowId, rowVersion, _change_type ASC) within each - * partition, with data repartitioned by (rowId, rowVersion). - * - * Column references are stored as names (not ordinals) because the optimizer may insert - * projections that change column positions between analysis and execution. - * - * @param child the pre-sorted, repartitioned child plan - * @param rowIdColumnNames top-level names of row identity columns, e.g. Seq("id") or - * Seq("pk1", "pk2") for composite keys. Nested rowId paths are - * rejected at analysis time; see the TODO in - * [[org.apache.spark.sql.catalyst.analysis.ResolveChangelogTable]]. - * @param rowVersionColumnName name of the row version column (e.g. "_commit_version") - * @param dataColumnNames names of data columns for field-by-field comparison - * @param requiredAttributes attributes the optimizer must not project away - */ -case class CarryOverRemoval( - child: LogicalPlan, - rowIdColumnNames: Seq[String], - rowVersionColumnName: String, - dataColumnNames: Seq[String], - requiredAttributes: Seq[Attribute] = Seq.empty) extends UnaryNode { - - override def output: Seq[Attribute] = child.output - - // Tell the optimizer we still need every data column at execution time so ColumnPruning - // doesn't drop columns that CarryOverRemovalExec compares field-by-field. - override def references: AttributeSet = - AttributeSet(requiredAttributes) ++ super.references - - override def maxRows: Option[Long] = child.maxRows - - override protected def withNewChildInternal(newChild: LogicalPlan): CarryOverRemoval = - copy(child = newChild) -} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 19025f4078a7f..d824468aada10 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -3870,21 +3870,6 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat "actualType" -> actualType)) } - def changelogNestedRowIdError( - changelogName: String, rowId: String): AnalysisException = { - new AnalysisException( - errorClass = "INVALID_CHANGELOG_SCHEMA.NESTED_ROW_ID", - messageParameters = Map( - "changelogName" -> changelogName, - "rowId" -> rowId)) - } - - def changelogMissingRowIdError(changelogName: String): AnalysisException = { - new AnalysisException( - errorClass = "INVALID_CHANGELOG_SCHEMA.MISSING_ROW_ID", - messageParameters = Map("changelogName" -> changelogName)) - } - def invalidCdcOptionConflictingRangeTypes(): Throwable = { new AnalysisException( errorClass = "INVALID_CDC_OPTION.CONFLICTING_RANGE_TYPES", diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ChangelogTable.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ChangelogTable.scala index 3ad5cbf7a005d..fa445b9123017 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ChangelogTable.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ChangelogTable.scala @@ -19,8 +19,7 @@ package org.apache.spark.sql.execution.datasources.v2 import java.util.{EnumSet => JEnumSet, Set => JSet} -import org.apache.spark.sql.connector.catalog.{ - Changelog, ChangelogInfo, Column, SupportsRead, Table, TableCapability} +import org.apache.spark.sql.connector.catalog.{Changelog, ChangelogInfo, Column, SupportsRead, Table, TableCapability} import org.apache.spark.sql.connector.catalog.TableCapability.{BATCH_READ, MICRO_BATCH_READ} import org.apache.spark.sql.connector.read.ScanBuilder import org.apache.spark.sql.errors.QueryCompilationErrors @@ -69,23 +68,5 @@ object ChangelogTable { check("_change_type", StringType) check("_commit_version") // connector-defined, any type accepted check("_commit_timestamp", TimestampType) - // If the connector advertises any property that requires row identity for Spark-side - // post-processing, rowId() must be non-empty. Checked here so bad connectors fail at - // relation construction rather than silently having post-processing skipped. - val needsRowId = - cl.containsCarryoverRows || - cl.representsUpdateAsDeleteAndInsert || - cl.containsIntermediateChanges - if (needsRowId && cl.rowId().isEmpty) { - throw QueryCompilationErrors.changelogMissingRowIdError(cl.name) - } - // Assumption: every rowId is a top-level column (nested paths rejected until supported) - cl.rowId().foreach { ref => - val parts = ref.fieldNames() - if (parts.length != 1) { - throw QueryCompilationErrors.changelogNestedRowIdError( - cl.name, parts.mkString(".")) - } - } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryChangelogCatalog.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryChangelogCatalog.scala index b31715d2aa762..ff3baf16065d8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryChangelogCatalog.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryChangelogCatalog.scala @@ -135,6 +135,8 @@ class InMemoryChangelogCatalog extends InMemoryCatalog { * @param containsIntermediateChanges whether multiple changes per row may exist * @param representsUpdateAsDeleteAndInsert whether updates appear as raw delete+insert * @param rowIdNames optional row identity columns as top-level names (e.g. Seq("id")) + * @param rowIdPaths optional row identity paths for nested struct fields + * (e.g. Seq(Seq("payload", "id"))); takes precedence over rowIdNames * @param rowVersionName optional row version column (e.g. Some("_commit_version")) */ case class ChangelogProperties( @@ -142,6 +144,7 @@ case class ChangelogProperties( containsIntermediateChanges: Boolean = false, representsUpdateAsDeleteAndInsert: Boolean = false, rowIdNames: Seq[String] = Seq.empty, + rowIdPaths: Seq[Seq[String]] = Seq.empty, rowVersionName: Option[String] = None) /** @@ -174,7 +177,11 @@ class InMemoryChangelog( properties.representsUpdateAsDeleteAndInsert override def rowId(): Array[NamedReference] = { - properties.rowIdNames.map(name => FieldReference.column(name): NamedReference).toArray + if (properties.rowIdPaths.nonEmpty) { + properties.rowIdPaths.map(parts => FieldReference(parts): NamedReference).toArray + } else { + properties.rowIdNames.map(name => FieldReference.column(name): NamedReference).toArray + } } override def rowVersion(): NamedReference = properties.rowVersionName match { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CarryOverIterator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CarryOverIterator.scala deleted file mode 100644 index 3f9f09b5d66ab..0000000000000 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CarryOverIterator.scala +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.execution - -import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.util.TypeUtils -import org.apache.spark.sql.types.{DataType, StructType} - -/** - * Streaming iterator for CDC carry-over removal. - * - * Runs a three-buffer state machine (`pendingDelete`, `pendingEmit`, `pendingNext`; see field - * docs below) over a pre-sorted iterator (sorted by rowId, rowVersion, _change_type ASC), - * comparing consecutive delete+insert pairs with the same (rowId, rowVersion): - * - If all data columns are identical -> carry-over, drop both rows - * - If data columns differ -> real change, emit both rows - * - Unpaired delete or insert -> emit as-is - * - * @param input pre-sorted iterator of InternalRow - * @param rowIdOrdinals column indices for row identity columns (supports composite keys) - * @param rowVersionOrdinal column index for row version comparison - * @param changeTypeOrdinal column index for _change_type (String: "delete" or "insert") - * @param dataOrdinals column indices for data column comparison (field-by-field equality) - * @param schema the output schema for generic data column comparison - */ -class CarryOverIterator( - input: Iterator[InternalRow], - rowIdOrdinals: Array[Int], - rowVersionOrdinal: Int, - changeTypeOrdinal: Int, - dataOrdinals: Array[Int], - schema: StructType) extends Iterator[InternalRow] { - - // State-machine buffers. - // - // pendingDelete: a delete row that has NOT yet decided its fate. Set when we read a delete - // from input; cleared when we either emit it (no paired insert / paired - // with different data) or drop it (paired with identical insert = carry-over). - // - // pendingEmit: the row to return on the next next()/hasNext() call. Serves as the - // "ready" signal: advance() runs until pendingEmit is non-null or input - // is drained. Cleared by next() after emission. - // - // pendingNext: a row read from input that could not be consumed in the current pass and - // must be re-examined on the next advance() tick. Set when the peeked-at - // row turns out to be cross-group (new rowId) or a stray delete. - // - // Invariants at every iteration boundary (hasNext -> advance -> next): - // - At most one of pendingDelete / pendingEmit is set on entry to advance(). - // - pendingNext may be set independently of the other two. - // - advance() exits only when pendingEmit != null OR input is drained AND pendingDelete - // is null AND pendingNext is null. - private var pendingDelete: InternalRow = null - private var pendingEmit: InternalRow = null - private var pendingNext: InternalRow = null - - // Cached at construction: types (for generic InternalRow.get) and type-aware orderings for - // data columns. Orderings are needed because `==` on Array[Byte] / ArrayData / InternalRow - // is reference equality. - private val rowIdTypes: Array[DataType] = - rowIdOrdinals.map(i => schema(i).dataType) - private val rowVersionType: DataType = schema(rowVersionOrdinal).dataType - private val dataColumnTypes: Array[DataType] = - dataOrdinals.map(i => schema(i).dataType) - private val dataColumnOrderings: Array[Ordering[Any]] = - dataColumnTypes.map(TypeUtils.getInterpretedOrdering) - - override def hasNext: Boolean = { - if (pendingEmit != null) return true - advance() - pendingEmit != null - } - - override def next(): InternalRow = { - if (!hasNext) throw new NoSuchElementException - val result = pendingEmit - pendingEmit = null - result - } - - /** - * Drives the carry-over removal state machine forward until `pendingEmit` holds a row to - * return or the input is drained with nothing left to emit. - * - * Each tick handles one of four input states, resolved in this priority order: - * - * (1) `pendingNext` non-null. A row re-queued by a previous tick (cross-group or stray - * delete). If it's a delete, becomes `pendingDelete` and we loop. Otherwise it's the - * next thing to emit and we're done. - * - * (2) No `pendingDelete` and input empty. Nothing left to do -> exit. - * - * (3) `pendingDelete` non-null and input empty. The buffered delete has no partner and - * must be emitted as-is (real change). - * - * (4) No `pendingDelete` and input has more. Read the next row. If it's a delete, buffer - * it (needs the next row to classify). Otherwise emit directly. - * - * (5) `pendingDelete` non-null and input has more. Read the next row and decide: - * - same (rowId, rowVersion) AND `_change_type == insert` AND all data columns equal - * -> CoW carry-over: drop both. - * - same group AND insert AND data differs: real UPDATE-as-delete-insert -> emit the - * delete now, re-queue the insert as `pendingNext` for emission next tick. - * - different group OR not an insert: the buffered delete was unpaired; emit it and - * re-queue the peeked row. - * - * Invariant: sort order is (rowId, rowVersion, `_change_type ASC`) so `delete` always - * precedes `insert` within a group. This lets us hold at most one unmatched delete at a - * time without multi-row lookahead. - * - * Termination: every iteration either (a) consumes one row from `input`, (b) resolves a - * buffered state by clearing `pendingNext` or `pendingDelete`, or (c) sets `pendingEmit` - * and exits. No path leaves all three buffers unchanged while `input` still has rows, so - * no infinite loop is possible. - */ - private def advance(): Unit = { - while (pendingEmit == null) { - if (pendingNext != null) { - val row = pendingNext - pendingNext = null - if (getChangeType(row) == "delete") { - pendingDelete = row - } else { - pendingEmit = row - return - } - } - if (pendingDelete == null && !input.hasNext) { - return - } else if (pendingDelete != null && !input.hasNext) { - pendingEmit = pendingDelete - pendingDelete = null - } else if (pendingDelete == null && input.hasNext) { - val row = input.next().copy() - if (getChangeType(row) == "delete") { - pendingDelete = row - } else { - pendingEmit = row - } - } else { - // pendingDelete != null && input.hasNext - val nextRow = input.next().copy() - if (getChangeType(nextRow) == "insert" && sameGroup(pendingDelete, nextRow)) { - if (dataColumnsEqual(pendingDelete, nextRow)) { - // carry-over: drop both - pendingDelete = null - } else { - pendingEmit = pendingDelete - pendingDelete = null - pendingNext = nextRow - } - } else { - // no partner - pendingEmit = pendingDelete - pendingDelete = null - pendingNext = nextRow - } - } - } - } - - /** - * Checks whether two rows have the same rowId and rowVersion values. - * rowId columns are always top-level in `plan.output`; connectors with nested identity - * fields currently have to project them to the top level (see - * [[org.apache.spark.sql.catalyst.analysis.ResolveChangelogTable]] for the restriction - * point and the TODO that tracks re-adding nested support). - */ - private def sameGroup(a: InternalRow, b: InternalRow): Boolean = { - val n = rowIdOrdinals.length - var k = 0 - while (k < n) { - val i = rowIdOrdinals(k) - val aNull = a.isNullAt(i) - val bNull = b.isNullAt(i) - if (aNull != bNull) return false - if (!aNull && a.get(i, rowIdTypes(k)) != b.get(i, rowIdTypes(k))) return false - k += 1 - } - val aNull = a.isNullAt(rowVersionOrdinal) - val bNull = b.isNullAt(rowVersionOrdinal) - if (aNull != bNull) return false - if (aNull) return true - a.get(rowVersionOrdinal, rowVersionType) == b.get(rowVersionOrdinal, rowVersionType) - } - - /** Checks whether two rows have identical data column values via type-aware orderings. */ - private def dataColumnsEqual(a: InternalRow, b: InternalRow): Boolean = { - val n = dataOrdinals.length - var k = 0 - while (k < n) { - val i = dataOrdinals(k) - val aNull = a.isNullAt(i) - val bNull = b.isNullAt(i) - if (aNull != bNull) return false - if (!aNull) { - val dt = dataColumnTypes(k) - if (dataColumnOrderings(k).compare(a.get(i, dt), b.get(i, dt)) != 0) return false - } - k += 1 - } - true - } - - /** - * Returns the _change_type string value from a row. Fails with a descriptive error if - * the column is null; a null _change_type indicates a misbehaving connector. - */ - private def getChangeType(row: InternalRow): String = { - if (row.isNullAt(changeTypeOrdinal)) { - throw new IllegalStateException( - "Connector emitted a change row with null _change_type; " + - "expected 'insert', 'delete', 'update_preimage', or 'update_postimage'.") - } - row.getUTF8String(changeTypeOrdinal).toString - } -} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CarryOverRemovalExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CarryOverRemovalExec.scala deleted file mode 100644 index e0815b14d48ab..0000000000000 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CarryOverRemovalExec.scala +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.execution - -import org.apache.spark.rdd.RDD -import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.Attribute - -/** - * Physical execution node for CDC carry-over removal. - * - * Expects input pre-sorted by (rowId, rowVersion, _change_type ASC) within each partition. - * Uses [[CarryOverIterator]] to compare consecutive delete+insert pairs and drop identical - * pairs (CoW carry-over artifacts). - * - * All ordinals are resolved dynamically from the actual child output at execution time, - * because the optimizer may insert projections that change column positions. - * - * @param child the pre-sorted child plan - * @param rowIdColumnNames top-level names of row identity columns (supports composite keys) - * @param rowVersionColumnName name of the row version column - * @param dataColumnNames names of data columns for comparison - */ -case class CarryOverRemovalExec( - child: SparkPlan, - rowIdColumnNames: Seq[String], - rowVersionColumnName: String, - dataColumnNames: Seq[String]) extends UnaryExecNode { - - override def output: Seq[Attribute] = child.output - - override protected def doExecute(): RDD[InternalRow] = { - val outputSchema = child.schema - val outputNames = child.output.map(_.name) - - val changeTypeOrdinal = resolveOrdinal(outputNames, "_change_type") - val rowVersionOrdinal = resolveOrdinal(outputNames, rowVersionColumnName) - val rowIdOrdinals = rowIdColumnNames.map(resolveOrdinal(outputNames, _)).toArray - val dataOrdinals = dataColumnNames.map(resolveOrdinal(outputNames, _)).toArray - - val rdd = child.execute() - rdd.mapPartitionsInternal { iter => - new CarryOverIterator( - iter, - rowIdOrdinals, - rowVersionOrdinal, - changeTypeOrdinal, - dataOrdinals, - outputSchema - ) - } - } - - /** Resolves a column name to its ordinal index, throwing a clear error if not found. */ - private def resolveOrdinal(outputNames: Seq[String], name: String): Int = { - val idx = outputNames.indexOf(name) - require(idx >= 0, - s"Column '$name' not found in CarryOverRemovalExec child output: " + - s"${outputNames.mkString(", ")}") - idx - } - - override protected def withNewChildInternal(newChild: SparkPlan): CarryOverRemovalExec = - copy(child = newChild) -} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index 94719293299c6..919c1b97fd0f7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -941,10 +941,6 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { execution.DeserializeToObjectExec(deserializer, objAttr, planLater(child)) :: Nil case logical.SerializeFromObject(serializer, child) => execution.SerializeFromObjectExec(serializer, planLater(child)) :: Nil - case logical.CarryOverRemoval(child, rowIdColumnNames, rowVersionColumnName, - dataColumnNames, _) => - execution.CarryOverRemovalExec( - planLater(child), rowIdColumnNames, rowVersionColumnName, dataColumnNames) :: Nil case logical.MapPartitions(f, objAttr, child) => execution.MapPartitionsExec(f, objAttr, planLater(child)) :: Nil case logical.MapPartitionsInR(f, p, b, is, os, objAttr, child) => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/ChangelogResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/ChangelogResolutionSuite.scala index b00ba46ac56ef..1191528254179 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/ChangelogResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/ChangelogResolutionSuite.scala @@ -17,14 +17,14 @@ package org.apache.spark.sql.connector -import java.util +import java.util.Collections import org.apache.spark.sql.{AnalysisException, QueryTest} import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2 import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.connector.catalog.ChangelogRange -import org.apache.spark.sql.connector.expressions.{FieldReference, NamedReference} +import org.apache.spark.sql.connector.expressions.{NamedReference, Transform} import org.apache.spark.sql.connector.read.ScanBuilder import org.apache.spark.sql.execution.datasources.v2.{ChangelogTable, DataSourceV2Relation} import org.apache.spark.sql.test.SharedSparkSession @@ -66,8 +66,8 @@ class ChangelogResolutionSuite extends QueryTest with SharedSparkSession { Array( Column.create("id", LongType), Column.create("data", StringType)), - Array.empty, - new util.HashMap[String, String]()) + Array.empty[Transform], + Collections.emptyMap[String, String]()) val noCdcCat = spark.sessionState.catalogManager.catalog(noCdcCatalogName).asTableCatalog val ident2 = Identifier.of(Array.empty, "test_table") @@ -79,8 +79,8 @@ class ChangelogResolutionSuite extends QueryTest with SharedSparkSession { Array( Column.create("id", LongType), Column.create("data", StringType)), - Array.empty, - new util.HashMap[String, String]()) + Array.empty[Transform], + Collections.emptyMap[String, String]()) } test("CHANGES clause resolves to DataSourceV2Relation with ChangelogTable") { @@ -299,43 +299,6 @@ class ChangelogResolutionSuite extends QueryTest with SharedSparkSession { validChangeType, validVersion, validTimestamp), stubInfo()) } - - test("ChangelogTable - post-processing flag with empty rowId throws") { - // Connector advertises containsCarryoverRows=true but returns no rowId(). Spark cannot - // post-process without row identity, so relation construction must fail fast. - val bad = new BrokenChangelog( - "no_rowid_cl", - Array( - Column.create("id", LongType), - Column.create("_change_type", StringType), - Column.create("_commit_version", LongType), - Column.create("_commit_timestamp", TimestampType))) { - override def containsCarryoverRows(): Boolean = true - override def rowId(): Array[NamedReference] = Array.empty - } - checkError( - intercept[AnalysisException] { ChangelogTable(bad, stubInfo()) }, - condition = "INVALID_CHANGELOG_SCHEMA.MISSING_ROW_ID", - parameters = Map("changelogName" -> "no_rowid_cl")) - } - - test("ChangelogTable - nested rowId (multi-segment NamedReference) throws") { - // Nested rowId paths (e.g. Seq("payload", "id")) are intentionally rejected for now. - // See the TODO in ResolveChangelogTable for the re-enable plan. - val nested = new BrokenChangelog( - "nested_cl", - Array( - Column.create("payload", LongType), - Column.create("_change_type", StringType), - Column.create("_commit_version", LongType), - Column.create("_commit_timestamp", TimestampType))) { - override def rowId(): Array[NamedReference] = Array(FieldReference(Seq("payload", "id"))) - } - checkError( - intercept[AnalysisException] { ChangelogTable(nested, stubInfo()) }, - condition = "INVALID_CHANGELOG_SCHEMA.NESTED_ROW_ID", - parameters = Map("changelogName" -> "nested_cl", "rowId" -> "payload.id")) - } } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/ResolveChangelogTablePostProcessingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/ResolveChangelogTablePostProcessingSuite.scala index c7c93c74f8ff5..9eb136940424b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/ResolveChangelogTablePostProcessingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/ResolveChangelogTablePostProcessingSuite.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.connector.catalog.{ import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{ - BinaryType, BooleanType, DoubleType, LongType, StringType} + BinaryType, BooleanType, DoubleType, LongType, StringType, StructField, StructType} import org.apache.spark.unsafe.types.UTF8String /** @@ -67,7 +67,8 @@ class ResolveChangelogTablePostProcessingSuite ident, Array( Column.create("id", LongType), - Column.create("name", StringType)), + Column.create("name", StringType), + Column.create("row_commit_version", LongType)), Array.empty[Transform], Collections.emptyMap[String, String]()) } @@ -82,17 +83,25 @@ class ResolveChangelogTablePostProcessingSuite /** * Helper to create a change row matching schema - * (id, name, _change_type, _commit_version, _commit_timestamp). + * (id, name, row_commit_version, _change_type, _commit_version, _commit_timestamp). + * + * `rowCommitVersion` follows Delta row-tracking semantics: carry-over pairs (CoW-rewritten + * unchanged rows) share the same value on both sides; real updates carry the OLD value on + * the delete side and the NEW value on the insert side. Defaults to `commitVersion` for + * tests that don't exercise carry-over removal. */ private def changeRow( id: Long, name: String, changeType: String, commitVersion: Long, + rowCommitVersion: Long = -1L, commitTimestamp: Long = 0L): InternalRow = { + val rcv = if (rowCommitVersion == -1L) commitVersion else rowCommitVersion InternalRow( id, UTF8String.fromString(name), + rcv, UTF8String.fromString(changeType), commitVersion, commitTimestamp) @@ -106,16 +115,17 @@ class ResolveChangelogTablePostProcessingSuite catalog.setChangelogProperties(ident, ChangelogProperties( containsCarryoverRows = true, rowIdNames = Seq("id"), - rowVersionName = Some("_commit_version"))) + rowVersionName = Some("row_commit_version"))) - // v1: insert Alice and Bob - // v2: real delete Alice, carry-over for Bob (delete + insert with identical data) + // v1: insert Alice and Bob (rcv=1 each) + // v2: real delete Alice (preimage carries old rcv=1); + // carry-over for Bob (CoW, rcv unchanged on both sides) catalog.addChangeRows(ident, Seq( - changeRow(1L, "Alice", "insert", 1L), - changeRow(2L, "Bob", "insert", 1L), - changeRow(1L, "Alice", "delete", 2L), - changeRow(2L, "Bob", "delete", 2L), // carry-over - changeRow(2L, "Bob", "insert", 2L))) // carry-over + changeRow(1L, "Alice", "insert", 1L, rowCommitVersion = 1L), + changeRow(2L, "Bob", "insert", 1L, rowCommitVersion = 1L), + changeRow(1L, "Alice", "delete", 2L, rowCommitVersion = 1L), + changeRow(2L, "Bob", "delete", 2L, rowCommitVersion = 1L), // carry-over + changeRow(2L, "Bob", "insert", 2L, rowCommitVersion = 1L))) // carry-over (same rcv) val rows = sql( s"SELECT id, name, _change_type, _commit_version " + @@ -142,12 +152,12 @@ class ResolveChangelogTablePostProcessingSuite catalog.setChangelogProperties(ident, ChangelogProperties( containsCarryoverRows = true, rowIdNames = Seq("id"), - rowVersionName = Some("_commit_version"))) + rowVersionName = Some("row_commit_version"))) catalog.addChangeRows(ident, Seq( - changeRow(1L, "Alice", "insert", 1L), - changeRow(2L, "Bob", "delete", 2L), - changeRow(2L, "Bob", "insert", 2L))) + changeRow(1L, "Alice", "insert", 1L, rowCommitVersion = 1L), + changeRow(2L, "Bob", "delete", 2L, rowCommitVersion = 1L), + changeRow(2L, "Bob", "insert", 2L, rowCommitVersion = 1L))) val rows = sql( s"SELECT id FROM $catalogName.$testTableName " + @@ -166,7 +176,7 @@ class ResolveChangelogTablePostProcessingSuite containsCarryoverRows = false, // no carry-overs in this test representsUpdateAsDeleteAndInsert = true, rowIdNames = Seq("id"), - rowVersionName = Some("_commit_version"))) + rowVersionName = Some("row_commit_version"))) catalog.addChangeRows(ident, Seq( changeRow(1L, "Alice", "insert", 1L), @@ -196,7 +206,7 @@ class ResolveChangelogTablePostProcessingSuite catalog.setChangelogProperties(ident, ChangelogProperties( representsUpdateAsDeleteAndInsert = true, rowIdNames = Seq("id"), - rowVersionName = Some("_commit_version"))) + rowVersionName = Some("row_commit_version"))) catalog.addChangeRows(ident, Seq( changeRow(1L, "Alice", "insert", 1L), @@ -244,17 +254,17 @@ class ResolveChangelogTablePostProcessingSuite containsCarryoverRows = true, representsUpdateAsDeleteAndInsert = true, rowIdNames = Seq("id"), - rowVersionName = Some("_commit_version"))) + rowVersionName = Some("row_commit_version"))) - // v1: insert Alice, Bob - // v2: Alice carry-over (CoW), Bob real update (Bob -> Robert) + // v1: insert Alice (rcv=1), Bob (rcv=1) + // v2: Alice carry-over (CoW, rcv unchanged), Bob real update (old rcv=1, new rcv=2) catalog.addChangeRows(ident, Seq( - changeRow(1L, "Alice", "insert", 1L), - changeRow(2L, "Bob", "insert", 1L), - changeRow(1L, "Alice", "delete", 2L), // carry-over - changeRow(1L, "Alice", "insert", 2L), // carry-over - changeRow(2L, "Bob", "delete", 2L), // update preimage - changeRow(2L, "Robert", "insert", 2L))) // update postimage + changeRow(1L, "Alice", "insert", 1L, rowCommitVersion = 1L), + changeRow(2L, "Bob", "insert", 1L, rowCommitVersion = 1L), + changeRow(1L, "Alice", "delete", 2L, rowCommitVersion = 1L), // carry-over + changeRow(1L, "Alice", "insert", 2L, rowCommitVersion = 1L), // carry-over + changeRow(2L, "Bob", "delete", 2L, rowCommitVersion = 1L), // update preimage (old rcv) + changeRow(2L, "Robert", "insert", 2L, rowCommitVersion = 2L))) // update postimage (new rcv) val rows = sql( s"SELECT id, name, _change_type FROM $catalogName.$testTableName " + @@ -286,15 +296,17 @@ class ResolveChangelogTablePostProcessingSuite containsCarryoverRows = true, representsUpdateAsDeleteAndInsert = true, rowIdNames = Seq("id"), - rowVersionName = Some("_commit_version"))) + rowVersionName = Some("row_commit_version"))) catalog.addChangeRows(ident, Seq( - changeRow(1L, "Alice", "insert", 1L), - changeRow(2L, "Bob", "insert", 1L), - changeRow(1L, "Alice", "delete", 2L), // carry-over (CoW) - changeRow(1L, "Alice", "insert", 2L), // carry-over (CoW) - changeRow(2L, "Bob", "delete", 2L), // real change pre - changeRow(2L, "Robert", "insert", 2L))) // real change post + changeRow(1L, "Alice", "insert", 1L, rowCommitVersion = 1L), + changeRow(2L, "Bob", "insert", 1L, rowCommitVersion = 1L), + // Alice: carry-over (CoW, rcv unchanged on both sides) + changeRow(1L, "Alice", "delete", 2L, rowCommitVersion = 1L), + changeRow(1L, "Alice", "insert", 2L, rowCommitVersion = 1L), + // Bob -> Robert: real change (old rcv on pre, new rcv on post) + changeRow(2L, "Bob", "delete", 2L, rowCommitVersion = 1L), + changeRow(2L, "Robert", "insert", 2L, rowCommitVersion = 2L))) // Default computeUpdates=false: do NOT relabel, but DO drop carry-overs val rows = sql( @@ -316,7 +328,7 @@ class ResolveChangelogTablePostProcessingSuite catalog.setChangelogProperties(ident, ChangelogProperties( representsUpdateAsDeleteAndInsert = true, rowIdNames = Seq("id"), - rowVersionName = Some("_commit_version"))) + rowVersionName = Some("row_commit_version"))) catalog.addChangeRows(ident, Seq( changeRow(1L, "Alice", "insert", 1L), @@ -332,34 +344,6 @@ class ResolveChangelogTablePostProcessingSuite s"Pure inserts must stay 'insert'. Got: ${rows.map(_.getString(1)).mkString(",")}") } - // Pins the CASE-WHEN default branch in injectUpdateDetection: when a (rowId, rowVersion) - // partition has _del_cnt=1 but _ins_cnt=0, isUpdate is false and _change_type stays 'delete'. - test("update detection keeps delete as-is when partition has no paired insert") { - catalog.setChangelogProperties(ident, ChangelogProperties( - representsUpdateAsDeleteAndInsert = true, - rowIdNames = Seq("id"), - rowVersionName = Some("_commit_version"))) - - catalog.addChangeRows(ident, Seq( - changeRow(1L, "Alice", "insert", 1L), - changeRow(1L, "Alice", "delete", 2L))) // unpaired delete at v2 - - val rows = sql( - s"SELECT id, name, _change_type, _commit_version FROM $catalogName.$testTableName " + - s"CHANGES FROM VERSION 1 TO VERSION 2 WITH (computeUpdates = 'true')") - .collect() - - val descs = rows.map(r => - s"${r.getLong(0)}:${r.getString(1)}:${r.getString(2)}:v${r.getLong(3)}") - - assert(descs.contains("1:Alice:insert:v1")) - assert(descs.contains("1:Alice:delete:v2"), - s"Unpaired delete must stay 'delete', not become update_preimage. " + - s"Got: ${descs.mkString(",")}") - assert(!descs.exists(_.contains("update_")), - "No update_* labels when no insert partner exists in the partition") - } - // =========================================================================== // Range edge cases // =========================================================================== @@ -385,26 +369,27 @@ class ResolveChangelogTablePostProcessingSuite containsCarryoverRows = true, representsUpdateAsDeleteAndInsert = true, rowIdNames = Seq("id"), - rowVersionName = Some("_commit_version"))) + rowVersionName = Some("row_commit_version"))) catalog.addChangeRows(ident, Seq( - // v1: insert 3 rows - changeRow(1L, "Alice", "insert", 1L), - changeRow(2L, "Bob", "insert", 1L), - changeRow(3L, "Charlie", "insert", 1L), - // v2: delete Alice; CoW carry-overs for Bob/Charlie - changeRow(1L, "Alice", "delete", 2L), - changeRow(2L, "Bob", "delete", 2L), - changeRow(2L, "Bob", "insert", 2L), - changeRow(3L, "Charlie", "delete", 2L), - changeRow(3L, "Charlie", "insert", 2L), - // v3: update Bob -> Robert; CoW carry-over for Charlie - changeRow(2L, "Bob", "delete", 3L), - changeRow(2L, "Robert", "insert", 3L), - changeRow(3L, "Charlie", "delete", 3L), - changeRow(3L, "Charlie", "insert", 3L), - // v4: insert Diana - changeRow(4L, "Diana", "insert", 4L))) + // v1: insert 3 rows (rcv=1 each) + changeRow(1L, "Alice", "insert", 1L, rowCommitVersion = 1L), + changeRow(2L, "Bob", "insert", 1L, rowCommitVersion = 1L), + changeRow(3L, "Charlie", "insert", 1L, rowCommitVersion = 1L), + // v2: delete Alice (preimage carries old rcv=1); CoW carry-overs for Bob/Charlie + // keep rcv=1 on both sides (row unchanged). + changeRow(1L, "Alice", "delete", 2L, rowCommitVersion = 1L), + changeRow(2L, "Bob", "delete", 2L, rowCommitVersion = 1L), + changeRow(2L, "Bob", "insert", 2L, rowCommitVersion = 1L), + changeRow(3L, "Charlie", "delete", 2L, rowCommitVersion = 1L), + changeRow(3L, "Charlie", "insert", 2L, rowCommitVersion = 1L), + // v3: update Bob -> Robert (old rcv=1, new rcv=3); CoW carry-over for Charlie (rcv=1) + changeRow(2L, "Bob", "delete", 3L, rowCommitVersion = 1L), + changeRow(2L, "Robert", "insert", 3L, rowCommitVersion = 3L), + changeRow(3L, "Charlie", "delete", 3L, rowCommitVersion = 1L), + changeRow(3L, "Charlie", "insert", 3L, rowCommitVersion = 1L), + // v4: insert Diana (rcv=4) + changeRow(4L, "Diana", "insert", 4L, rowCommitVersion = 4L))) val rows = sql( s"SELECT id, name, _change_type, _commit_version FROM $catalogName.$testTableName " + @@ -431,6 +416,19 @@ class ResolveChangelogTablePostProcessingSuite assert(descs.contains("4:Diana:insert:v4")) } + test("larger insert batch returns all rows") { + catalog.addChangeRows(ident, (1 to 5).map(i => + changeRow(i.toLong, ('A' + i - 1).toChar.toString, "insert", 1L))) + + val rows = sql( + s"SELECT id, _change_type FROM $catalogName.$testTableName " + + s"CHANGES FROM VERSION 1 TO VERSION 1 WITH (deduplicationMode = 'none')") + .collect() + + assert(rows.length == 5) + assert(rows.forall(_.getString(1) == "insert")) + } + test("EXCLUSIVE start bound skips the start version") { catalog.addChangeRows(ident, Seq( changeRow(1L, "Alice", "insert", 1L), @@ -469,13 +467,14 @@ class ResolveChangelogTablePostProcessingSuite catalog.setChangelogProperties(ident, ChangelogProperties( containsCarryoverRows = true, rowIdNames = Seq("id"), - rowVersionName = Some("_commit_version"))) + rowVersionName = Some("row_commit_version"))) + // v1 inserts carry rcv=1; v2 deletes carry the old rcv=1 (rcv tracks last modification) catalog.addChangeRows(ident, Seq( - changeRow(1L, "Alice", "insert", 1L), - changeRow(2L, "Bob", "insert", 1L), - changeRow(1L, "Alice", "delete", 2L), - changeRow(2L, "Bob", "delete", 2L))) + changeRow(1L, "Alice", "insert", 1L, rowCommitVersion = 1L), + changeRow(2L, "Bob", "insert", 1L, rowCommitVersion = 1L), + changeRow(1L, "Alice", "delete", 2L, rowCommitVersion = 1L), + changeRow(2L, "Bob", "delete", 2L, rowCommitVersion = 1L))) val rows = sql( s"SELECT id, name, _change_type, _commit_version FROM $catalogName.$testTableName " + @@ -498,15 +497,16 @@ class ResolveChangelogTablePostProcessingSuite containsCarryoverRows = true, representsUpdateAsDeleteAndInsert = true, rowIdNames = Seq("id"), - rowVersionName = Some("_commit_version"))) + rowVersionName = Some("row_commit_version"))) + // Every v2 row is a real update: delete side carries old rcv=1, insert side new rcv=2. catalog.addChangeRows(ident, Seq( - changeRow(1L, "Alice", "insert", 1L), - changeRow(2L, "Bob", "insert", 1L), - changeRow(1L, "Alice", "delete", 2L), - changeRow(1L, "Alice_updated", "insert", 2L), - changeRow(2L, "Bob", "delete", 2L), - changeRow(2L, "Bob_updated", "insert", 2L))) + changeRow(1L, "Alice", "insert", 1L, rowCommitVersion = 1L), + changeRow(2L, "Bob", "insert", 1L, rowCommitVersion = 1L), + changeRow(1L, "Alice", "delete", 2L, rowCommitVersion = 1L), + changeRow(1L, "Alice_updated", "insert", 2L, rowCommitVersion = 2L), + changeRow(2L, "Bob", "delete", 2L, rowCommitVersion = 1L), + changeRow(2L, "Bob_updated", "insert", 2L, rowCommitVersion = 2L))) val rows = sql( s"SELECT id, name, _change_type, _commit_version FROM $catalogName.$testTableName " + @@ -524,22 +524,38 @@ class ResolveChangelogTablePostProcessingSuite assert(rows.length == 6, s"Expected 2 inserts + 2 pre + 2 post. Got ${rows.length}") } + test("append-only workload: all inserts, no carry-over needed") { + catalog.addChangeRows(ident, Seq( + changeRow(1L, "Alice", "insert", 1L), + changeRow(2L, "Bob", "insert", 2L), + changeRow(3L, "Charlie", "insert", 3L))) + + val rows = sql( + s"SELECT id, _change_type FROM $catalogName.$testTableName " + + s"CHANGES FROM VERSION 1 TO VERSION 3") + .collect() + + assert(rows.length == 3) + assert(rows.forall(_.getString(1) == "insert")) + } + test("carry-over removal with many rows: only real change remains") { catalog.setChangelogProperties(ident, ChangelogProperties( containsCarryoverRows = true, rowIdNames = Seq("id"), - rowVersionName = Some("_commit_version"))) + rowVersionName = Some("row_commit_version"))) - // 10 inserts at v1, then at v2: delete row 5; CoW writes 9 carry-over pairs + 1 real delete + // 10 inserts at v1 (rcv=1 each). At v2: delete row 5; CoW writes 9 carry-over pairs + // (rcv unchanged since v1, i.e. rcv=1 on both sides) plus 1 real delete (rcv=1, old). val v1Inserts = (1 to 10).map(i => - changeRow(i.toLong, ('A' + i - 1).toChar.toString, "insert", 1L)) + changeRow(i.toLong, ('A' + i - 1).toChar.toString, "insert", 1L, rowCommitVersion = 1L)) val v2Carryovers = (1 to 10).filter(_ != 5).flatMap { i => val name = ('A' + i - 1).toChar.toString Seq( - changeRow(i.toLong, name, "delete", 2L), - changeRow(i.toLong, name, "insert", 2L)) + changeRow(i.toLong, name, "delete", 2L, rowCommitVersion = 1L), + changeRow(i.toLong, name, "insert", 2L, rowCommitVersion = 1L)) } - val v2RealDelete = Seq(changeRow(5L, "E", "delete", 2L)) + val v2RealDelete = Seq(changeRow(5L, "E", "delete", 2L, rowCommitVersion = 1L)) catalog.addChangeRows(ident, v1Inserts ++ v2Carryovers ++ v2RealDelete) val rows = sql( @@ -566,31 +582,32 @@ class ResolveChangelogTablePostProcessingSuite Column.create("id", LongType), Column.create("name", StringType), Column.create("score", DoubleType), - Column.create("active", BooleanType)), + Column.create("active", BooleanType), + Column.create("row_commit_version", LongType)), Array.empty[Transform], Collections.emptyMap[String, String]()) cat.setChangelogProperties(mixedIdent, ChangelogProperties( containsCarryoverRows = true, representsUpdateAsDeleteAndInsert = true, rowIdNames = Seq("id"), - rowVersionName = Some("_commit_version"))) + rowVersionName = Some("row_commit_version"))) def mixedRow( id: Long, name: String, score: Double, active: Boolean, - ct: String, v: Long): InternalRow = { + ct: String, v: Long, rcv: Long): InternalRow = { InternalRow( - id, UTF8String.fromString(name), score, active, + id, UTF8String.fromString(name), score, active, rcv, UTF8String.fromString(ct), v, 0L) } cat.addChangeRows(mixedIdent, Seq( - mixedRow(1L, "Alice", 95.5, true, "insert", 1L), - mixedRow(2L, "Bob", 87.3, false, "insert", 1L), - // v2: update Alice's score; Bob is carry-over - mixedRow(1L, "Alice", 95.5, true, "delete", 2L), - mixedRow(1L, "Alice", 99.0, true, "insert", 2L), - mixedRow(2L, "Bob", 87.3, false, "delete", 2L), // carry-over - mixedRow(2L, "Bob", 87.3, false, "insert", 2L))) // carry-over + mixedRow(1L, "Alice", 95.5, true, "insert", 1L, rcv = 1L), + mixedRow(2L, "Bob", 87.3, false, "insert", 1L, rcv = 1L), + // v2: update Alice's score (old rcv=1, new rcv=2); Bob is carry-over (rcv unchanged) + mixedRow(1L, "Alice", 95.5, true, "delete", 2L, rcv = 1L), + mixedRow(1L, "Alice", 99.0, true, "insert", 2L, rcv = 2L), + mixedRow(2L, "Bob", 87.3, false, "delete", 2L, rcv = 1L), // carry-over + mixedRow(2L, "Bob", 87.3, false, "insert", 2L, rcv = 1L))) // carry-over val rows = sql( s"SELECT id, name, score, active, _change_type FROM $catalogName.$mixedTable " + @@ -612,80 +629,67 @@ class ResolveChangelogTablePostProcessingSuite } // =========================================================================== - // Iterator correctness: state-machine paths + null handling in dataColumnsEqual + // Regression: nested rowId + nested rowVersion end-to-end // =========================================================================== - // Pins the "pendingDelete always null, input is all inserts" path of CarryOverIterator. - // Even with containsCarryoverRows=true, a stream of only inserts must pass through - // unchanged; the iterator should never enter the delete/insert pairing branch. - test("carry-over iterator passes append-only stream through unchanged") { - catalog.setChangelogProperties(ident, ChangelogProperties( - containsCarryoverRows = true, - rowIdNames = Seq("id"), - rowVersionName = Some("_commit_version"))) - - catalog.addChangeRows(ident, Seq( - changeRow(1L, "Alice", "insert", 1L), - changeRow(2L, "Bob", "insert", 1L), - changeRow(3L, "Charlie", "insert", 1L))) + // rowId is payload.id (nested); rowVersion is also row-level. A delete+insert pair with + // the same payload.id but different row_commit_version is a real update and must survive. + // A pair with matching row_commit_version would be a CoW carry-over and would be dropped. + test("nested rowId must not hide sibling-field changes") { + val nestedTable = "events_nested" + val nestedIdent = Identifier.of(Array.empty, nestedTable) + val cat = catalog + if (cat.tableExists(nestedIdent)) cat.dropTable(nestedIdent) + cat.clearChangeRows(nestedIdent) - val rows = sql( - s"SELECT id, name, _change_type FROM $catalogName.$testTableName " + - s"CHANGES FROM VERSION 1 TO VERSION 1") - .orderBy("id") - .collect() + val payloadType = StructType(Seq( + StructField("id", LongType), + StructField("value", StringType))) - assert(rows.length == 3, s"All 3 inserts must survive, got ${rows.length}") - assert(rows.forall(_.getString(2) == "insert"), - s"Every row must stay 'insert'. Got: ${rows.map(_.getString(2)).mkString(",")}") - } + cat.createTable( + nestedIdent, + Array( + Column.create("payload", payloadType), + Column.create("row_commit_version", LongType)), + Array.empty[Transform], + Collections.emptyMap[String, String]()) - // Pins null handling in CarryOverIterator.dataColumnsEqual: - // - both-null -> equal -> carry-over dropped - // - one-null -> not equal -> both rows survive - test("carry-over removal handles null data values correctly") { - catalog.setChangelogProperties(ident, ChangelogProperties( + cat.setChangelogProperties(nestedIdent, ChangelogProperties( containsCarryoverRows = true, - rowIdNames = Seq("id"), - rowVersionName = Some("_commit_version"))) + rowIdPaths = Seq(Seq("payload", "id")), + rowVersionName = Some("row_commit_version"))) - // id=1: data null throughout (both-null carry-over, must be dropped) - // id=2: real change, "Bob" -> null (one-null, must survive as both rows) - catalog.addChangeRows(ident, Seq( - changeRow(1L, null, "insert", 1L), - changeRow(2L, "Bob", "insert", 1L), - changeRow(1L, null, "delete", 2L), // both-null carry-over delete - changeRow(1L, null, "insert", 2L), // both-null carry-over insert (drop pair) - changeRow(2L, "Bob", "delete", 2L), // one-null vs value: real change pre - changeRow(2L, null, "insert", 2L))) // one-null vs value: real change post + def nestedRow(id: Long, value: String, ct: String, v: Long, rcv: Long): InternalRow = { + InternalRow( + InternalRow(id, UTF8String.fromString(value)), + rcv, + UTF8String.fromString(ct), v, 0L) + } + + cat.addChangeRows(nestedIdent, Seq( + nestedRow(1L, "original", "insert", 1L, rcv = 1L), + // v2 update: rowId same, rowVersion differs (old rcv=1 on preimage, new rcv=2 on postimage) + nestedRow(1L, "original", "delete", 2L, rcv = 1L), + nestedRow(1L, "CHANGED", "insert", 2L, rcv = 2L))) val rows = sql( - s"SELECT id, name, _change_type, _commit_version FROM $catalogName.$testTableName " + - s"CHANGES FROM VERSION 1 TO VERSION 2") - .orderBy("_commit_version", "id", "_change_type") + s"SELECT payload.id AS id, payload.value AS value, _change_type, _commit_version " + + s"FROM $catalogName.$nestedTable CHANGES FROM VERSION 1 TO VERSION 2") + .orderBy("_commit_version", "_change_type") .collect() - // Use a string that makes null visible in the description - val descs = rows.map { r => - val nameStr = if (r.isNullAt(1)) "" else r.getString(1) - s"${r.getLong(0)}:$nameStr:${r.getString(2)}:v${r.getLong(3)}" - } + val descs = rows.map(r => + s"${r.getLong(0)}:${r.getString(1)}:${r.getString(2)}:v${r.getLong(3)}") - // v1 inserts preserved - assert(descs.contains("1::insert:v1"), s"Got: ${descs.mkString(",")}") - assert(descs.contains("2:Bob:insert:v1")) - // id=1 both-null pair dropped as carry-over - assert(!descs.contains("1::delete:v2"), - s"both-null pair must be dropped as carry-over. Got: ${descs.mkString(",")}") - assert(!descs.contains("1::insert:v2"), - s"both-null pair must be dropped as carry-over. Got: ${descs.mkString(",")}") - // id=2 value-vs-null real change survives - assert(descs.contains("2:Bob:delete:v2"), - s"value-vs-null pair must survive. Got: ${descs.mkString(",")}") - assert(descs.contains("2::insert:v2"), - s"value-vs-null pair must survive. Got: ${descs.mkString(",")}") - assert(rows.length == 4, - s"Expected 4 rows (2 v1 inserts + 2 id=2 v2 real change). Got ${rows.length}") + assert(descs.contains("1:original:insert:v1"), + s"v1 insert must survive. Got: ${descs.mkString(",")}") + assert(descs.contains("1:original:delete:v2"), + s"v2 delete must survive (payload.value differs from insert). Got: ${descs.mkString(",")}") + assert(descs.contains("1:CHANGED:insert:v2"), + s"v2 insert must survive (payload.value differs from delete). Got: ${descs.mkString(",")}") + assert(rows.length == 3, + s"Expected 3 rows (v1 insert + v2 delete + v2 insert). Got ${rows.length}: " + + descs.mkString(",")) } // =========================================================================== @@ -706,30 +710,35 @@ class ResolveChangelogTablePostProcessingSuite binIdent, Array( Column.create("id", LongType), - Column.create("payload", BinaryType)), + Column.create("payload", BinaryType), + Column.create("row_commit_version", LongType)), Array.empty[Transform], Collections.emptyMap[String, String]()) cat.setChangelogProperties(binIdent, ChangelogProperties( containsCarryoverRows = true, rowIdNames = Seq("id"), - rowVersionName = Some("_commit_version"))) + rowVersionName = Some("row_commit_version"))) - def binRow(id: Long, payload: Array[Byte], ct: String, v: Long): InternalRow = { - InternalRow(id, payload, UTF8String.fromString(ct), v, 0L) + def binRow(id: Long, payload: Array[Byte], ct: String, v: Long, rcv: Long): InternalRow = { + InternalRow(id, payload, rcv, UTF8String.fromString(ct), v, 0L) } - // Bob's carry-over: two distinct byte[] instances holding identical bytes. + // Bob's carry-over: two distinct byte[] instances; but partition-based carry-over + // detection looks only at rowVersion, not byte content, so the distinct-instance + // concern is moot under the new design. Kept as a regression guard for schemas + // containing BinaryType columns. val bobPayloadA = Array[Byte](1, 2, 3, 4) val bobPayloadB = Array[Byte](1, 2, 3, 4) assert(bobPayloadA ne bobPayloadB, "test precondition: distinct array instances") cat.addChangeRows(binIdent, Seq( - binRow(1L, Array[Byte](9, 9), "insert", 1L), - binRow(2L, bobPayloadA, "insert", 1L), - // v2: real delete for Alice; Bob carry-over with distinct-but-equal byte arrays - binRow(1L, Array[Byte](9, 9), "delete", 2L), - binRow(2L, bobPayloadA, "delete", 2L), // carry-over (same instance as v1 insert) - binRow(2L, bobPayloadB, "insert", 2L))) // carry-over (distinct instance, equal bytes) + binRow(1L, Array[Byte](9, 9), "insert", 1L, rcv = 1L), + binRow(2L, bobPayloadA, "insert", 1L, rcv = 1L), + // v2: real delete for Alice (preimage carries old rcv=1); + // Bob carry-over (rcv unchanged=1 on both sides) + binRow(1L, Array[Byte](9, 9), "delete", 2L, rcv = 1L), + binRow(2L, bobPayloadA, "delete", 2L, rcv = 1L), // carry-over + binRow(2L, bobPayloadB, "insert", 2L, rcv = 1L))) // carry-over (same rcv) val rows = sql( s"SELECT id, _change_type, _commit_version FROM $catalogName.$binTable " + @@ -752,25 +761,26 @@ class ResolveChangelogTablePostProcessingSuite } // =========================================================================== - // Behavior divergence from getChangeRows: no-op UPDATE + // No-op UPDATE is correctly preserved as update_preimage/postimage // =========================================================================== - test("no-op UPDATE (unchanged data) is dropped as carry-over") { - // Connector-agnostic carry-over removal compares data columns. A no-op - // UPDATE produces byte-identical delete+insert, indistinguishable from a CoW - // carry-over, and is therefore dropped. Legacy getChangeRows would have emitted - // update_preimage/update_postimage here. + test("no-op UPDATE is labeled as update (row_commit_version differs on pre/post)") { + // Under the partition-based design (SPIP B.6 updated), a no-op UPDATE is no longer + // silently dropped. Delta bumps row_commit_version on every UPDATE even when data is + // byte-identical, so the delete side carries the OLD rcv and the insert side the NEW + // rcv. Carry-over removal sees different rowVersions -> real change -> both survive + // and get labeled as update_preimage / update_postimage by update detection. catalog.setChangelogProperties(ident, ChangelogProperties( containsCarryoverRows = true, representsUpdateAsDeleteAndInsert = true, rowIdNames = Seq("id"), - rowVersionName = Some("_commit_version"))) + rowVersionName = Some("row_commit_version"))) catalog.addChangeRows(ident, Seq( - changeRow(1L, "Alice", "insert", 1L), - // v2 no-op update: identical delete + insert - changeRow(1L, "Alice", "delete", 2L), - changeRow(1L, "Alice", "insert", 2L))) + changeRow(1L, "Alice", "insert", 1L, rowCommitVersion = 1L), + // v2 no-op update: identical data, but rcv differs (Delta bumps it on any UPDATE) + changeRow(1L, "Alice", "delete", 2L, rowCommitVersion = 1L), + changeRow(1L, "Alice", "insert", 2L, rowCommitVersion = 2L))) val rows = sql( s"SELECT id, name, _change_type, _commit_version FROM $catalogName.$testTableName " + @@ -782,10 +792,11 @@ class ResolveChangelogTablePostProcessingSuite s"${r.getLong(0)}:${r.getString(1)}:${r.getString(2)}:v${r.getLong(3)}") assert(descs.contains("1:Alice:insert:v1")) - assert(!descs.exists(_.contains("update_")), - "No-op UPDATE indistinguishable from carry-over, so it is dropped") - assert(!descs.contains("1:Alice:delete:v2")) - assert(!descs.contains("1:Alice:insert:v2")) - assert(rows.length == 1, "Only v1 insert remains; v2 no-op UPDATE silently dropped") + assert(descs.contains("1:Alice:update_preimage:v2"), + s"No-op UPDATE preimage must be labeled. Got: ${descs.mkString(",")}") + assert(descs.contains("1:Alice:update_postimage:v2"), + s"No-op UPDATE postimage must be labeled. Got: ${descs.mkString(",")}") + assert(rows.length == 3, + s"Expected v1 insert + v2 update pre/post = 3 rows. Got ${rows.length}") } } From fbe03c36dd41fbc47471f641f09365404ceeec13 Mon Sep 17 00:00:00 2001 From: Sandro Speh Date: Wed, 22 Apr 2026 16:03:07 +0000 Subject: [PATCH 4/5] First PR comment batch --- .../analysis/ResolveChangelogTable.scala | 158 +++++++------ .../sql/errors/QueryCompilationErrors.scala | 34 +++ .../datasources/v2/ChangelogTable.scala | 36 +++ .../connector/ChangelogResolutionSuite.scala | 57 ++++- ...lveChangelogTablePostProcessingSuite.scala | 218 +++++++++++++++++- 5 files changed, 410 insertions(+), 93 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveChangelogTable.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveChangelogTable.scala index 9cc60b5578aa7..0df9ccec393e6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveChangelogTable.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveChangelogTable.scala @@ -23,8 +23,9 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.TreeNodeTag import org.apache.spark.sql.connector.catalog.{Changelog, ChangelogInfo} +import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.datasources.v2.{ChangelogTable, DataSourceV2Relation} -import org.apache.spark.sql.types.IntegerType +import org.apache.spark.sql.types.{IntegerType, StringType} /** * Post-processes a resolved [[DataSourceV2Relation]] backed by a [[ChangelogTable]] to inject @@ -45,6 +46,15 @@ object ResolveChangelogTable extends Rule[LogicalPlan] { private val CHANGELOG_TRANSFORMED_TAG = TreeNodeTag[Boolean]("changelog_transformed") + private object HelperColumn { + final val DelCnt = "_del_cnt" + final val InsCnt = "_ins_cnt" + final val MinRv = "_min_rv" + final val MaxRv = "_max_rv" + + val all: Set[String] = Set(DelCnt, InsCnt, MinRv, MaxRv) + } + override def apply(plan: LogicalPlan): LogicalPlan = { if (isAlreadyTransformed(plan)) return plan var updatedPlan = plan @@ -57,6 +67,11 @@ object ResolveChangelogTable extends Rule[LogicalPlan] { val changelog = table.changelog val options = table.changelogInfo + // Net change computation is not yet implemented. + if (options.deduplicationMode() == ChangelogInfo.DeduplicationMode.NET_CHANGES) { + throw QueryCompilationErrors.cdcNetChangesNotYetSupported(changelog.name()) + } + val requiresCarryOverRemoval = options.deduplicationMode() != ChangelogInfo.DeduplicationMode.NONE && changelog.containsCarryoverRows() @@ -66,6 +81,15 @@ object ResolveChangelogTable extends Rule[LogicalPlan] { options.deduplicationMode() == ChangelogInfo.DeduplicationMode.NET_CHANGES && changelog.containsIntermediateChanges() + // If carry-overs are surfaced and update detection is enabled, carry-overs will + // falsely be classified as updates, leading to false results. Hence we throw. + if (requiresUpdateDetection && + changelog.containsCarryoverRows() && + options.deduplicationMode() == ChangelogInfo.DeduplicationMode.NONE) { + throw QueryCompilationErrors.cdcUpdateDetectionRequiresCarryOverRemoval( + changelog.name()) + } + var updatedRel: LogicalPlan = rel if (requiresCarryOverRemoval || requiresUpdateDetection) { updatedRel = addRowLevelPostProcessing( @@ -92,74 +116,37 @@ object ResolveChangelogTable extends Rule[LogicalPlan] { * - both active -> Window(counts + rv bounds) -> Filter -> Project(relabel) -> Drop helpers * - carry-over only -> Window(counts + rv bounds) -> Filter -> Drop helpers * - update only -> Window(counts only) -> Project(relabel) -> Drop helpers - * - neither -> plan is returned unchanged (caller guards this case) + * - neither -> not invoked (caller guards this case) */ private def addRowLevelPostProcessing( plan: LogicalPlan, cl: Changelog, requiresCarryOverRemoval: Boolean, requiresUpdateDetection: Boolean): LogicalPlan = { - if (requiresCarryOverRemoval && requiresUpdateDetection) { - val planWithWindow = addWindowWithInsertAndDeleteCountsAndRowVersionBounds(plan, cl) - val planWithFilter = addCarryOverPairFilter(planWithWindow) - val planWithProjection = addUpdateRelabelProjection(planWithFilter) - removeHelperColumns(planWithProjection) - } else if (requiresCarryOverRemoval) { - val planWithWindow = addWindowWithInsertAndDeleteCountsAndRowVersionBounds(plan, cl) - val planWithFilter = addCarryOverPairFilter(planWithWindow) - removeHelperColumns(planWithFilter) - } else if (requiresUpdateDetection) { - val planWithWindow = addWindowWithInsertAndDeleteCountsOnly(plan, cl) - val planWithProjection = addUpdateRelabelProjection(planWithWindow) - removeHelperColumns(planWithProjection) - } else { - plan - } - } - - /** - * Adds a Window node partitioned by (rowId, _commit_version) that computes - * `_del_cnt` (number of `delete` rows in the partition) and `_ins_cnt` (number of - * `insert` rows in the partition). These counts drive update detection: a partition - * with `_del_cnt >= 1 AND _ins_cnt >= 1` is relabeled as update_preimage / - * update_postimage. - */ - private def addWindowWithInsertAndDeleteCountsOnly( - plan: LogicalPlan, cl: Changelog): LogicalPlan = { - val changeTypeAttr = getAttribute(plan, "_change_type") - val rowIdExprs = V2ExpressionUtils.resolveRefs[NamedExpression](cl.rowId().toSeq, plan) - val commitVersionAttr = getAttribute(plan, "_commit_version") - val partitionByCols = rowIdExprs ++ Seq(commitVersionAttr) - val windowSpec = WindowSpecDefinition(partitionByCols, Nil, UnspecifiedFrame) - - val insertIf = If(EqualTo(changeTypeAttr, Literal("insert")), - Literal(1), Literal(null, IntegerType)) - val deleteIf = If(EqualTo(changeTypeAttr, Literal("delete")), - Literal(1), Literal(null, IntegerType)) - - val insCntAlias = Alias(WindowExpression( - Count(Seq(insertIf)).toAggregateExpression(), windowSpec), "_ins_cnt")() - val delCntAlias = Alias(WindowExpression( - Count(Seq(deleteIf)).toAggregateExpression(), windowSpec), "_del_cnt")() - - Window(Seq(delCntAlias, insCntAlias), partitionByCols, Nil, plan) + // Row-version bounds in the Window are needed iff we filter carry-over pairs. + var modifiedPlan = addPostProcessingWindow(plan, cl, + includeRowVersionBounds = requiresCarryOverRemoval) + if (requiresCarryOverRemoval) modifiedPlan = addCarryOverPairFilter(modifiedPlan) + if (requiresUpdateDetection) modifiedPlan = addUpdateRelabelProjection(modifiedPlan) + removeHelperColumns(modifiedPlan) } /** * Adds a Window node partitioned by (rowId, _commit_version) that computes - * `_del_cnt` (number of `delete` rows in the partition), `_ins_cnt` (number of `insert` - * rows in the partition), `_min_rv` (minimum of the connector's `rowVersion()` - * expression in the partition) and `_max_rv` (maximum of `rowVersion()` in the - * partition). Used whenever carry-over detection is needed: within a delete+insert - * pair, `_min_rv == _max_rv` signals that both sides share the same rowVersion and - * the pair is a CoW carry-over. + * `_del_cnt` and `_ins_cnt` per partition, and, when `includeRowVersionBounds` + * is true, additionally `_min_rv` / `_max_rv` (min/max of `Changelog.rowVersion()`). + * + * `_del_cnt` / `_ins_cnt` drive update detection (1 each -> relabel as + * update_preimage / update_postimage). `_min_rv` / `_max_rv` drive carry-over + * detection (within a delete+insert pair, equal bounds signal a CoW carry-over). */ - private def addWindowWithInsertAndDeleteCountsAndRowVersionBounds( - plan: LogicalPlan, cl: Changelog): LogicalPlan = { + private def addPostProcessingWindow( + plan: LogicalPlan, + cl: Changelog, + includeRowVersionBounds: Boolean): LogicalPlan = { val changeTypeAttr = getAttribute(plan, "_change_type") val rowIdExprs = V2ExpressionUtils.resolveRefs[NamedExpression](cl.rowId().toSeq, plan) val commitVersionAttr = getAttribute(plan, "_commit_version") - val rowVersionExpr = V2ExpressionUtils.resolveRef[NamedExpression](cl.rowVersion(), plan) val partitionByCols = rowIdExprs ++ Seq(commitVersionAttr) val windowSpec = WindowSpecDefinition(partitionByCols, Nil, UnspecifiedFrame) @@ -169,16 +156,22 @@ object ResolveChangelogTable extends Rule[LogicalPlan] { Literal(1), Literal(null, IntegerType)) val insCntAlias = Alias(WindowExpression( - Count(Seq(insertIf)).toAggregateExpression(), windowSpec), "_ins_cnt")() + Count(Seq(insertIf)).toAggregateExpression(), windowSpec), HelperColumn.InsCnt)() val delCntAlias = Alias(WindowExpression( - Count(Seq(deleteIf)).toAggregateExpression(), windowSpec), "_del_cnt")() - val minRvAlias = Alias(WindowExpression( - Min(rowVersionExpr).toAggregateExpression(), windowSpec), "_min_rv")() - val maxRvAlias = Alias(WindowExpression( - Max(rowVersionExpr).toAggregateExpression(), windowSpec), "_max_rv")() - - Window(Seq(delCntAlias, insCntAlias, minRvAlias, maxRvAlias), - partitionByCols, Nil, plan) + Count(Seq(deleteIf)).toAggregateExpression(), windowSpec), HelperColumn.DelCnt)() + val baseAliases = Seq(delCntAlias, insCntAlias) + val rowVersionAliases = if (includeRowVersionBounds) { + val rowVersionExpr = + V2ExpressionUtils.resolveRef[NamedExpression](cl.rowVersion(), plan) + Seq( + Alias(WindowExpression( + Min(rowVersionExpr).toAggregateExpression(), windowSpec), HelperColumn.MinRv)(), + Alias(WindowExpression( + Max(rowVersionExpr).toAggregateExpression(), windowSpec), HelperColumn.MaxRv)()) + } else { + Seq.empty + } + Window(baseAliases ++ rowVersionAliases, partitionByCols, Nil, plan) } /** @@ -187,10 +180,10 @@ object ResolveChangelogTable extends Rule[LogicalPlan] { * A pair is a carry-over iff `_del_cnt = 1 AND _ins_cnt = 1 AND _min_rv = _max_rv`. */ private def addCarryOverPairFilter(input: LogicalPlan): LogicalPlan = { - val delCnt = getAttribute(input, "_del_cnt") - val insCnt = getAttribute(input, "_ins_cnt") - val minRv = getAttribute(input, "_min_rv") - val maxRv = getAttribute(input, "_max_rv") + val delCnt = getAttribute(input, HelperColumn.DelCnt) + val insCnt = getAttribute(input, HelperColumn.InsCnt) + val minRv = getAttribute(input, HelperColumn.MinRv) + val maxRv = getAttribute(input, HelperColumn.MaxRv) val isCarryoverPair = And( And(EqualTo(delCnt, Literal(1L)), EqualTo(insCnt, Literal(1L))), @@ -205,15 +198,21 @@ object ResolveChangelogTable extends Rule[LogicalPlan] { */ private def addUpdateRelabelProjection(input: LogicalPlan): LogicalPlan = { val changeTypeAttr = getAttribute(input, "_change_type") - val delCnt = getAttribute(input, "_del_cnt") - val insCnt = getAttribute(input, "_ins_cnt") + val delCnt = getAttribute(input, HelperColumn.DelCnt) + val insCnt = getAttribute(input, HelperColumn.InsCnt) val isUpdate = And( - GreaterThanOrEqual(delCnt, Literal(1L)), - GreaterThanOrEqual(insCnt, Literal(1L))) + EqualTo(delCnt, Literal(1L)), + EqualTo(insCnt, Literal(1L))) + val isInvalid = Or(GreaterThan(delCnt, Literal(1L)), GreaterThan(insCnt, Literal(1L))) val updateType = If(EqualTo(changeTypeAttr, Literal("insert")), Literal("update_postimage"), Literal("update_preimage")) - val caseExpr = CaseWhen(Seq((isUpdate, updateType)), changeTypeAttr) + + val raiseInvalid = RaiseError( + Literal("INVALID_CDC_OPTION.UNEXPECTED_MULTIPLE_CHANGES_PER_ROW_VERSION"), + CreateMap(Nil), + StringType) + val caseExpr = CaseWhen(Seq(isInvalid -> raiseInvalid, isUpdate -> updateType), changeTypeAttr) val projectList = input.output.map { attr => if (attr.name == "_change_type") Alias(caseExpr, "_change_type")() @@ -233,8 +232,7 @@ object ResolveChangelogTable extends Rule[LogicalPlan] { private def injectNetChangeComputation( plan: LogicalPlan, cl: Changelog): LogicalPlan = { - throw new UnsupportedOperationException( - "Net change computation is not yet supported") + plan } // --------------------------------------------------------------------------- @@ -250,14 +248,12 @@ object ResolveChangelogTable extends Rule[LogicalPlan] { } /** - * Removes any helper columns (`_del_cnt`, `_ins_cnt`, `_min_rv`, `_max_rv`) that - * earlier steps added to the plan. Helper columns not present in the input are - * silently ignored, so the same method works for all three branches in - * [[addRowLevelPostProcessing]]. + * Removes any helper columns (see [[HelperColumn]]) that earlier steps added to the + * plan. Helper columns not present in the input are silently ignored, so this method + * can be applied unconditionally regardless of which post-processing steps ran. */ private def removeHelperColumns(input: LogicalPlan): LogicalPlan = { - val helperNames = Set("_del_cnt", "_ins_cnt", "_min_rv", "_max_rv") - Project(input.output.filterNot(a => helperNames.contains(a.name)), input) + Project(input.output.filterNot(a => HelperColumn.all.contains(a.name)), input) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index d824468aada10..d4d50c2674c72 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -3870,6 +3870,40 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat "actualType" -> actualType)) } + def changelogNullableRowVersionError( + changelogName: String, columnName: String): AnalysisException = { + new AnalysisException( + errorClass = "INVALID_CHANGELOG_SCHEMA.NULLABLE_ROW_VERSION", + messageParameters = Map( + "changelogName" -> changelogName, + "columnName" -> columnName)) + } + + def changelogMissingRowIdError(changelogName: String): AnalysisException = { + new AnalysisException( + errorClass = "INVALID_CHANGELOG_SCHEMA.MISSING_ROW_ID", + messageParameters = Map("changelogName" -> changelogName)) + } + + def changelogMissingRowVersionError(changelogName: String): AnalysisException = { + new AnalysisException( + errorClass = "INVALID_CHANGELOG_SCHEMA.MISSING_ROW_VERSION", + messageParameters = Map("changelogName" -> changelogName)) + } + + def cdcUpdateDetectionRequiresCarryOverRemoval( + changelogName: String): AnalysisException = { + new AnalysisException( + errorClass = "INVALID_CDC_OPTION.UPDATE_DETECTION_REQUIRES_CARRY_OVER_REMOVAL", + messageParameters = Map("changelogName" -> changelogName)) + } + + def cdcNetChangesNotYetSupported(changelogName: String): AnalysisException = { + new AnalysisException( + errorClass = "INVALID_CDC_OPTION.NET_CHANGES_NOT_YET_SUPPORTED", + messageParameters = Map("changelogName" -> changelogName)) + } + def invalidCdcOptionConflictingRangeTypes(): Throwable = { new AnalysisException( errorClass = "INVALID_CDC_OPTION.CONFLICTING_RANGE_TYPES", diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ChangelogTable.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ChangelogTable.scala index fa445b9123017..39022403487c6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ChangelogTable.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ChangelogTable.scala @@ -21,6 +21,7 @@ import java.util.{EnumSet => JEnumSet, Set => JSet} import org.apache.spark.sql.connector.catalog.{Changelog, ChangelogInfo, Column, SupportsRead, Table, TableCapability} import org.apache.spark.sql.connector.catalog.TableCapability.{BATCH_READ, MICRO_BATCH_READ} +import org.apache.spark.sql.connector.expressions.NamedReference import org.apache.spark.sql.connector.read.ScanBuilder import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.types.{DataType, StringType, TimestampType} @@ -68,5 +69,40 @@ object ChangelogTable { check("_change_type", StringType) check("_commit_version") // connector-defined, any type accepted check("_commit_timestamp", TimestampType) + + val needsRowId = cl.containsCarryoverRows() || + cl.representsUpdateAsDeleteAndInsert() || + cl.containsIntermediateChanges() + val needsRowVersion = cl.containsCarryoverRows() || + cl.representsUpdateAsDeleteAndInsert() + + if (needsRowId) { + val rowIds = try cl.rowId() catch { + case _: UnsupportedOperationException => Array.empty[NamedReference] + } + if (rowIds == null || rowIds.isEmpty) { + throw QueryCompilationErrors.changelogMissingRowIdError(cl.name) + } + } + + if (needsRowVersion) { + val rowVersionRef = try Some(cl.rowVersion()) catch { + case _: UnsupportedOperationException => None + } + if (rowVersionRef.isEmpty) { + throw QueryCompilationErrors.changelogMissingRowVersionError(cl.name) + } + rowVersionRef.foreach { ref => + val fieldNames = ref.fieldNames() + require(fieldNames.length == 1, + s"Nested rowVersion is not supported: ${fieldNames.mkString(".")}") + val columnName = fieldNames(0) + val col = byName.getOrElse(columnName, + throw QueryCompilationErrors.changelogMissingColumnError(cl.name, columnName)) + if (col.nullable) { + throw QueryCompilationErrors.changelogNullableRowVersionError(cl.name, columnName) + } + } + } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/ChangelogResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/ChangelogResolutionSuite.scala index 1191528254179..c873dcefa287e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/ChangelogResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/ChangelogResolutionSuite.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2 import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.connector.catalog.ChangelogRange -import org.apache.spark.sql.connector.expressions.{NamedReference, Transform} +import org.apache.spark.sql.connector.expressions.{FieldReference, NamedReference, Transform} import org.apache.spark.sql.connector.read.ScanBuilder import org.apache.spark.sql.execution.datasources.v2.{ChangelogTable, DataSourceV2Relation} import org.apache.spark.sql.test.SharedSparkSession @@ -217,8 +217,8 @@ class ChangelogResolutionSuite extends QueryTest with SharedSparkSession { false) private def cl(name: String, cols: (String, org.apache.spark.sql.types.DataType)*) - : BrokenChangelog = { - new BrokenChangelog(name, cols.map { case (n, t) => Column.create(n, t) }.toArray) + : TestChangelog = { + new TestChangelog(name, cols.map { case (n, t) => Column.create(n, t) }.toArray) } private def missing(columnName: String): Map[String, String] = @@ -299,21 +299,64 @@ class ChangelogResolutionSuite extends QueryTest with SharedSparkSession { validChangeType, validVersion, validTimestamp), stubInfo()) } + + test("ChangelogTable - nullable rowVersion column fails") { + val cl = new TestChangelog( + "bad_cl", + Array( + Column.create("id", LongType, false), + Column.create("_change_type", StringType), + Column.create("_commit_version", LongType), + Column.create("_commit_timestamp", TimestampType), + Column.create("row_commit_version", LongType)), + carryoverRows = true, + rowIdRefs = Array(FieldReference.column("id")), + rowVersionRef = Some(FieldReference.column("row_commit_version"))) + checkError( + intercept[AnalysisException] { ChangelogTable(cl, stubInfo()) }, + condition = "INVALID_CHANGELOG_SCHEMA.NULLABLE_ROW_VERSION", + parameters = Map( + "changelogName" -> "bad_cl", + "columnName" -> "row_commit_version")) + } + + test("ChangelogTable - non-nullable rowVersion column passes") { + val cl = new TestChangelog( + "good_cl", + Array( + Column.create("id", LongType, false), + Column.create("_change_type", StringType), + Column.create("_commit_version", LongType), + Column.create("_commit_timestamp", TimestampType), + Column.create("row_commit_version", LongType, false)), + carryoverRows = true, + rowIdRefs = Array(FieldReference.column("id")), + rowVersionRef = Some(FieldReference.column("row_commit_version"))) + ChangelogTable(cl, stubInfo()) + } } /** * Test-only [[Changelog]] implementation that returns a hand-crafted schema. Used to * exercise [[ChangelogTable]]'s schema validation without going through a real catalog. + * + * Defaults match a minimal connector with no post-processing capabilities. Tests opt + * into capability flags or `rowVersion()` overrides via constructor params. */ -private class BrokenChangelog( +private class TestChangelog( nameArg: String, - cols: Array[Column]) extends Changelog { + cols: Array[Column], + carryoverRows: Boolean = false, + rowIdRefs: Array[NamedReference] = Array.empty, + rowVersionRef: Option[NamedReference] = None) extends Changelog { override def name(): String = nameArg override def columns(): Array[Column] = cols - override def containsCarryoverRows(): Boolean = false + override def containsCarryoverRows(): Boolean = carryoverRows override def containsIntermediateChanges(): Boolean = false override def representsUpdateAsDeleteAndInsert(): Boolean = false - override def rowId(): Array[NamedReference] = Array.empty + override def rowId(): Array[NamedReference] = rowIdRefs + override def rowVersion(): NamedReference = + rowVersionRef.getOrElse(super.rowVersion()) override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = { throw new UnsupportedOperationException("not needed for schema validation tests") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/ResolveChangelogTablePostProcessingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/ResolveChangelogTablePostProcessingSuite.scala index 9eb136940424b..b3dbc0a64ad04 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/ResolveChangelogTablePostProcessingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/ResolveChangelogTablePostProcessingSuite.scala @@ -21,7 +21,8 @@ import java.util.Collections import org.scalatest.BeforeAndAfterEach -import org.apache.spark.sql.QueryTest +import org.apache.spark.SparkRuntimeException +import org.apache.spark.sql.{AnalysisException, QueryTest} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.connector.catalog.{ ChangelogProperties, Column, Identifier, InMemoryChangelogCatalog} @@ -68,7 +69,7 @@ class ResolveChangelogTablePostProcessingSuite Array( Column.create("id", LongType), Column.create("name", StringType), - Column.create("row_commit_version", LongType)), + Column.create("row_commit_version", LongType, false)), Array.empty[Transform], Collections.emptyMap[String, String]()) } @@ -223,6 +224,71 @@ class ResolveChangelogTablePostProcessingSuite "Delete and insert in different versions should not be labeled as update") } + // =========================================================================== + // Composite rowId: partitioning uses every rowId column + // =========================================================================== + // + // With a composite rowId such as Seq("id", "name"), the (rowId, _commit_version) + // window partition must include BOTH columns. A regression that drops one of the + // rowId columns would either falsely merge two different row identities into one + // partition (silently mislabeling unrelated delete/insert pairs as updates) or + // trip the UNEXPECTED_MULTIPLE_CHANGES_PER_ROW_VERSION runtime guard. + + test("update detection with composite rowId keeps different (id, name) tuples raw") { + catalog.setChangelogProperties(ident, ChangelogProperties( + representsUpdateAsDeleteAndInsert = true, + rowIdNames = Seq("id", "name"), + rowVersionName = Some("row_commit_version"))) + + // delete (1, Alice) and insert (1, Bob) at v2. These are DIFFERENT composite + // rowIds; they must NOT be relabeled as update. + catalog.addChangeRows(ident, Seq( + changeRow(1L, "Alice", "delete", 2L), + changeRow(1L, "Bob", "insert", 2L))) + + val rows = sql( + s"SELECT id, name, _change_type FROM $catalogName.$testTableName " + + s"CHANGES FROM VERSION 2 TO VERSION 2 WITH (computeUpdates = 'true')") + .collect() + + val descs = rows.map(r => + s"${r.getLong(0)}:${r.getString(1)}:${r.getString(2)}").toSet + + assert(descs == Set("1:Alice:delete", "1:Bob:insert"), + s"Composite rowId must keep different (id, name) tuples raw. Got: $descs") + } + + test("carry-over removal with composite rowId removes pairs per (id, name) tuple") { + catalog.setChangelogProperties(ident, ChangelogProperties( + containsCarryoverRows = true, + rowIdNames = Seq("id", "name"), + rowVersionName = Some("row_commit_version"))) + + // Two independent carry-over pairs at v2, both with id=1 but different names. + // With correct composite-rowId partitioning, each pair lives in its own + // (id, name, _commit_version) partition, has _del_cnt=1 / _ins_cnt=1 and equal + // _min_rv / _max_rv, and gets dropped. With broken (id-only) partitioning, the + // four rows would collapse into one partition with _del_cnt=2 / _ins_cnt=2 and + // the carry-over filter (which requires =1) would keep them all. + catalog.addChangeRows(ident, Seq( + changeRow(1L, "Alice", "insert", 1L, rowCommitVersion = 1L), + changeRow(1L, "Bob", "insert", 1L, rowCommitVersion = 1L), + changeRow(1L, "Alice", "delete", 2L, rowCommitVersion = 1L), + changeRow(1L, "Alice", "insert", 2L, rowCommitVersion = 1L), + changeRow(1L, "Bob", "delete", 2L, rowCommitVersion = 1L), + changeRow(1L, "Bob", "insert", 2L, rowCommitVersion = 1L))) + + val rows = sql( + s"SELECT id, name, _change_type, _commit_version " + + s"FROM $catalogName.$testTableName CHANGES FROM VERSION 2 TO VERSION 2") + .collect() + + val descs = rows.map(r => + s"${r.getLong(0)}:${r.getString(1)}:${r.getString(2)}") + assert(rows.isEmpty, + s"Both Alice and Bob carry-over pairs at v2 should be removed. Got: ${descs.mkString(",")}") + } + // =========================================================================== // No row identity: post-processing skipped // =========================================================================== @@ -344,6 +410,148 @@ class ResolveChangelogTablePostProcessingSuite s"Pure inserts must stay 'insert'. Got: ${rows.map(_.getString(1)).mkString(",")}") } + // =========================================================================== + // Keep Carry-over Rows and deduplication flag tests + // =========================================================================== + + test("computeUpdates with deduplicationMode=none is rejected on COW connector") { + catalog.setChangelogProperties(ident, ChangelogProperties( + containsCarryoverRows = true, + representsUpdateAsDeleteAndInsert = true, + rowIdNames = Seq("id"), + rowVersionName = Some("row_commit_version"))) + + checkError( + intercept[AnalysisException] { + sql(s"SELECT * FROM $catalogName.$testTableName " + + s"CHANGES FROM VERSION 1 TO VERSION 2 " + + s"WITH (computeUpdates = 'true', deduplicationMode = 'none')") + }, + condition = "INVALID_CDC_OPTION.UPDATE_DETECTION_REQUIRES_CARRY_OVER_REMOVAL", + parameters = Map("changelogName" -> s"$catalogName.${testTableName}_changelog")) + } + + test("computeUpdates with deduplicationMode=none is allowed on non-COW connector") { + catalog.setChangelogProperties(ident, ChangelogProperties( + containsCarryoverRows = false, // MOR-style: no carry-overs possible + representsUpdateAsDeleteAndInsert = true, + rowIdNames = Seq("id"), + rowVersionName = Some("row_commit_version"))) + + catalog.addChangeRows(ident, Seq( + changeRow(1L, "Alice", "insert", 1L), + // v2: Alice -> Robert (delete old, insert new) + changeRow(1L, "Alice", "delete", 2L), + changeRow(1L, "Robert", "insert", 2L))) + + val rows = sql( + s"SELECT id, name, _change_type FROM $catalogName.$testTableName " + + s"CHANGES FROM VERSION 1 TO VERSION 2 " + + s"WITH (computeUpdates = 'true', deduplicationMode = 'none')") + .collect() + + val descs = rows.map(r => + s"${r.getLong(0)}:${r.getString(1)}:${r.getString(2)}") + assert(descs.contains("1:Alice:update_preimage"), + s"Expected Alice update_preimage. Got: ${descs.mkString(",")}") + assert(descs.contains("1:Robert:update_postimage"), + s"Expected Robert update_postimage. Got: ${descs.mkString(",")}") + } + + // =========================================================================== + // Contract enforcement: at most one delete + one insert per (rowId, version) + // =========================================================================== + // + // With `representsUpdateAsDeleteAndInsert = true` and `containsIntermediateChanges = false`, + // the `Changelog` contract guarantees at most one logical change per (rowId, _commit_version) + // partition. The update-relabel projection enforces this at runtime: if it sees more than one + // delete or more than one insert in a partition, it raises + // INVALID_CDC_OPTION.UNEXPECTED_MULTIPLE_CHANGES_PER_ROW_VERSION instead of silently + // mislabeling extra rows as updates. + + test("update detection raises on multiple inserts for same (rowId, _commit_version)") { + catalog.setChangelogProperties(ident, ChangelogProperties( + representsUpdateAsDeleteAndInsert = true, + rowIdNames = Seq("id"), + rowVersionName = Some("row_commit_version"))) + + // Contract violation: 2 inserts for id=1 at v2. + catalog.addChangeRows(ident, Seq( + changeRow(1L, "Alice", "delete", 2L), + changeRow(1L, "Alice2", "insert", 2L), + changeRow(1L, "Alice3", "insert", 2L))) + + checkError( + intercept[SparkRuntimeException] { + sql(s"SELECT * FROM $catalogName.$testTableName " + + s"CHANGES FROM VERSION 2 TO VERSION 2 WITH (computeUpdates = 'true')") + .collect() + }, + condition = "INVALID_CDC_OPTION.UNEXPECTED_MULTIPLE_CHANGES_PER_ROW_VERSION", + parameters = Map.empty) + } + + test("update detection raises on multiple deletes for same (rowId, _commit_version)") { + catalog.setChangelogProperties(ident, ChangelogProperties( + representsUpdateAsDeleteAndInsert = true, + rowIdNames = Seq("id"), + rowVersionName = Some("row_commit_version"))) + + // Contract violation: 2 deletes for id=1 at v2. + catalog.addChangeRows(ident, Seq( + changeRow(1L, "Alice", "delete", 2L), + changeRow(1L, "Alice2", "delete", 2L), + changeRow(1L, "Alice3", "insert", 2L))) + + checkError( + intercept[SparkRuntimeException] { + sql(s"SELECT * FROM $catalogName.$testTableName " + + s"CHANGES FROM VERSION 2 TO VERSION 2 WITH (computeUpdates = 'true')") + .collect() + }, + condition = "INVALID_CDC_OPTION.UNEXPECTED_MULTIPLE_CHANGES_PER_ROW_VERSION", + parameters = Map.empty) + } + + // =========================================================================== + // Net changes deduplication: not yet supported + // =========================================================================== + // + // `deduplicationMode = netChanges` collapses multiple changes per row identity into the + // net effect. It is not yet implemented in [[ResolveChangelogTable]]. + + test("deduplicationMode=netChanges is rejected when connector emits intermediate changes") { + catalog.setChangelogProperties(ident, ChangelogProperties( + containsIntermediateChanges = true, + rowIdNames = Seq("id"), + rowVersionName = Some("row_commit_version"))) + + checkError( + intercept[AnalysisException] { + sql(s"SELECT * FROM $catalogName.$testTableName " + + s"CHANGES FROM VERSION 1 TO VERSION 2 " + + s"WITH (deduplicationMode = 'netChanges')") + }, + condition = "INVALID_CDC_OPTION.NET_CHANGES_NOT_YET_SUPPORTED", + parameters = Map("changelogName" -> s"$catalogName.${testTableName}_changelog")) + } + + test("deduplicationMode=netChanges is rejected even when connector has no intermediate changes") { + catalog.setChangelogProperties(ident, ChangelogProperties( + containsIntermediateChanges = false, + rowIdNames = Seq("id"), + rowVersionName = Some("row_commit_version"))) + + checkError( + intercept[AnalysisException] { + sql(s"SELECT * FROM $catalogName.$testTableName " + + s"CHANGES FROM VERSION 1 TO VERSION 2 " + + s"WITH (deduplicationMode = 'netChanges')") + }, + condition = "INVALID_CDC_OPTION.NET_CHANGES_NOT_YET_SUPPORTED", + parameters = Map("changelogName" -> s"$catalogName.${testTableName}_changelog")) + } + // =========================================================================== // Range edge cases // =========================================================================== @@ -583,7 +791,7 @@ class ResolveChangelogTablePostProcessingSuite Column.create("name", StringType), Column.create("score", DoubleType), Column.create("active", BooleanType), - Column.create("row_commit_version", LongType)), + Column.create("row_commit_version", LongType, false)), Array.empty[Transform], Collections.emptyMap[String, String]()) cat.setChangelogProperties(mixedIdent, ChangelogProperties( @@ -650,7 +858,7 @@ class ResolveChangelogTablePostProcessingSuite nestedIdent, Array( Column.create("payload", payloadType), - Column.create("row_commit_version", LongType)), + Column.create("row_commit_version", LongType, false)), Array.empty[Transform], Collections.emptyMap[String, String]()) @@ -711,7 +919,7 @@ class ResolveChangelogTablePostProcessingSuite Array( Column.create("id", LongType), Column.create("payload", BinaryType), - Column.create("row_commit_version", LongType)), + Column.create("row_commit_version", LongType, false)), Array.empty[Transform], Collections.emptyMap[String, String]()) cat.setChangelogProperties(binIdent, ChangelogProperties( From b7809e34f6580db90f00e721e13842bda826c521 Mon Sep 17 00:00:00 2001 From: Sandro Speh Date: Thu, 23 Apr 2026 08:44:48 +0000 Subject: [PATCH 5/5] Second batch of PR feedback --- .../resources/error/error-conditions.json | 34 ++++- .../analysis/ResolveChangelogTable.scala | 136 ++++++++++++------ .../sql/errors/QueryCompilationErrors.scala | 15 ++ .../datasources/v2/ChangelogTable.scala | 57 ++++---- .../catalog/InMemoryChangelogCatalog.scala | 6 +- .../connector/ChangelogResolutionSuite.scala | 65 +++++++++ ...lveChangelogTablePostProcessingSuite.scala | 109 +++----------- 7 files changed, 263 insertions(+), 159 deletions(-) diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index f1e493bf8d704..595f29d5a2928 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -3272,6 +3272,26 @@ "message" : [ "`startingVersion` is required when `endingVersion` is specified for CDC queries." ] + }, + "NET_CHANGES_NOT_YET_SUPPORTED" : { + "message" : [ + "The `deduplicationMode = netChanges` option on connector `` is not yet supported. Use `deduplicationMode = dropCarryovers` (default) or `deduplicationMode = none` instead." + ] + }, + "STREAMING_POST_PROCESSING_NOT_SUPPORTED" : { + "message" : [ + "Change Data Capture (CDC) streaming reads on connector `` do not yet support post-processing (carry-over removal, update detection, or net change computation). The requested combination of options would require post-processing, which is currently only available for batch reads. Use a batch read, or set `deduplicationMode = none` and `computeUpdates = false` to receive raw change rows in streaming." + ] + }, + "UPDATE_DETECTION_REQUIRES_CARRY_OVER_REMOVAL" : { + "message" : [ + "`computeUpdates` cannot be used with `deduplicationMode=none` on connector `` because the connector emits copy-on-write carry-over pairs (`containsCarryoverRows()` returns true) that would be silently mislabeled as updates. Set `deduplicationMode` to `dropCarryovers` or `netChanges`." + ] + }, + "UNEXPECTED_MULTIPLE_CHANGES_PER_ROW_VERSION" : { + "message" : [ + "Connector emitted multiple delete or insert rows for the same `(rowId, _commit_version)` partition. The `Changelog` contract requires at most one logical change per row identity per commit when `containsIntermediateChanges() = false`. Either fix the connector to deduplicate intermediate states, or set `containsIntermediateChanges() = true` and use `deduplicationMode = netChanges`." + ] } }, "sqlState" : "42K03" @@ -3296,9 +3316,19 @@ "Connector advertises one or more post-processing properties (`containsCarryoverRows`, `representsUpdateAsDeleteAndInsert`, `containsIntermediateChanges`) that require row identity, but `Changelog.rowId()` returned an empty array. Either set all three to `false`, or return at least one row-id `NamedReference`." ] }, - "NESTED_ROW_ID" : { + "MISSING_ROW_VERSION" : { + "message" : [ + "Connector advertises `containsCarryoverRows` or `representsUpdateAsDeleteAndInsert` is `true`, but `Changelog.rowVersion()` is not implemented. Override `rowVersion()` to return a `NamedReference` pointing to a non-nullable column in `Changelog.columns()`." + ] + }, + "NESTED_ROW_VERSION" : { + "message" : [ + "Row version `` references a nested column. Spark requires `Changelog.rowVersion()` to point to a top-level column of `Changelog.columns()`." + ] + }, + "NULLABLE_ROW_VERSION" : { "message" : [ - "rowId `` is nested. Spark currently requires row identity columns to be top-level columns of the changelog schema. Project the nested field to a top-level column in `Changelog.columns()` and point `Changelog.rowId()` at that column." + "The row version column `` is nullable. Spark requires a non-nullable row version column when `containsCarryoverRows` or `representsUpdateAsDeleteAndInsert` is `true`, to safely distinguish copy-on-write carry-over pairs from real updates. Mark the column non-nullable in `Changelog.columns()`." ] } }, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveChangelogTable.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveChangelogTable.scala index 0df9ccec393e6..9cc74b6dc8f3f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveChangelogTable.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveChangelogTable.scala @@ -21,6 +21,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.{Count, Max, Min} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2 import org.apache.spark.sql.catalyst.trees.TreeNodeTag import org.apache.spark.sql.connector.catalog.{Changelog, ChangelogInfo} import org.apache.spark.sql.errors.QueryCompilationErrors @@ -28,18 +29,24 @@ import org.apache.spark.sql.execution.datasources.v2.{ChangelogTable, DataSource import org.apache.spark.sql.types.{IntegerType, StringType} /** - * Post-processes a resolved [[DataSourceV2Relation]] backed by a [[ChangelogTable]] to inject - * carry-over removal and/or update detection plans. + * Post-processes a resolved [[ChangelogTable]] read to apply CDC option semantics + * (carry-over removal, update detection) and to enforce supported option combinations. * * Fires after [[ResolveRelations]] has wrapped the connector's [[Changelog]] in a - * [[DataSourceV2Relation]]. Skipped entirely when [[Changelog.rowId]] is empty (no row identity - * available; raw delete/insert rows are returned as-is). + * [[ChangelogTable]]. Both batch ([[DataSourceV2Relation]]) and streaming + * ([[StreamingRelationV2]]) reads are handled: + * - Batch: the requested post-processing passes are injected as logical operators on top + * of the relation. Carry-over removal and update detection are fused into a single + * pass over a (rowId, _commit_version)-partitioned Window: the Filter drops CoW + * carry-over pairs (same rowVersion on both sides) and the subsequent Project relabels + * real delete+insert pairs as update_preimage / update_postimage. + * - Streaming: post-processing is not yet supported. If the requested options would + * require any post-processing, the rule throws an explicit [[AnalysisException]] to + * prevent silent wrong results. Streams that don't require post-processing pass + * through unchanged. * - * Carry-over removal and update detection are fused into a single pass over a - * (rowId, _commit_version)-partitioned Window: the Filter drops CoW carry-over pairs - * (same rowVersion on both sides) and the subsequent Project relabels real delete+insert - * pairs as update_preimage / update_postimage. Net change computation runs afterwards - * as a separate rule step. + * Net change computation (`deduplicationMode = netChanges`) is not yet implemented and + * is rejected up-front for both batch and streaming. */ object ResolveChangelogTable extends Rule[LogicalPlan] { @@ -59,46 +66,31 @@ object ResolveChangelogTable extends Rule[LogicalPlan] { if (isAlreadyTransformed(plan)) return plan var updatedPlan = plan updatedPlan = plan.resolveOperatorsUp { - // Guard: without row identity, carry-over removal and update detection cannot - // correctly group rows. A match-miss leaves the relation unchanged -- exactly what - // we want for connectors that surface an empty rowId(). - case rel @ DataSourceV2Relation(table: ChangelogTable, _, _, _, _, _) - if table.changelog.rowId().nonEmpty => + case rel @ DataSourceV2Relation(table: ChangelogTable, _, _, _, _, _) => val changelog = table.changelog - val options = table.changelogInfo - - // Net change computation is not yet implemented. - if (options.deduplicationMode() == ChangelogInfo.DeduplicationMode.NET_CHANGES) { - throw QueryCompilationErrors.cdcNetChangesNotYetSupported(changelog.name()) - } - - val requiresCarryOverRemoval = - options.deduplicationMode() != ChangelogInfo.DeduplicationMode.NONE && - changelog.containsCarryoverRows() - val requiresUpdateDetection = - options.computeUpdates() && changelog.representsUpdateAsDeleteAndInsert() - val requiresNetChanges = - options.deduplicationMode() == ChangelogInfo.DeduplicationMode.NET_CHANGES && - changelog.containsIntermediateChanges() - - // If carry-overs are surfaced and update detection is enabled, carry-overs will - // falsely be classified as updates, leading to false results. Hence we throw. - if (requiresUpdateDetection && - changelog.containsCarryoverRows() && - options.deduplicationMode() == ChangelogInfo.DeduplicationMode.NONE) { - throw QueryCompilationErrors.cdcUpdateDetectionRequiresCarryOverRemoval( - changelog.name()) - } + val req = evaluateRequirements(changelog, table.changelogInfo) var updatedRel: LogicalPlan = rel - if (requiresCarryOverRemoval || requiresUpdateDetection) { + if (req.requiresCarryOverRemoval || req.requiresUpdateDetection) { updatedRel = addRowLevelPostProcessing( - rel, changelog, requiresCarryOverRemoval, requiresUpdateDetection) + rel, changelog, req.requiresCarryOverRemoval, req.requiresUpdateDetection) } - if (requiresNetChanges) { + if (req.requiresNetChanges) { updatedRel = injectNetChangeComputation(updatedRel, changelog) } updatedRel + + case rel @ StreamingRelationV2(_, _, table: ChangelogTable, _, _, _, _, _, _) => + // Streaming CDC reads do not yet apply post-processing. Run the same option / + // capability validation as the batch path so silent wrong results are impossible: + // either no post-processing would be required (fall through, return raw stream), + // or we throw an explicit AnalysisException. + val changelog = table.changelog + val req = evaluateRequirements(changelog, table.changelogInfo) + if (req.needsAny) { + throw QueryCompilationErrors.cdcStreamingPostProcessingNotSupported(changelog.name()) + } + rel } if (updatedPlan ne plan) { updatedPlan.setTagValue(CHANGELOG_TRANSFORMED_TAG, true) @@ -106,6 +98,60 @@ object ResolveChangelogTable extends Rule[LogicalPlan] { updatedPlan } + // --------------------------------------------------------------------------- + // Option validation & Requirement Computation + // --------------------------------------------------------------------------- + + /** + * Captures which post-processing passes a CDC query requires, derived from the + * user-provided [[ChangelogInfo]] options and the connector-declared [[Changelog]] + * capability flags. + */ + private case class PostProcessingRequirements( + requiresCarryOverRemoval: Boolean, + requiresUpdateDetection: Boolean, + requiresNetChanges: Boolean) { + def needsAny: Boolean = + requiresCarryOverRemoval || requiresUpdateDetection || requiresNetChanges + } + + /** + * Validates CDC option/capability combinations and computes which post-processing + * passes are required. Throws an [[org.apache.spark.sql.AnalysisException]] for + * unsupported or contradictory combinations (currently: `netChanges` deduplication, + * and `computeUpdates` with surfaced carry-overs but no carry-over removal). + */ + private def evaluateRequirements( + changelog: Changelog, + options: ChangelogInfo): PostProcessingRequirements = { + // Net change computation is not yet implemented. + if (options.deduplicationMode() == ChangelogInfo.DeduplicationMode.NET_CHANGES) { + throw QueryCompilationErrors.cdcNetChangesNotYetSupported(changelog.name()) + } + + val requiresCarryOverRemoval = + options.deduplicationMode() != ChangelogInfo.DeduplicationMode.NONE && + changelog.containsCarryoverRows() + val requiresUpdateDetection = + options.computeUpdates() && changelog.representsUpdateAsDeleteAndInsert() + val requiresNetChanges = + options.deduplicationMode() == ChangelogInfo.DeduplicationMode.NET_CHANGES && + changelog.containsIntermediateChanges() + + // If carry-overs are surfaced and update detection is enabled without carry-over + // removal, carry-overs would be falsely classified as updates, leading to wrong + // results. Hence we throw. + if (requiresUpdateDetection && + changelog.containsCarryoverRows() && + options.deduplicationMode() == ChangelogInfo.DeduplicationMode.NONE) { + throw QueryCompilationErrors.cdcUpdateDetectionRequiresCarryOverRemoval( + changelog.name()) + } + + PostProcessingRequirements( + requiresCarryOverRemoval, requiresUpdateDetection, requiresNetChanges) + } + // --------------------------------------------------------------------------- // Row Level Post Processing (Update Detection & Carry-over Removal) // --------------------------------------------------------------------------- @@ -222,17 +268,17 @@ object ResolveChangelogTable extends Rule[LogicalPlan] { } // --------------------------------------------------------------------------- - // Net Change Computation (future) + // Net Change Computation // --------------------------------------------------------------------------- /** - * Collapses multiple changes per row identity into the net effect. Not in - * scope for this implementation. + * Collapses multiple changes per row identity into the net effect. + * Not yet implemented. */ private def injectNetChangeComputation( plan: LogicalPlan, cl: Changelog): LogicalPlan = { - plan + plan } // --------------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index d4d50c2674c72..6b167af84aa8a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -3891,6 +3891,15 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat messageParameters = Map("changelogName" -> changelogName)) } + def changelogNestedRowVersionError( + changelogName: String, reference: String): AnalysisException = { + new AnalysisException( + errorClass = "INVALID_CHANGELOG_SCHEMA.NESTED_ROW_VERSION", + messageParameters = Map( + "changelogName" -> changelogName, + "reference" -> reference)) + } + def cdcUpdateDetectionRequiresCarryOverRemoval( changelogName: String): AnalysisException = { new AnalysisException( @@ -3904,6 +3913,12 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat messageParameters = Map("changelogName" -> changelogName)) } + def cdcStreamingPostProcessingNotSupported(changelogName: String): AnalysisException = { + new AnalysisException( + errorClass = "INVALID_CDC_OPTION.STREAMING_POST_PROCESSING_NOT_SUPPORTED", + messageParameters = Map("changelogName" -> changelogName)) + } + def invalidCdcOptionConflictingRangeTypes(): Throwable = { new AnalysisException( errorClass = "INVALID_CDC_OPTION.CONFLICTING_RANGE_TYPES", diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ChangelogTable.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ChangelogTable.scala index 39022403487c6..74de2600d053a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ChangelogTable.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ChangelogTable.scala @@ -70,38 +70,47 @@ object ChangelogTable { check("_commit_version") // connector-defined, any type accepted check("_commit_timestamp", TimestampType) + // `rowId()` / `rowVersion()` default to throwing UnsupportedOperationException for + // connectors that haven't opted in. Translate that into "not declared" so we can + // reason about it as Option/empty-array below. + val rowIds: Array[NamedReference] = try cl.rowId() catch { + case _: UnsupportedOperationException => Array.empty + } + val rowVersionRef: Option[NamedReference] = try Some(cl.rowVersion()) catch { + case _: UnsupportedOperationException => None + } + + // Capability-driven presence checks: a connector that advertises a capability which + // requires row identity or row versioning must actually expose those references. + // Otherwise post-processing would crash with an UnsupportedOperationException at + // runtime instead of producing a clean AnalysisException here. val needsRowId = cl.containsCarryoverRows() || cl.representsUpdateAsDeleteAndInsert() || cl.containsIntermediateChanges() + if (needsRowId && (rowIds == null || rowIds.isEmpty)) { + throw QueryCompilationErrors.changelogMissingRowIdError(cl.name) + } + val needsRowVersion = cl.containsCarryoverRows() || cl.representsUpdateAsDeleteAndInsert() - - if (needsRowId) { - val rowIds = try cl.rowId() catch { - case _: UnsupportedOperationException => Array.empty[NamedReference] - } - if (rowIds == null || rowIds.isEmpty) { - throw QueryCompilationErrors.changelogMissingRowIdError(cl.name) - } + if (needsRowVersion && rowVersionRef.isEmpty) { + throw QueryCompilationErrors.changelogMissingRowVersionError(cl.name) } - if (needsRowVersion) { - val rowVersionRef = try Some(cl.rowVersion()) catch { - case _: UnsupportedOperationException => None - } - if (rowVersionRef.isEmpty) { - throw QueryCompilationErrors.changelogMissingRowVersionError(cl.name) + // Schema constraints on rowVersion: must be a top-level non-nullable column. + // Nullable rowVersions break carry-over detection (NULL = NULL is unknown, so a + // delete+insert pair would be misclassified as a real update). + rowVersionRef.foreach { ref => + val fieldNames = ref.fieldNames() + if (fieldNames.length != 1) { + throw QueryCompilationErrors.changelogNestedRowVersionError( + cl.name, fieldNames.mkString(".")) } - rowVersionRef.foreach { ref => - val fieldNames = ref.fieldNames() - require(fieldNames.length == 1, - s"Nested rowVersion is not supported: ${fieldNames.mkString(".")}") - val columnName = fieldNames(0) - val col = byName.getOrElse(columnName, - throw QueryCompilationErrors.changelogMissingColumnError(cl.name, columnName)) - if (col.nullable) { - throw QueryCompilationErrors.changelogNullableRowVersionError(cl.name, columnName) - } + val columnName = fieldNames(0) + val col = byName.getOrElse(columnName, + throw QueryCompilationErrors.changelogMissingColumnError(cl.name, columnName)) + if (col.nullable) { + throw QueryCompilationErrors.changelogNullableRowVersionError(cl.name, columnName) } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryChangelogCatalog.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryChangelogCatalog.scala index ff3baf16065d8..3a37b0a84fa26 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryChangelogCatalog.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryChangelogCatalog.scala @@ -137,7 +137,11 @@ class InMemoryChangelogCatalog extends InMemoryCatalog { * @param rowIdNames optional row identity columns as top-level names (e.g. Seq("id")) * @param rowIdPaths optional row identity paths for nested struct fields * (e.g. Seq(Seq("payload", "id"))); takes precedence over rowIdNames - * @param rowVersionName optional row version column (e.g. Some("_commit_version")) + * @param rowVersionName optional row version column (e.g. Some("row_commit_version")); + * must be a per-row version that distinguishes carry-overs from + * real updates. Do NOT pass the commit version, which is constant + * within a partition and would cause every delete+insert pair to + * look like a carry-over */ case class ChangelogProperties( containsCarryoverRows: Boolean = false, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/ChangelogResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/ChangelogResolutionSuite.scala index c873dcefa287e..4f295752e96fd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/ChangelogResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/ChangelogResolutionSuite.scala @@ -334,6 +334,71 @@ class ChangelogResolutionSuite extends QueryTest with SharedSparkSession { rowVersionRef = Some(FieldReference.column("row_commit_version"))) ChangelogTable(cl, stubInfo()) } + + // =========================================================================== + // Streaming post-processing rejection + // =========================================================================== + // + // Streaming CDC reads bypass the post-processing analyzer rule's transformation + // path. To prevent silent wrong results when the requested options would require + // post-processing, the rule throws an explicit AnalysisException for streaming. + + /** Re-creates the test table with non-nullable columns suitable as rowId / rowVersion. */ + private def recreatePostProcessingTable(): Identifier = { + val cat = spark.sessionState.catalogManager.catalog(cdcCatalogName).asTableCatalog + val ident = Identifier.of(Array.empty, "test_table") + if (cat.tableExists(ident)) cat.dropTable(ident) + cat.createTable( + ident, + Array( + Column.create("id", LongType, false), + Column.create("row_commit_version", LongType, false)), + Array.empty[Transform], + Collections.emptyMap[String, String]()) + ident + } + + test("DataStreamReader - changes() with carry-over capability throws") { + val ident = recreatePostProcessingTable() + val cat = spark.sessionState.catalogManager + .catalog(cdcCatalogName) + .asInstanceOf[InMemoryChangelogCatalog] + cat.setChangelogProperties(ident, ChangelogProperties( + containsCarryoverRows = true, + rowIdNames = Seq("id"), + rowVersionName = Some("row_commit_version"))) + + checkError( + intercept[AnalysisException] { + spark.readStream + .changes(s"$cdcCatalogName.test_table") + .queryExecution.analyzed + }, + condition = "INVALID_CDC_OPTION.STREAMING_POST_PROCESSING_NOT_SUPPORTED", + parameters = Map("changelogName" -> s"$cdcCatalogName.test_table_changelog")) + } + + test("DataStreamReader - changes() with computeUpdates throws") { + val ident = recreatePostProcessingTable() + val cat = spark.sessionState.catalogManager + .catalog(cdcCatalogName) + .asInstanceOf[InMemoryChangelogCatalog] + cat.setChangelogProperties(ident, ChangelogProperties( + representsUpdateAsDeleteAndInsert = true, + rowIdNames = Seq("id"), + rowVersionName = Some("row_commit_version"))) + + checkError( + intercept[AnalysisException] { + spark.readStream + .option("computeUpdates", "true") + .option("deduplicationMode", "none") + .changes(s"$cdcCatalogName.test_table") + .queryExecution.analyzed + }, + condition = "INVALID_CDC_OPTION.STREAMING_POST_PROCESSING_NOT_SUPPORTED", + parameters = Map("changelogName" -> s"$cdcCatalogName.test_table_changelog")) + } } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/ResolveChangelogTablePostProcessingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/ResolveChangelogTablePostProcessingSuite.scala index b3dbc0a64ad04..be97ceb04fe3e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/ResolveChangelogTablePostProcessingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/ResolveChangelogTablePostProcessingSuite.scala @@ -305,10 +305,10 @@ class ResolveChangelogTablePostProcessingSuite s"CHANGES FROM VERSION 1 TO VERSION 2 WITH (computeUpdates = 'true')") val plan = df.queryExecution.analyzed.treeString - assert(!plan.contains("CarryOverRemoval"), - s"Plan should NOT contain CarryOverRemoval without rowId. Plan:\n$plan") + assert(!plan.contains("_del_cnt"), + s"Plan must not contain post-processing window helpers without rowId. Plan:\n$plan") assert(!plan.contains("_ins_cnt"), - s"Plan should NOT contain update detection window without rowId. Plan:\n$plan") + s"Plan must not contain post-processing window helpers without rowId. Plan:\n$plan") } // =========================================================================== @@ -778,7 +778,7 @@ class ResolveChangelogTablePostProcessingSuite assert(descs.contains("5:E:delete")) } - test("carry-over removal with mixed types (DOUBLE, BOOLEAN)") { + test("carry-over removal with mixed types (DOUBLE, BOOLEAN, BINARY)") { val mixedTable = "events_mixed" val mixedIdent = Identifier.of(Array.empty, mixedTable) val cat = catalog @@ -791,6 +791,7 @@ class ResolveChangelogTablePostProcessingSuite Column.create("name", StringType), Column.create("score", DoubleType), Column.create("active", BooleanType), + Column.create("payload", BinaryType), Column.create("row_commit_version", LongType, false)), Array.empty[Transform], Collections.emptyMap[String, String]()) @@ -801,21 +802,24 @@ class ResolveChangelogTablePostProcessingSuite rowVersionName = Some("row_commit_version"))) def mixedRow( - id: Long, name: String, score: Double, active: Boolean, + id: Long, name: String, score: Double, active: Boolean, payload: Array[Byte], ct: String, v: Long, rcv: Long): InternalRow = { InternalRow( - id, UTF8String.fromString(name), score, active, rcv, + id, UTF8String.fromString(name), score, active, payload, rcv, UTF8String.fromString(ct), v, 0L) } + val alicePayload = Array[Byte](1, 2, 3) + val bobPayload = Array[Byte](4, 5, 6) + cat.addChangeRows(mixedIdent, Seq( - mixedRow(1L, "Alice", 95.5, true, "insert", 1L, rcv = 1L), - mixedRow(2L, "Bob", 87.3, false, "insert", 1L, rcv = 1L), + mixedRow(1L, "Alice", 95.5, true, alicePayload, "insert", 1L, rcv = 1L), + mixedRow(2L, "Bob", 87.3, false, bobPayload, "insert", 1L, rcv = 1L), // v2: update Alice's score (old rcv=1, new rcv=2); Bob is carry-over (rcv unchanged) - mixedRow(1L, "Alice", 95.5, true, "delete", 2L, rcv = 1L), - mixedRow(1L, "Alice", 99.0, true, "insert", 2L, rcv = 2L), - mixedRow(2L, "Bob", 87.3, false, "delete", 2L, rcv = 1L), // carry-over - mixedRow(2L, "Bob", 87.3, false, "insert", 2L, rcv = 1L))) // carry-over + mixedRow(1L, "Alice", 95.5, true, alicePayload, "delete", 2L, rcv = 1L), + mixedRow(1L, "Alice", 99.0, true, alicePayload, "insert", 2L, rcv = 2L), + mixedRow(2L, "Bob", 87.3, false, bobPayload, "delete", 2L, rcv = 1L), // carry-over + mixedRow(2L, "Bob", 87.3, false, bobPayload, "insert", 2L, rcv = 1L))) // carry-over val rows = sql( s"SELECT id, name, score, active, _change_type FROM $catalogName.$mixedTable " + @@ -827,9 +831,9 @@ class ResolveChangelogTablePostProcessingSuite assert(descs.contains("1:update_preimage")) assert(descs.contains("1:update_postimage")) assert(!descs.contains("2:delete"), - s"Bob carry-over must be dropped despite DOUBLE/BOOLEAN. Got: ${descs.mkString(",")}") + s"Bob carry-over must be dropped despite DOUBLE/BOOLEAN/BINARY. Got: " + + descs.mkString(",")) - // Verify the score values val pre = rows.find(r => r.getLong(0) == 1L && r.getString(4) == "update_preimage").get val post = rows.find(r => r.getLong(0) == 1L && r.getString(4) == "update_postimage").get assert(pre.getDouble(2) == 95.5) @@ -900,84 +904,15 @@ class ResolveChangelogTablePostProcessingSuite descs.mkString(",")) } - // =========================================================================== - // Regression: BinaryType carry-over (reference-equality bug) - // =========================================================================== - - // Two carry-over rows with equal-by-content but reference-unequal byte arrays. - // Expectation: Bob's delete+insert pair is identical data and must be dropped. - // If `dataColumnsEqual` uses `==` (reference equality) on `Array[Byte]`, this test fails - // because the two payloads are different JVM objects. - test("carry-over removal with BinaryType column drops content-equal pair") { - val binTable = "events_binary" - val binIdent = Identifier.of(Array.empty, binTable) - val cat = catalog - if (cat.tableExists(binIdent)) cat.dropTable(binIdent) - cat.clearChangeRows(binIdent) - cat.createTable( - binIdent, - Array( - Column.create("id", LongType), - Column.create("payload", BinaryType), - Column.create("row_commit_version", LongType, false)), - Array.empty[Transform], - Collections.emptyMap[String, String]()) - cat.setChangelogProperties(binIdent, ChangelogProperties( - containsCarryoverRows = true, - rowIdNames = Seq("id"), - rowVersionName = Some("row_commit_version"))) - - def binRow(id: Long, payload: Array[Byte], ct: String, v: Long, rcv: Long): InternalRow = { - InternalRow(id, payload, rcv, UTF8String.fromString(ct), v, 0L) - } - - // Bob's carry-over: two distinct byte[] instances; but partition-based carry-over - // detection looks only at rowVersion, not byte content, so the distinct-instance - // concern is moot under the new design. Kept as a regression guard for schemas - // containing BinaryType columns. - val bobPayloadA = Array[Byte](1, 2, 3, 4) - val bobPayloadB = Array[Byte](1, 2, 3, 4) - assert(bobPayloadA ne bobPayloadB, "test precondition: distinct array instances") - - cat.addChangeRows(binIdent, Seq( - binRow(1L, Array[Byte](9, 9), "insert", 1L, rcv = 1L), - binRow(2L, bobPayloadA, "insert", 1L, rcv = 1L), - // v2: real delete for Alice (preimage carries old rcv=1); - // Bob carry-over (rcv unchanged=1 on both sides) - binRow(1L, Array[Byte](9, 9), "delete", 2L, rcv = 1L), - binRow(2L, bobPayloadA, "delete", 2L, rcv = 1L), // carry-over - binRow(2L, bobPayloadB, "insert", 2L, rcv = 1L))) // carry-over (same rcv) - - val rows = sql( - s"SELECT id, _change_type, _commit_version FROM $catalogName.$binTable " + - s"CHANGES FROM VERSION 1 TO VERSION 2") - .orderBy("_commit_version", "id", "_change_type") - .collect() - - val descs = rows.map(r => s"${r.getLong(0)}:${r.getString(1)}:v${r.getLong(2)}") - - // v1 inserts retained - assert(descs.contains("1:insert:v1"), s"v1 insert for id=1. Got: ${descs.mkString(",")}") - assert(descs.contains("2:insert:v1"), s"v1 insert for id=2. Got: ${descs.mkString(",")}") - // v2 real delete retained - assert(descs.contains("1:delete:v2"), "Real delete for id=1 retained") - // v2 Bob carry-over must be dropped despite distinct byte[] instances - assert(!descs.contains("2:delete:v2"), - s"Bob BinaryType carry-over delete must be dropped. Got: ${descs.mkString(",")}") - assert(!descs.contains("2:insert:v2"), - s"Bob BinaryType carry-over insert must be dropped. Got: ${descs.mkString(",")}") - } - // =========================================================================== // No-op UPDATE is correctly preserved as update_preimage/postimage // =========================================================================== test("no-op UPDATE is labeled as update (row_commit_version differs on pre/post)") { - // Under the partition-based design (SPIP B.6 updated), a no-op UPDATE is no longer - // silently dropped. Delta bumps row_commit_version on every UPDATE even when data is - // byte-identical, so the delete side carries the OLD rcv and the insert side the NEW - // rcv. Carry-over removal sees different rowVersions -> real change -> both survive - // and get labeled as update_preimage / update_postimage by update detection. + // A no-op UPDATE bumps row_commit_version even when data is byte-identical, so the + // delete side carries the OLD rcv and the insert side the NEW rcv. Window post-processing + // sees different rowVersions, treats this as a real change, and labels both rows as + // update_preimage / update_postimage. catalog.setChangelogProperties(ident, ChangelogProperties( containsCarryoverRows = true, representsUpdateAsDeleteAndInsert = true,