From 9f2570c89a4847125a6ecb7cc688e70b1337a271 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 9 Jul 2025 08:37:02 +0800 Subject: [PATCH 01/59] add GENERAL_TABLE v2 table capacity --- .../connector/catalog/TableCapability.java | 12 +- .../sql/connector/catalog/TableCatalog.java | 12 ++ .../sql/catalyst/analysis/Analyzer.scala | 11 +- .../analysis/RelationResolution.scala | 35 ++-- .../spark/sql/connector/catalog/V1Table.scala | 50 ++++- .../DataSourceV2GeneralTableSuite.scala | 188 ++++++++++++++++++ 6 files changed, 289 insertions(+), 19 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2GeneralTableSuite.scala diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCapability.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCapability.java index 0a01c0c266b9a..13595754a3151 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCapability.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCapability.java @@ -106,5 +106,15 @@ public enum TableCapability { * write modes, like {@link #TRUNCATE}, and {@link #OVERWRITE_BY_FILTER}, but cannot support * {@link #OVERWRITE_DYNAMIC}. */ - V1_BATCH_WRITE + V1_BATCH_WRITE, + + /** + * Signals that the table is a general table which does not implement read/write directly, + * but relies on Spark to interpret the table metadata and read it as a view or resolve the + * table provider into a data source. + *

+ * If this table capacity is present, other read/write capacities are ignored as this table + * instance is only used to store table metadata. + */ + GENERAL_TABLE } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java index d5a36cd8bfb86..fa18543194063 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java @@ -87,6 +87,18 @@ public interface TableCatalog extends CatalogPlugin { */ String PROP_OWNER = "owner"; + /** + * A reserved property to specify the view text of a general table that represents + * a SQL view. The identifiers must be fully qualified in the view text to be + * context-independent, otherwise the behavior is undefined. + */ + String PROP_VIEW_TEXT = "view_text"; + + /** + * A prefix used to specify the Spark SQL configurations for reading this view. 
+ */ + String VIEW_CONF_PREFIX = "view.sqlConfig."; + /** * A prefix used to pass OPTIONS in table properties */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 1224b8ba18a3d..3c01ba8b64053 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1121,7 +1121,16 @@ class Analyzer( ResolvedPersistentView( catalog, v2Ident, v1Table.catalogTable) case table => - ResolvedTable.create(catalog.asTableCatalog, ident, table) + if (table.capabilities().contains(TableCapability.GENERAL_TABLE)) { + val catalogTable = V1Table.toCatalogTable(catalog, ident, table) + if (catalogTable.tableType == CatalogTableType.VIEW) { + ResolvedPersistentView(catalog, ident, catalogTable) + } else { + ResolvedTable.create(catalog.asTableCatalog, ident, table) + } + } else { + ResolvedTable.create(catalog.asTableCatalog, ident, table) + } } case _ => None } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala index e86248febd2eb..ddb818249a5ac 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala @@ -23,6 +23,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.SQLConfHelper import org.apache.spark.sql.catalyst.catalog.{ + CatalogTable, CatalogTableType, TemporaryViewRelation, UnresolvedCatalogRelation @@ -37,6 +38,7 @@ import org.apache.spark.sql.connector.catalog.{ Identifier, LookupCatalog, Table, + TableCapability, V1Table, V2TableWithV1Fallback } @@ -314,6 
+316,22 @@ class RelationResolution( options: CaseInsensitiveStringMap, isStreaming: Boolean, timeTravelSpec: Option[TimeTravelSpec]): Option[LogicalPlan] = { + def createDataSourceV1Scan(v1Table: CatalogTable): LogicalPlan = { + if (isStreaming) { + if (v1Table.tableType == CatalogTableType.VIEW) { + throw QueryCompilationErrors.permanentViewNotSupportedByStreamingReadingAPIError( + ident.quoted + ) + } + SubqueryAlias( + catalog.name +: ident.asMultipartIdentifier, + UnresolvedCatalogRelation(v1Table, options, isStreaming = true) + ) + } else { + v1SessionCatalog.getRelation(v1Table, options) + } + } + table.map { // To utilize this code path to execute V1 commands, e.g. INSERT, // either it must be session catalog, or tracksPartitionsInCatalog @@ -324,19 +342,10 @@ class RelationResolution( case v1Table: V1Table if CatalogV2Util.isSessionCatalog(catalog) || !v1Table.catalogTable.tracksPartitionsInCatalog => - if (isStreaming) { - if (v1Table.v1Table.tableType == CatalogTableType.VIEW) { - throw QueryCompilationErrors.permanentViewNotSupportedByStreamingReadingAPIError( - ident.quoted - ) - } - SubqueryAlias( - catalog.name +: ident.asMultipartIdentifier, - UnresolvedCatalogRelation(v1Table.v1Table, options, isStreaming = true) - ) - } else { - v1SessionCatalog.getRelation(v1Table.v1Table, options) - } + createDataSourceV1Scan(v1Table.v1Table) + + case t if t.capabilities().contains(TableCapability.GENERAL_TABLE) => + createDataSourceV1Scan(V1Table.toCatalogTable(catalog, ident, t)) case table => if (isStreaming) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala index eee6ddf3e58fd..ed81db4ba34ed 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala @@ -22,8 +22,9 @@ import java.util import 
scala.collection.mutable import scala.jdk.CollectionConverters._ -import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType, CatalogUtils} -import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.TableIdentifierHelper +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType, CatalogUtils, ClusterBySpec} +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.connector.catalog.V1Table.addV2TableProperties import org.apache.spark.sql.connector.expressions.{LogicalExpressions, Transform} import org.apache.spark.sql.types.StructType @@ -49,7 +50,6 @@ private[sql] case class V1Table(v1Table: CatalogTable) extends Table { override lazy val schema: StructType = v1Table.schema override lazy val partitioning: Array[Transform] = { - import CatalogV2Implicits._ val partitions = new mutable.ArrayBuffer[Transform]() v1Table.partitionColumnNames.foreach { col => @@ -70,7 +70,7 @@ private[sql] case class V1Table(v1Table: CatalogTable) extends Table { override def name: String = v1Table.identifier.quoted override def capabilities: util.Set[TableCapability] = - util.EnumSet.noneOf(classOf[TableCapability]) + util.EnumSet.of(TableCapability.GENERAL_TABLE) override def toString: String = s"V1Table($name)" } @@ -109,6 +109,48 @@ private[sql] object V1Table { case _ => None } } + + def toCatalogTable(catalog: CatalogPlugin, ident: Identifier, t: Table): CatalogTable = { + if (t.isInstanceOf[V1Table]) { + return t.asInstanceOf[V1Table].v1Table + } + assert(t.capabilities().contains(TableCapability.GENERAL_TABLE)) + val tableType = t.properties().get(TableCatalog.PROP_TABLE_TYPE) match { + case TableSummary.VIEW_TABLE_TYPE => CatalogTableType.VIEW + case TableSummary.MANAGED_TABLE_TYPE => CatalogTableType.MANAGED + case _ => CatalogTableType.EXTERNAL + } + val location = Option(t.properties().get(TableCatalog.PROP_LOCATION)) 
+ val viewText = Option(t.properties().get(TableCatalog.PROP_VIEW_TEXT)) + val (serdeProps, tableProps) = t.properties().asScala + .partition(_._1.startsWith(TableCatalog.OPTION_PREFIX)) + val (partCols, bucketSpec, clusterBySpec) = t.partitioning().toSeq.convertTransforms + CatalogTable( + identifier = TableIdentifier( + table = ident.name(), + database = Some(ident.namespace().lastOption.getOrElse("root")), + catalog = Some(catalog.name())), + tableType = tableType, + storage = CatalogStorageFormat.empty.copy( + locationUri = location.map(CatalogUtils.stringToURI), + // v2 table properties should be put into the serde properties as well in case + // it contains data source options. + properties = tableProps.toMap ++ serdeProps.map { + case (k, v) => k.drop(TableCatalog.OPTION_PREFIX.length) -> v + } + ), + schema = CatalogV2Util.v2ColumnsToStructType(t.columns()), + provider = Option(t.properties().get(TableCatalog.PROP_PROVIDER)), + partitionColumnNames = partCols, + bucketSpec = bucketSpec, + owner = Option(t.properties().get(TableCatalog.PROP_OWNER)).getOrElse("unknown"), + viewText = viewText, + viewOriginalText = viewText, + comment = Option(t.properties().get(TableCatalog.PROP_COMMENT)), + collation = Option(t.properties().get(TableCatalog.PROP_COLLATION)), + properties = tableProps.toMap ++ clusterBySpec.map(ClusterBySpec.toPropertyWithoutValidation) + ) + } } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2GeneralTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2GeneralTableSuite.scala new file mode 100644 index 0000000000000..be993a619da87 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2GeneralTableSuite.scala @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector + +import java.util + +import org.apache.spark.SparkConf +import org.apache.spark.sql.{QueryTest, Row} +import org.apache.spark.sql.catalyst.analysis.NoSuchTableException +import org.apache.spark.sql.connector.catalog.{Identifier, Table, TableCapability, TableCatalog, TableChange, TableSummary} +import org.apache.spark.sql.connector.expressions.{LogicalExpressions, Transform} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.util.CaseInsensitiveStringMap + +class DataSourceV2GeneralTableSuite extends QueryTest with SharedSparkSession { + import testImplicits._ + + override def sparkConf: SparkConf = super.sparkConf + .set("spark.sql.catalog.general_catalog", classOf[TestingGeneralCatalog].getName) + + test("file source table") { + withTempPath { path => + val loc = path.getCanonicalPath + val tableName = s"general_catalog.`$loc`.test_json" + + spark.range(10).select($"id".cast("string").as("col")).write.json(loc) + checkAnswer(spark.table(tableName), 0.until(10).map(i => Row(i.toString))) + + sql(s"INSERT INTO $tableName SELECT 'abc'") + checkAnswer(spark.table(tableName), 0.until(10).map(i => Row(i.toString)) :+ Row("abc")) + + sql(s"INSERT OVERWRITE $tableName SELECT 'xyz'") + checkAnswer(spark.table(tableName), 
Row("xyz")) + } + } + + test("partitioned file source table") { + withTempPath { path => + val loc = path.getCanonicalPath + val tableName = s"general_catalog.`$loc`.test_partitioned_json" + + Seq(1 -> 1, 2 -> 1).toDF("c1", "c2").write.partitionBy("c2").json(loc) + checkAnswer(spark.table(tableName), Seq(Row(1, 1), Row(2, 1))) + + sql(s"INSERT INTO $tableName SELECT 1, 2") + checkAnswer(spark.table(tableName), Seq(Row(1, 1), Row(2, 1), Row(1, 2))) + + sql(s"INSERT INTO $tableName PARTITION(c2=3) SELECT 1") + checkAnswer(spark.table(tableName), Seq(Row(1, 1), Row(2, 1), Row(1, 2), Row(1, 3))) + + sql(s"INSERT OVERWRITE $tableName PARTITION(c2=2) SELECT 10") + checkAnswer(spark.table(tableName), Seq(Row(1, 1), Row(2, 1), Row(10, 2), Row(1, 3))) + + sql(s"INSERT OVERWRITE $tableName SELECT 20, 20") + checkAnswer(spark.table(tableName), Row(20, 20)) + } + } + + // TODO: move the v2 data source table handling from V2SessionCatalog to the analyzer + ignore("v2 data source table") { + val tableName = "general_catalog.default.test_v2" + spark.table(tableName).show() + spark.table(tableName).explain(true) + checkAnswer(spark.table(tableName), 0.until(10).map(i => Row(i, -i))) + } + + test("general table as view") { + // TODO: support creating views. + withTable("spark_catalog.default.t") { + Seq("a", "b").toDF("col").write.saveAsTable("spark_catalog.default.t") + // Make sure the view config applies correctly. 
+ intercept[Exception](spark.table("general_catalog.ansi.test_view").collect()) + checkAnswer(spark.table("general_catalog.non_ansi.test_view"), Row("b", null)) + } + } +} + +class TestingGeneralCatalog extends TableCatalog { + + override def loadTable(ident: Identifier): Table = { + ident.name() match { + case "test_json" => new TestingGeneralJsonTable(ident.namespace().head) + case "test_partitioned_json" => new TestingGeneralPartitionedJsonTable(ident.namespace().head) + case "test_v2" => new TestingGeneralV2Table + case "test_view" => new TestingGeneralViewTable(ident.namespace().head == "ansi") + case _ => throw new NoSuchTableException(ident) + } + } + + override def alterTable(ident: Identifier, changes: TableChange*): Table = { + throw new RuntimeException("shouldn't be called") + } + override def dropTable(ident: Identifier): Boolean = { + throw new RuntimeException("shouldn't be called") + } + override def renameTable(oldIdent: Identifier, newIdent: Identifier): Unit = { + throw new RuntimeException("shouldn't be called") + } + override def listTables(namespace: Array[String]): Array[Identifier] = { + throw new RuntimeException("shouldn't be called") + } + + private var catalogName = "" + override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = { + catalogName = name + } + override def name(): String = catalogName +} + +class TestingGeneralJsonTable(path: String) extends Table { + override def name(): String = "test_json" + + override def schema(): StructType = new StructType().add("col", "string") + + override def capabilities(): util.Set[TableCapability] = + util.EnumSet.of(TableCapability.GENERAL_TABLE) + + override def properties(): util.Map[String, String] = util.Map.ofEntries( + util.Map.entry(TableCatalog.PROP_PROVIDER, "json"), + util.Map.entry(TableCatalog.PROP_LOCATION, path) + ); +} + +class TestingGeneralPartitionedJsonTable(path: String) extends Table { + override def name(): String = "test_partitioned_json" + + 
override def schema(): StructType = new StructType().add("c1", "int").add("c2", "int") + + override def capabilities(): util.Set[TableCapability] = + util.EnumSet.of(TableCapability.GENERAL_TABLE) + + override def partitioning(): Array[Transform] = + Array(LogicalExpressions.identity(LogicalExpressions.reference(Seq("c2")))) + + override def properties(): util.Map[String, String] = util.Map.ofEntries( + util.Map.entry(TableCatalog.PROP_PROVIDER, "json"), + util.Map.entry(TableCatalog.PROP_LOCATION, path) + ); +} + +class TestingGeneralV2Table extends Table { + override def name(): String = "test_v2" + + override def schema(): StructType = FakeV2Provider.schema + + override def capabilities(): util.Set[TableCapability] = + util.EnumSet.of(TableCapability.GENERAL_TABLE) + + override def properties(): util.Map[String, String] = util.Map.ofEntries( + util.Map.entry(TableCatalog.PROP_PROVIDER, classOf[FakeV2Provider].getName) + ); +} + +class TestingGeneralViewTable(ansi: Boolean) extends Table { + override def name(): String = "test_view" + + override def schema(): StructType = new StructType().add("col", "string").add("i", "int") + + override def capabilities(): util.Set[TableCapability] = + util.EnumSet.of(TableCapability.GENERAL_TABLE) + + override def properties(): util.Map[String, String] = util.Map.ofEntries( + util.Map.entry(TableCatalog.PROP_TABLE_TYPE, TableSummary.VIEW_TABLE_TYPE), + util.Map.entry( + TableCatalog.PROP_VIEW_TEXT, + "SELECT col, col::int AS i FROM spark_catalog.default.t WHERE col = 'b'"), + util.Map.entry( + TableCatalog.VIEW_CONF_PREFIX + SQLConf.ANSI_ENABLED.key, + ansi.toString) + ); +} From 914d81a5e3984e4a32cc8b4549427fdb26fcab99 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 11 Jul 2025 13:02:37 +0800 Subject: [PATCH 02/59] address comments --- .../spark/sql/connector/catalog/TableCapability.java | 9 +++++---- .../apache/spark/sql/catalyst/analysis/Analyzer.scala | 2 +- .../spark/sql/catalyst/analysis/RelationResolution.scala | 2 
+- .../org/apache/spark/sql/connector/catalog/V1Table.scala | 4 ++-- .../sql/connector/DataSourceV2GeneralTableSuite.scala | 8 ++++---- 5 files changed, 13 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCapability.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCapability.java index 13595754a3151..64e82d1857de0 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCapability.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCapability.java @@ -109,12 +109,13 @@ public enum TableCapability { V1_BATCH_WRITE, /** - * Signals that the table is a general table which does not implement read/write directly, - * but relies on Spark to interpret the table metadata and read it as a view or resolve the - * table provider into a data source. + * Signals that the table is a general Spark data source table or a Spark view, which does not + * implement read/write directly, but relies on Spark to interpret the table metadata and + * resolve the table provider into a data source, or read it as a view. This affects the table + * read/write operations but not DDL operations. *

* If this table capacity is present, other read/write capacities are ignored as this table * instance is only used to store table metadata. */ - GENERAL_TABLE + SPARK_TABLE_OR_VIEW } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 3c01ba8b64053..4a41158c9455a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1121,7 +1121,7 @@ class Analyzer( ResolvedPersistentView( catalog, v2Ident, v1Table.catalogTable) case table => - if (table.capabilities().contains(TableCapability.GENERAL_TABLE)) { + if (table.capabilities().contains(TableCapability.SPARK_TABLE_OR_VIEW)) { val catalogTable = V1Table.toCatalogTable(catalog, ident, table) if (catalogTable.tableType == CatalogTableType.VIEW) { ResolvedPersistentView(catalog, ident, catalogTable) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala index ddb818249a5ac..62713fb959b72 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala @@ -344,7 +344,7 @@ class RelationResolution( || !v1Table.catalogTable.tracksPartitionsInCatalog => createDataSourceV1Scan(v1Table.v1Table) - case t if t.capabilities().contains(TableCapability.GENERAL_TABLE) => + case t if t.capabilities().contains(TableCapability.SPARK_TABLE_OR_VIEW) => createDataSourceV1Scan(V1Table.toCatalogTable(catalog, ident, t)) case table => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala 
index ed81db4ba34ed..4e237d3bf4747 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala @@ -70,7 +70,7 @@ private[sql] case class V1Table(v1Table: CatalogTable) extends Table { override def name: String = v1Table.identifier.quoted override def capabilities: util.Set[TableCapability] = - util.EnumSet.of(TableCapability.GENERAL_TABLE) + util.EnumSet.of(TableCapability.SPARK_TABLE_OR_VIEW) override def toString: String = s"V1Table($name)" } @@ -114,7 +114,7 @@ private[sql] object V1Table { if (t.isInstanceOf[V1Table]) { return t.asInstanceOf[V1Table].v1Table } - assert(t.capabilities().contains(TableCapability.GENERAL_TABLE)) + assert(t.capabilities().contains(TableCapability.SPARK_TABLE_OR_VIEW)) val tableType = t.properties().get(TableCatalog.PROP_TABLE_TYPE) match { case TableSummary.VIEW_TABLE_TYPE => CatalogTableType.VIEW case TableSummary.MANAGED_TABLE_TYPE => CatalogTableType.MANAGED diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2GeneralTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2GeneralTableSuite.scala index be993a619da87..fa6468b281b04 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2GeneralTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2GeneralTableSuite.scala @@ -130,7 +130,7 @@ class TestingGeneralJsonTable(path: String) extends Table { override def schema(): StructType = new StructType().add("col", "string") override def capabilities(): util.Set[TableCapability] = - util.EnumSet.of(TableCapability.GENERAL_TABLE) + util.EnumSet.of(TableCapability.SPARK_TABLE_OR_VIEW) override def properties(): util.Map[String, String] = util.Map.ofEntries( util.Map.entry(TableCatalog.PROP_PROVIDER, "json"), @@ -144,7 +144,7 @@ class TestingGeneralPartitionedJsonTable(path: String) extends Table { 
override def schema(): StructType = new StructType().add("c1", "int").add("c2", "int") override def capabilities(): util.Set[TableCapability] = - util.EnumSet.of(TableCapability.GENERAL_TABLE) + util.EnumSet.of(TableCapability.SPARK_TABLE_OR_VIEW) override def partitioning(): Array[Transform] = Array(LogicalExpressions.identity(LogicalExpressions.reference(Seq("c2")))) @@ -161,7 +161,7 @@ class TestingGeneralV2Table extends Table { override def schema(): StructType = FakeV2Provider.schema override def capabilities(): util.Set[TableCapability] = - util.EnumSet.of(TableCapability.GENERAL_TABLE) + util.EnumSet.of(TableCapability.SPARK_TABLE_OR_VIEW) override def properties(): util.Map[String, String] = util.Map.ofEntries( util.Map.entry(TableCatalog.PROP_PROVIDER, classOf[FakeV2Provider].getName) @@ -174,7 +174,7 @@ class TestingGeneralViewTable(ansi: Boolean) extends Table { override def schema(): StructType = new StructType().add("col", "string").add("i", "int") override def capabilities(): util.Set[TableCapability] = - util.EnumSet.of(TableCapability.GENERAL_TABLE) + util.EnumSet.of(TableCapability.SPARK_TABLE_OR_VIEW) override def properties(): util.Map[String, String] = util.Map.ofEntries( util.Map.entry(TableCatalog.PROP_TABLE_TYPE, TableSummary.VIEW_TABLE_TYPE), From 171a52a9053dd7573d26c0243aa287e6571296fb Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 30 Jul 2025 00:07:08 +0800 Subject: [PATCH 03/59] address comment --- .../catalog/DataSourceTableOrView.java | 227 ++++++++++++++++++ .../connector/catalog/TableCapability.java | 13 +- .../sql/catalyst/analysis/Analyzer.scala | 16 +- .../analysis/RelationResolution.scala | 4 +- .../spark/sql/connector/catalog/V1Table.scala | 37 ++- ...aSourceV2DataSourceTableOrViewSuite.scala} | 102 +++----- 6 files changed, 284 insertions(+), 115 deletions(-) create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/DataSourceTableOrView.java rename 
sql/core/src/test/scala/org/apache/spark/sql/connector/{DataSourceV2GeneralTableSuite.scala => DataSourceV2DataSourceTableOrViewSuite.scala} (60%) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/DataSourceTableOrView.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/DataSourceTableOrView.java new file mode 100644 index 0000000000000..dfabb5ee1eb0c --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/DataSourceTableOrView.java @@ -0,0 +1,227 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.catalog; + +import java.util.Map; +import java.util.Set; + +import org.apache.spark.sql.connector.expressions.Transform; +import org.apache.spark.sql.types.StructType; + +/** + * A concrete {@code Table} implementation that represents a general Spark data source table or + * a Spark view, which does not implement read/write directly, but relies on Spark to interpret + * the table metadata and resolve the table provider into a data source, or read it as a view. + * This affects the table read/write operations but not DDL operations. 
+ */ +public class DataSourceTableOrView implements Table { + private final Builder builder; + + private DataSourceTableOrView(Builder builder) { + this.builder = builder; + } + + public static class Builder { + private final Column[] columns; + private String name = "data_source_table_or_view"; + private String provider = null; + private String location = null; + private String tableType = null; + private String owner = null; + private String comment = null; + private String collation = null; + private String viewText = null; + private String createVersion = ""; + private long createTime = 0; + private Map tableProps = Map.ofEntries(); + private Map serdeProps = Map.ofEntries(); + private Transform[] partitioning = new Transform[0]; + + public Builder(Column[] columns) { + assert columns != null; + this.columns = columns; + } + + public Builder(StructType schema) { + assert schema != null; + this.columns = CatalogV2Util.structTypeToV2Columns(schema); + } + + public Builder withName(String name) { + this.name = name; + return this; + } + + public Builder withProvider(String provider) { + this.provider = provider; + return this; + } + + public Builder withLocation(String location) { + this.location = location; + return this; + } + + public Builder withTableType(String tableType) { + this.tableType = tableType; + return this; + } + + public Builder withOwner(String owner) { + this.owner = owner; + return this; + } + + public Builder withComment(String comment) { + this.comment = comment; + return this; + } + + public Builder withCollation(String collation) { + this.collation = collation; + return this; + } + + public Builder withViewText(String viewText) { + this.viewText = viewText; + this.tableType = TableSummary.VIEW_TABLE_TYPE; + return this; + } + + public Builder withCreateVersion(String createVersion) { + this.createVersion = createVersion; + return this; + } + + public Builder withCreateTime(long createTime) { + this.createTime = createTime; + return this; + 
} + + public Builder withTableProps(Map tableProps) { + this.tableProps = tableProps; + return this; + } + + public Builder withSerdeProps(Map serdeProps) { + this.serdeProps = serdeProps; + return this; + } + + public Builder withPartitioning(Transform[] partitioning) { + this.partitioning = partitioning; + return this; + } + + public DataSourceTableOrView build() { + return new DataSourceTableOrView(this); + } + } + + public String getProvider() { + return builder.provider; + } + + public String getLocation() { + return builder.location; + } + + public String getTableType() { + return builder.tableType; + } + + public String getOwner() { + return builder.owner; + } + + public String getComment() { + return builder.comment; + } + + public String getCollation() { + return builder.collation; + } + + public String getViewText() { + return builder.viewText; + } + + public String getCreateVersion() { + return builder.createVersion; + } + + public long getCreateTime() { + return builder.createTime; + } + + public Map getTableProps() { + return builder.tableProps; + } + + public Map getSerdeProps() { + return builder.serdeProps; + } + + @Override + public Column[] columns() { + return builder.columns; + } + + @Override + public Map properties() { + Map props = new java.util.HashMap<>(builder.tableProps); + builder.serdeProps.forEach((k, v) -> props.put(TableCatalog.OPTION_PREFIX + k, v)); + if (getProvider() != null) { + props.put(TableCatalog.PROP_PROVIDER, getProvider()); + } + if (getLocation() != null) { + props.put(TableCatalog.PROP_LOCATION, getLocation()); + } + if (getTableType() != null) { + props.put(TableCatalog.PROP_TABLE_TYPE, getTableType()); + } + if (getOwner() != null) { + props.put(TableCatalog.PROP_OWNER, getOwner()); + } + if (getComment() != null) { + props.put(TableCatalog.PROP_COMMENT, getComment()); + } + if (getCollation() != null) { + props.put(TableCatalog.PROP_COLLATION, getCollation()); + } + if (getViewText() != null) { + 
props.put(TableCatalog.PROP_VIEW_TEXT, getViewText()); + } + return props; + } + + @Override + public Transform[] partitioning() { + return builder.partitioning; + } + + @Override + public String name() { + return builder.name; + } + + @Override + public Set capabilities() { + return Set.of(); + } +} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCapability.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCapability.java index 64e82d1857de0..0a01c0c266b9a 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCapability.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCapability.java @@ -106,16 +106,5 @@ public enum TableCapability { * write modes, like {@link #TRUNCATE}, and {@link #OVERWRITE_BY_FILTER}, but cannot support * {@link #OVERWRITE_DYNAMIC}. */ - V1_BATCH_WRITE, - - /** - * Signals that the table is a general Spark data source table or a Spark view, which does not - * implement read/write directly, but relies on Spark to interpret the table metadata and - * resolve the table provider into a data source, or read it as a view. This affects the table - * read/write operations but not DDL operations. - *

- * If this table capacity is present, other read/write capacities are ignored as this table - * instance is only used to store table metadata. - */ - SPARK_TABLE_OR_VIEW + V1_BATCH_WRITE } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 4a41158c9455a..dc165449b0b8a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1120,17 +1120,15 @@ class Analyzer( val v2Ident = Identifier.of(v1Ident.database.toArray, v1Ident.identifier) ResolvedPersistentView( catalog, v2Ident, v1Table.catalogTable) - case table => - if (table.capabilities().contains(TableCapability.SPARK_TABLE_OR_VIEW)) { - val catalogTable = V1Table.toCatalogTable(catalog, ident, table) - if (catalogTable.tableType == CatalogTableType.VIEW) { - ResolvedPersistentView(catalog, ident, catalogTable) - } else { - ResolvedTable.create(catalog.asTableCatalog, ident, table) - } + case t: DataSourceTableOrView => + val catalogTable = V1Table.toCatalogTable(catalog, ident, t) + if (catalogTable.tableType == CatalogTableType.VIEW) { + ResolvedPersistentView(catalog, ident, catalogTable) } else { - ResolvedTable.create(catalog.asTableCatalog, ident, table) + ResolvedTable.create(catalog.asTableCatalog, ident, t) } + case table => + ResolvedTable.create(catalog.asTableCatalog, ident, table) } case _ => None } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala index 62713fb959b72..4360d63b52323 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala @@ -35,10 
+35,10 @@ import org.apache.spark.sql.connector.catalog.{ CatalogPlugin, CatalogV2Util, ChangelogInfo, + DataSourceTableOrView, Identifier, LookupCatalog, Table, - TableCapability, V1Table, V2TableWithV1Fallback } @@ -344,7 +344,7 @@ class RelationResolution( || !v1Table.catalogTable.tracksPartitionsInCatalog => createDataSourceV1Scan(v1Table.v1Table) - case t if t.capabilities().contains(TableCapability.SPARK_TABLE_OR_VIEW) => + case t: DataSourceTableOrView => createDataSourceV1Scan(V1Table.toCatalogTable(catalog, ident, t)) case table => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala index 4e237d3bf4747..91d628ba59d36 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala @@ -70,7 +70,7 @@ private[sql] case class V1Table(v1Table: CatalogTable) extends Table { override def name: String = v1Table.identifier.quoted override def capabilities: util.Set[TableCapability] = - util.EnumSet.of(TableCapability.SPARK_TABLE_OR_VIEW) + util.EnumSet.noneOf(classOf[TableCapability]) override def toString: String = s"V1Table($name)" } @@ -110,20 +110,17 @@ private[sql] object V1Table { } } - def toCatalogTable(catalog: CatalogPlugin, ident: Identifier, t: Table): CatalogTable = { - if (t.isInstanceOf[V1Table]) { - return t.asInstanceOf[V1Table].v1Table - } - assert(t.capabilities().contains(TableCapability.SPARK_TABLE_OR_VIEW)) - val tableType = t.properties().get(TableCatalog.PROP_TABLE_TYPE) match { + def toCatalogTable( + catalog: CatalogPlugin, + ident: Identifier, + t: DataSourceTableOrView): CatalogTable = { + val tableType = t.getTableType() match { case TableSummary.VIEW_TABLE_TYPE => CatalogTableType.VIEW case TableSummary.MANAGED_TABLE_TYPE => CatalogTableType.MANAGED case _ => CatalogTableType.EXTERNAL } - val 
location = Option(t.properties().get(TableCatalog.PROP_LOCATION)) - val viewText = Option(t.properties().get(TableCatalog.PROP_VIEW_TEXT)) - val (serdeProps, tableProps) = t.properties().asScala - .partition(_._1.startsWith(TableCatalog.OPTION_PREFIX)) + val viewText = Option(t.getViewText()) + val tableProps = t.getTableProps().asScala.toMap val (partCols, bucketSpec, clusterBySpec) = t.partitioning().toSeq.convertTransforms CatalogTable( identifier = TableIdentifier( @@ -132,23 +129,23 @@ private[sql] object V1Table { catalog = Some(catalog.name())), tableType = tableType, storage = CatalogStorageFormat.empty.copy( - locationUri = location.map(CatalogUtils.stringToURI), + locationUri = Option(t.getLocation()).map(CatalogUtils.stringToURI), // v2 table properties should be put into the serde properties as well in case // it contains data source options. - properties = tableProps.toMap ++ serdeProps.map { - case (k, v) => k.drop(TableCatalog.OPTION_PREFIX.length) -> v - } + properties = tableProps ++ t.getSerdeProps().asScala ), schema = CatalogV2Util.v2ColumnsToStructType(t.columns()), - provider = Option(t.properties().get(TableCatalog.PROP_PROVIDER)), + provider = Option(t.getProvider), partitionColumnNames = partCols, bucketSpec = bucketSpec, - owner = Option(t.properties().get(TableCatalog.PROP_OWNER)).getOrElse("unknown"), + owner = Option(t.getOwner()).getOrElse("unknown"), + createTime = t.getCreateTime(), + createVersion = t.getCreateVersion(), viewText = viewText, viewOriginalText = viewText, - comment = Option(t.properties().get(TableCatalog.PROP_COMMENT)), - collation = Option(t.properties().get(TableCatalog.PROP_COLLATION)), - properties = tableProps.toMap ++ clusterBySpec.map(ClusterBySpec.toPropertyWithoutValidation) + comment = Option(t.getComment()), + collation = Option(t.getCollation()), + properties = tableProps ++ clusterBySpec.map(ClusterBySpec.toPropertyWithoutValidation) ) } } diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2GeneralTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataSourceTableOrViewSuite.scala similarity index 60% rename from sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2GeneralTableSuite.scala rename to sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataSourceTableOrViewSuite.scala index fa6468b281b04..34dc11d0532fe 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2GeneralTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataSourceTableOrViewSuite.scala @@ -17,19 +17,17 @@ package org.apache.spark.sql.connector -import java.util - import org.apache.spark.SparkConf import org.apache.spark.sql.{QueryTest, Row} import org.apache.spark.sql.catalyst.analysis.NoSuchTableException -import org.apache.spark.sql.connector.catalog.{Identifier, Table, TableCapability, TableCatalog, TableChange, TableSummary} -import org.apache.spark.sql.connector.expressions.{LogicalExpressions, Transform} +import org.apache.spark.sql.connector.catalog.{DataSourceTableOrView, Identifier, Table, TableCatalog, TableChange, TableSummary} +import org.apache.spark.sql.connector.expressions.LogicalExpressions import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap -class DataSourceV2GeneralTableSuite extends QueryTest with SharedSparkSession { +class DataSourceV2DataSourceTableOrViewSuite extends QueryTest with SharedSparkSession { import testImplicits._ override def sparkConf: SparkConf = super.sparkConf @@ -96,10 +94,33 @@ class TestingGeneralCatalog extends TableCatalog { override def loadTable(ident: Identifier): Table = { ident.name() match { - case "test_json" => new TestingGeneralJsonTable(ident.namespace().head) - case 
"test_partitioned_json" => new TestingGeneralPartitionedJsonTable(ident.namespace().head) - case "test_v2" => new TestingGeneralV2Table - case "test_view" => new TestingGeneralViewTable(ident.namespace().head == "ansi") + case "test_json" => + new DataSourceTableOrView.Builder(new StructType().add("col", "string")) + .withProvider("json") + .withLocation(ident.namespace().head) + .withTableType(TableSummary.EXTERNAL_TABLE_TYPE) + .build() + case "test_partitioned_json" => + val partitioning = LogicalExpressions.identity(LogicalExpressions.reference(Seq("c2"))) + new DataSourceTableOrView.Builder(new StructType().add("c1", "int").add("c2", "int")) + .withProvider("json") + .withLocation(ident.namespace().head) + .withTableType(TableSummary.EXTERNAL_TABLE_TYPE) + .withPartitioning(Array(partitioning)) + .build() + case "test_v2" => + new DataSourceTableOrView.Builder(FakeV2Provider.schema) + .withProvider(classOf[FakeV2Provider].getName) + .build() + case "test_view" => + val viewProps = java.util.Map.of( + TableCatalog.VIEW_CONF_PREFIX + SQLConf.ANSI_ENABLED.key, + (ident.namespace().head == "ansi").toString + ) + new DataSourceTableOrView.Builder(new StructType().add("col", "string").add("i", "int")) + .withViewText("SELECT col, col::int AS i FROM spark_catalog.default.t WHERE col = 'b'") + .withTableProps(viewProps) + .build() case _ => throw new NoSuchTableException(ident) } } @@ -123,66 +144,3 @@ class TestingGeneralCatalog extends TableCatalog { } override def name(): String = catalogName } - -class TestingGeneralJsonTable(path: String) extends Table { - override def name(): String = "test_json" - - override def schema(): StructType = new StructType().add("col", "string") - - override def capabilities(): util.Set[TableCapability] = - util.EnumSet.of(TableCapability.SPARK_TABLE_OR_VIEW) - - override def properties(): util.Map[String, String] = util.Map.ofEntries( - util.Map.entry(TableCatalog.PROP_PROVIDER, "json"), - util.Map.entry(TableCatalog.PROP_LOCATION, 
path) - ); -} - -class TestingGeneralPartitionedJsonTable(path: String) extends Table { - override def name(): String = "test_partitioned_json" - - override def schema(): StructType = new StructType().add("c1", "int").add("c2", "int") - - override def capabilities(): util.Set[TableCapability] = - util.EnumSet.of(TableCapability.SPARK_TABLE_OR_VIEW) - - override def partitioning(): Array[Transform] = - Array(LogicalExpressions.identity(LogicalExpressions.reference(Seq("c2")))) - - override def properties(): util.Map[String, String] = util.Map.ofEntries( - util.Map.entry(TableCatalog.PROP_PROVIDER, "json"), - util.Map.entry(TableCatalog.PROP_LOCATION, path) - ); -} - -class TestingGeneralV2Table extends Table { - override def name(): String = "test_v2" - - override def schema(): StructType = FakeV2Provider.schema - - override def capabilities(): util.Set[TableCapability] = - util.EnumSet.of(TableCapability.SPARK_TABLE_OR_VIEW) - - override def properties(): util.Map[String, String] = util.Map.ofEntries( - util.Map.entry(TableCatalog.PROP_PROVIDER, classOf[FakeV2Provider].getName) - ); -} - -class TestingGeneralViewTable(ansi: Boolean) extends Table { - override def name(): String = "test_view" - - override def schema(): StructType = new StructType().add("col", "string").add("i", "int") - - override def capabilities(): util.Set[TableCapability] = - util.EnumSet.of(TableCapability.SPARK_TABLE_OR_VIEW) - - override def properties(): util.Map[String, String] = util.Map.ofEntries( - util.Map.entry(TableCatalog.PROP_TABLE_TYPE, TableSummary.VIEW_TABLE_TYPE), - util.Map.entry( - TableCatalog.PROP_VIEW_TEXT, - "SELECT col, col::int AS i FROM spark_catalog.default.t WHERE col = 'b'"), - util.Map.entry( - TableCatalog.VIEW_CONF_PREFIX + SQLConf.ANSI_ENABLED.key, - ansi.toString) - ); -} From 647ec773e496069542e6eff439eb3be115033811 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 31 Jul 2025 02:19:17 +0800 Subject: [PATCH 04/59] rename --- ...ableOrView.java => 
MetadataOnlyTable.java} | 19 ++++++++++++------- .../sql/catalyst/analysis/Analyzer.scala | 2 +- .../analysis/RelationResolution.scala | 4 ++-- .../spark/sql/connector/catalog/V1Table.scala | 2 +- ... DataSourceV2MetadataOnlyTableSuite.scala} | 12 ++++++------ 5 files changed, 22 insertions(+), 17 deletions(-) rename sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/{DataSourceTableOrView.java => MetadataOnlyTable.java} (90%) rename sql/core/src/test/scala/org/apache/spark/sql/connector/{DataSourceV2DataSourceTableOrViewSuite.scala => DataSourceV2MetadataOnlyTableSuite.scala} (90%) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/DataSourceTableOrView.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java similarity index 90% rename from sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/DataSourceTableOrView.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java index dfabb5ee1eb0c..368fd3191682f 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/DataSourceTableOrView.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java @@ -20,19 +20,24 @@ import java.util.Map; import java.util.Set; +import org.apache.spark.annotation.Evolving; import org.apache.spark.sql.connector.expressions.Transform; import org.apache.spark.sql.types.StructType; /** - * A concrete {@code Table} implementation that represents a general Spark data source table or - * a Spark view, which does not implement read/write directly, but relies on Spark to interpret - * the table metadata and resolve the table provider into a data source, or read it as a view. + * A concrete {@code Table} implementation that only contains the table metadata without + * implementing read/write directly. 
It represents a general Spark data source table or + * a Spark view, and relies on Spark to interpret the table metadata, resolve the table + * provider into a data source, or read it as a view. * This affects the table read/write operations but not DDL operations. + * + * @since 4.1.0 */ -public class DataSourceTableOrView implements Table { +@Evolving +public class MetadataOnlyTable implements Table { private final Builder builder; - private DataSourceTableOrView(Builder builder) { + private MetadataOnlyTable(Builder builder) { this.builder = builder; } @@ -128,8 +133,8 @@ public Builder withPartitioning(Transform[] partitioning) { return this; } - public DataSourceTableOrView build() { - return new DataSourceTableOrView(this); + public MetadataOnlyTable build() { + return new MetadataOnlyTable(this); } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index dc165449b0b8a..1d031d58e2ae4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1120,7 +1120,7 @@ class Analyzer( val v2Ident = Identifier.of(v1Ident.database.toArray, v1Ident.identifier) ResolvedPersistentView( catalog, v2Ident, v1Table.catalogTable) - case t: DataSourceTableOrView => + case t: MetadataOnlyTable => val catalogTable = V1Table.toCatalogTable(catalog, ident, t) if (catalogTable.tableType == CatalogTableType.VIEW) { ResolvedPersistentView(catalog, ident, catalogTable) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala index 4360d63b52323..357b3571d0acb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala @@ -35,9 +35,9 @@ import org.apache.spark.sql.connector.catalog.{ CatalogPlugin, CatalogV2Util, ChangelogInfo, - DataSourceTableOrView, Identifier, LookupCatalog, + MetadataOnlyTable, Table, V1Table, V2TableWithV1Fallback @@ -344,7 +344,7 @@ class RelationResolution( || !v1Table.catalogTable.tracksPartitionsInCatalog => createDataSourceV1Scan(v1Table.v1Table) - case t: DataSourceTableOrView => + case t: MetadataOnlyTable => createDataSourceV1Scan(V1Table.toCatalogTable(catalog, ident, t)) case table => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala index 91d628ba59d36..22742d387c6b5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala @@ -113,7 +113,7 @@ private[sql] object V1Table { def toCatalogTable( catalog: CatalogPlugin, ident: Identifier, - t: DataSourceTableOrView): CatalogTable = { + t: MetadataOnlyTable): CatalogTable = { val tableType = t.getTableType() match { case TableSummary.VIEW_TABLE_TYPE => CatalogTableType.VIEW case TableSummary.MANAGED_TABLE_TYPE => CatalogTableType.MANAGED diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataSourceTableOrViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala similarity index 90% rename from sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataSourceTableOrViewSuite.scala rename to sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala index 34dc11d0532fe..16b140f331c53 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataSourceTableOrViewSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala @@ -20,14 +20,14 @@ package org.apache.spark.sql.connector import org.apache.spark.SparkConf import org.apache.spark.sql.{QueryTest, Row} import org.apache.spark.sql.catalyst.analysis.NoSuchTableException -import org.apache.spark.sql.connector.catalog.{DataSourceTableOrView, Identifier, Table, TableCatalog, TableChange, TableSummary} +import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, Table, TableCatalog, TableChange, TableSummary} import org.apache.spark.sql.connector.expressions.LogicalExpressions import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap -class DataSourceV2DataSourceTableOrViewSuite extends QueryTest with SharedSparkSession { +class DataSourceV2MetadataOnlyTableSuite extends QueryTest with SharedSparkSession { import testImplicits._ override def sparkConf: SparkConf = super.sparkConf @@ -95,21 +95,21 @@ class TestingGeneralCatalog extends TableCatalog { override def loadTable(ident: Identifier): Table = { ident.name() match { case "test_json" => - new DataSourceTableOrView.Builder(new StructType().add("col", "string")) + new MetadataOnlyTable.Builder(new StructType().add("col", "string")) .withProvider("json") .withLocation(ident.namespace().head) .withTableType(TableSummary.EXTERNAL_TABLE_TYPE) .build() case "test_partitioned_json" => val partitioning = LogicalExpressions.identity(LogicalExpressions.reference(Seq("c2"))) - new DataSourceTableOrView.Builder(new StructType().add("c1", "int").add("c2", "int")) + new MetadataOnlyTable.Builder(new StructType().add("c1", "int").add("c2", "int")) .withProvider("json") .withLocation(ident.namespace().head) .withTableType(TableSummary.EXTERNAL_TABLE_TYPE) .withPartitioning(Array(partitioning)) .build() case "test_v2" => - new 
DataSourceTableOrView.Builder(FakeV2Provider.schema) + new MetadataOnlyTable.Builder(FakeV2Provider.schema) .withProvider(classOf[FakeV2Provider].getName) .build() case "test_view" => @@ -117,7 +117,7 @@ class TestingGeneralCatalog extends TableCatalog { TableCatalog.VIEW_CONF_PREFIX + SQLConf.ANSI_ENABLED.key, (ident.namespace().head == "ansi").toString ) - new DataSourceTableOrView.Builder(new StructType().add("col", "string").add("i", "int")) + new MetadataOnlyTable.Builder(new StructType().add("col", "string").add("i", "int")) .withViewText("SELECT col, col::int AS i FROM spark_catalog.default.t WHERE col = 'b'") .withTableProps(viewProps) .build() From 3ab45a67e4ba2ad85d30aa7c930b2c7dfe2a2d94 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 31 Jul 2025 21:25:43 +0800 Subject: [PATCH 05/59] Apply suggestions from code review --- .../apache/spark/sql/connector/catalog/MetadataOnlyTable.java | 1 - .../sql/connector/DataSourceV2MetadataOnlyTableSuite.scala | 2 -- 2 files changed, 3 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java index 368fd3191682f..2a545c9e77e5a 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java @@ -29,7 +29,6 @@ * implementing read/write directly. It represents a general Spark data source table or * a Spark view, and relies on Spark to interpret the table metadata, resolve the table * provider into a data source, or read it as a view. - * This affects the table read/write operations but not DDL operations. 
* * @since 4.1.0 */ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala index 16b140f331c53..5696b55faac3e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala @@ -74,8 +74,6 @@ class DataSourceV2MetadataOnlyTableSuite extends QueryTest with SharedSparkSessi // TODO: move the v2 data source table handling from V2SessionCatalog to the analyzer ignore("v2 data source table") { val tableName = "general_catalog.default.test_v2" - spark.table(tableName).show() - spark.table(tableName).explain(true) checkAnswer(spark.table(tableName), 0.until(10).map(i => Row(i, -i))) } From 31a8d4496e4ad8082e47d2d6723d0df212013cf8 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 1 Aug 2025 22:21:02 +0800 Subject: [PATCH 06/59] address comment --- .../sql/connector/catalog/MetadataOnlyTable.java | 11 ----------- .../apache/spark/sql/connector/catalog/V1Table.scala | 10 +++++++--- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java index 2a545c9e77e5a..cba749bdb08ca 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java @@ -53,7 +53,6 @@ public static class Builder { private String createVersion = ""; private long createTime = 0; private Map tableProps = Map.ofEntries(); - private Map serdeProps = Map.ofEntries(); private Transform[] partitioning = new Transform[0]; public Builder(Column[] columns) { @@ -122,11 +121,6 @@ public Builder 
withTableProps(Map tableProps) { return this; } - public Builder withSerdeProps(Map serdeProps) { - this.serdeProps = serdeProps; - return this; - } - public Builder withPartitioning(Transform[] partitioning) { this.partitioning = partitioning; return this; @@ -177,10 +171,6 @@ public Map getTableProps() { return builder.tableProps; } - public Map getSerdeProps() { - return builder.serdeProps; - } - @Override public Column[] columns() { return builder.columns; @@ -189,7 +179,6 @@ public Column[] columns() { @Override public Map properties() { Map props = new java.util.HashMap<>(builder.tableProps); - builder.serdeProps.forEach((k, v) -> props.put(TableCatalog.OPTION_PREFIX + k, v)); if (getProvider() != null) { props.put(TableCatalog.PROP_PROVIDER, getProvider()); } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala index 22742d387c6b5..38404200c71e5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala @@ -120,7 +120,9 @@ private[sql] object V1Table { case _ => CatalogTableType.EXTERNAL } val viewText = Option(t.getViewText()) - val tableProps = t.getTableProps().asScala.toMap + val (serdeProps, tableProps) = t.getTableProps().asScala.toSeq + .partition(_._1.startsWith(TableCatalog.OPTION_PREFIX)) + val tablePropsMap = tableProps.toMap val (partCols, bucketSpec, clusterBySpec) = t.partitioning().toSeq.convertTransforms CatalogTable( identifier = TableIdentifier( @@ -132,7 +134,9 @@ private[sql] object V1Table { locationUri = Option(t.getLocation()).map(CatalogUtils.stringToURI), // v2 table properties should be put into the serde properties as well in case // it contains data source options. 
- properties = tableProps ++ t.getSerdeProps().asScala + properties = tablePropsMap ++ serdeProps.map { + case (k, v) => k.drop(TableCatalog.OPTION_PREFIX.length) -> v + } ), schema = CatalogV2Util.v2ColumnsToStructType(t.columns()), provider = Option(t.getProvider), @@ -145,7 +149,7 @@ private[sql] object V1Table { viewOriginalText = viewText, comment = Option(t.getComment()), collation = Option(t.getCollation()), - properties = tableProps ++ clusterBySpec.map(ClusterBySpec.toPropertyWithoutValidation) + properties = tablePropsMap ++ clusterBySpec.map(ClusterBySpec.toPropertyWithoutValidation) ) } } From 807e61610191227a726755dd4638dbe84c68ce3b Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 22 Apr 2026 03:07:01 +0000 Subject: [PATCH 07/59] add view.currentCatalog and view.currentNamespace --- .../connector/catalog/MetadataOnlyTable.java | 28 +++++++++++++ .../sql/connector/catalog/TableCatalog.java | 20 ++++++++- .../spark/sql/connector/catalog/V1Table.scala | 20 ++++++++- .../DataSourceV2MetadataOnlyTableSuite.scala | 41 +++++++++++++++++++ 4 files changed, 106 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java index cba749bdb08ca..f19fcfa5bcefa 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java @@ -21,6 +21,7 @@ import java.util.Set; import org.apache.spark.annotation.Evolving; +import org.apache.spark.sql.catalyst.util.QuotingUtils; import org.apache.spark.sql.connector.expressions.Transform; import org.apache.spark.sql.types.StructType; @@ -50,6 +51,8 @@ public static class Builder { private String comment = null; private String collation = null; private String viewText = null; + private String currentCatalog = null; + private String[] 
currentNamespace = null; private String createVersion = ""; private long createTime = 0; private Map tableProps = Map.ofEntries(); @@ -106,6 +109,16 @@ public Builder withViewText(String viewText) { return this; } + public Builder withCurrentCatalog(String currentCatalog) { + this.currentCatalog = currentCatalog; + return this; + } + + public Builder withCurrentNamespace(String[] currentNamespace) { + this.currentNamespace = currentNamespace; + return this; + } + public Builder withCreateVersion(String createVersion) { this.createVersion = createVersion; return this; @@ -159,6 +172,14 @@ public String getViewText() { return builder.viewText; } + public String getCurrentCatalog() { + return builder.currentCatalog; + } + + public String[] getCurrentNamespace() { + return builder.currentNamespace; + } + public String getCreateVersion() { return builder.createVersion; } @@ -200,6 +221,13 @@ public Map properties() { if (getViewText() != null) { props.put(TableCatalog.PROP_VIEW_TEXT, getViewText()); } + if (getCurrentCatalog() != null) { + props.put(TableCatalog.PROP_VIEW_CURRENT_CATALOG, getCurrentCatalog()); + } + if (getCurrentNamespace() != null && getCurrentNamespace().length > 0) { + props.put(TableCatalog.PROP_VIEW_CURRENT_NAMESPACE, + QuotingUtils.quoted(getCurrentNamespace())); + } return props; } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java index fa18543194063..a9ceccb651d30 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java @@ -89,11 +89,27 @@ public interface TableCatalog extends CatalogPlugin { /** * A reserved property to specify the view text of a general table that represents - * a SQL view. 
The identifiers must be fully qualified in the view text to be - * context-independent, otherwise the behavior is undefined. + * a SQL view. Unqualified identifiers in the view text are resolved against + * {@link #PROP_VIEW_CURRENT_CATALOG} and {@link #PROP_VIEW_CURRENT_NAMESPACE} at read time. */ String PROP_VIEW_TEXT = "view_text"; + /** + * A reserved property to specify the current catalog at the time the view was created. + * Unqualified identifiers in the view text are resolved relative to this catalog and + * {@link #PROP_VIEW_CURRENT_NAMESPACE}. + */ + String PROP_VIEW_CURRENT_CATALOG = "view.currentCatalog"; + + /** + * A reserved property to specify the current namespace at the time the view was created. + * The value is a Spark multi-part identifier string (backtick-quoted parts joined with + * {@code "."}, e.g. {@code `db1`.`db2`}) and is parsed with + * {@code ParserInterface.parseMultipartIdentifier}. An absent or empty value means the + * view was created with no current namespace. + */ + String PROP_VIEW_CURRENT_NAMESPACE = "view.currentNamespace"; + /** * A prefix used to specify the Spark SQL configurations for reading this view. 
*/ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala index 38404200c71e5..017c31688f94c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala @@ -124,6 +124,22 @@ private[sql] object V1Table { .partition(_._1.startsWith(TableCatalog.OPTION_PREFIX)) val tablePropsMap = tableProps.toMap val (partCols, bucketSpec, clusterBySpec) = t.partitioning().toSeq.convertTransforms + // For views, translate the V2 view context (currentCatalog / currentNamespace) into V1's + // viewCatalogAndNamespace properties so the V1 view resolution path can expand unqualified + // identifiers in the view text. + val viewContextProps = if (tableType == CatalogTableType.VIEW) { + val currentCatalog = Option(t.getCurrentCatalog()) + val currentNamespace = Option(t.getCurrentNamespace()).map(_.toSeq).getOrElse(Seq.empty) + if (currentCatalog.isDefined || currentNamespace.nonEmpty) { + CatalogTable.catalogAndNamespaceToProps( + currentCatalog.getOrElse(catalog.name()), + currentNamespace) + } else { + Map.empty[String, String] + } + } else { + Map.empty[String, String] + } CatalogTable( identifier = TableIdentifier( table = ident.name(), @@ -149,7 +165,9 @@ private[sql] object V1Table { viewOriginalText = viewText, comment = Option(t.getComment()), collation = Option(t.getCollation()), - properties = tablePropsMap ++ clusterBySpec.map(ClusterBySpec.toPropertyWithoutValidation) + properties = tablePropsMap ++ + clusterBySpec.map(ClusterBySpec.toPropertyWithoutValidation) ++ + viewContextProps ) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala index 5696b55faac3e..10ccfc8b0cb5f 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala @@ -86,6 +86,41 @@ class DataSourceV2MetadataOnlyTableSuite extends QueryTest with SharedSparkSessi checkAnswer(spark.table("general_catalog.non_ansi.test_view"), Row("b", null)) } } + + test("general table as view with stored current catalog/namespace") { + withTable("spark_catalog.default.t") { + Seq("a", "b").toDF("col").write.saveAsTable("spark_catalog.default.t") + // View text uses the unqualified name `t`; it resolves via the stored + // current catalog / namespace properties. + checkAnswer(spark.table("general_catalog.ns.test_unqualified_view"), Row("b")) + } + } + + test("view current catalog/namespace are serialized into table properties") { + val table = new MetadataOnlyTable.Builder(new StructType().add("col", "string")) + .withViewText("SELECT * FROM t") + .withCurrentCatalog("spark_catalog") + .withCurrentNamespace(Array("default")) + .build() + assert(table.properties().get(TableCatalog.PROP_VIEW_CURRENT_CATALOG) == "spark_catalog") + assert(table.properties().get(TableCatalog.PROP_VIEW_CURRENT_NAMESPACE) == "default") + } + + test("view current namespace quotes multi-part names with dots") { + val table = new MetadataOnlyTable.Builder(new StructType().add("col", "string")) + .withViewText("SELECT * FROM t") + .withCurrentNamespace(Array("weird.db", "normal")) + .build() + assert(table.properties().get(TableCatalog.PROP_VIEW_CURRENT_NAMESPACE) == "`weird.db`.normal") + } + + test("view with no current catalog/namespace omits the properties") { + val table = new MetadataOnlyTable.Builder(new StructType().add("col", "string")) + .withViewText("SELECT * FROM spark_catalog.default.t") + .build() + assert(!table.properties().containsKey(TableCatalog.PROP_VIEW_CURRENT_CATALOG)) + assert(!table.properties().containsKey(TableCatalog.PROP_VIEW_CURRENT_NAMESPACE)) 
+ } } class TestingGeneralCatalog extends TableCatalog { @@ -119,6 +154,12 @@ class TestingGeneralCatalog extends TableCatalog { .withViewText("SELECT col, col::int AS i FROM spark_catalog.default.t WHERE col = 'b'") .withTableProps(viewProps) .build() + case "test_unqualified_view" => + new MetadataOnlyTable.Builder(new StructType().add("col", "string")) + .withViewText("SELECT col FROM t WHERE col = 'b'") + .withCurrentCatalog("spark_catalog") + .withCurrentNamespace(Array("default")) + .build() case _ => throw new NoSuchTableException(ident) } } From 674d607ccdb4687150b762ce4e128c651d724fe7 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 22 Apr 2026 03:24:05 +0000 Subject: [PATCH 08/59] clarify PROP_VIEW_CURRENT_NAMESPACE encoding format The Javadoc claimed parts are "backtick-quoted", but QuotingUtils.quoted uses quoteIfNeeded so simple identifiers are not quoted. Document the actual quoteIfNeeded semantics with a correct example. --- .../apache/spark/sql/connector/catalog/TableCatalog.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java index a9ceccb651d30..f9bce48545b99 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java @@ -103,10 +103,11 @@ public interface TableCatalog extends CatalogPlugin { /** * A reserved property to specify the current namespace at the time the view was created. - * The value is a Spark multi-part identifier string (backtick-quoted parts joined with - * {@code "."}, e.g. {@code `db1`.`db2`}) and is parsed with - * {@code ParserInterface.parseMultipartIdentifier}. An absent or empty value means the - * view was created with no current namespace. 
+ * The value is a Spark multi-part identifier string: parts are joined with {@code "."}, + * and a part is backtick-quoted only when it is not a simple identifier (e.g. when it + * contains a {@code "."}). For example, {@code db1.db2} or {@code `weird.db`.normal}. + * The value is parsed with {@code ParserInterface.parseMultipartIdentifier}. An absent + * or empty value means the view was created with no current namespace. */ String PROP_VIEW_CURRENT_NAMESPACE = "view.currentNamespace"; From 6193316a95c608d9fefdb97c56aeb35214835cdc Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 22 Apr 2026 05:06:22 +0000 Subject: [PATCH 09/59] unify MetadataOnlyTable with TableInfo; add SUPPORTS_CREATE_VIEW capability - Extend TableInfo.Builder with convenience setters for metadata-only reserved keys (withProvider, withLocation, withComment, withCollation, withOwner, withTableType, withViewText, withCurrentCatalog, withCurrentNamespace) plus withSchema(StructType). Setters write into the internal properties map. withProperties now takes a defensive copy so later convenience-setter puts don't mutate the caller's map. - Collapse MetadataOnlyTable to a thin Table adapter over TableInfo: delete the nested Builder, all 12 explicit getters, and the hand-rolled properties() serialization. One ctor taking TableInfo (plus optional name), everything else delegates. - Add TableCatalogCapability.SUPPORTS_CREATE_VIEW, gating CREATE VIEW on catalogs that have opted in to round-tripping PROP_VIEW_TEXT as MetadataOnlyTable. Existing TableCatalog impls won't get CREATE VIEW calls silently and will fail up front rather than at SELECT time. - Rewrite V1Table.toCatalogTable to read from TableInfo.properties(), parsing PROP_VIEW_CURRENT_NAMESPACE via CatalystSqlParser. Reserved keys are stripped so they're not double-persisted in CatalogTable. - Migrate DataSourceV2MetadataOnlyTableSuite (both the assertions and TestingGeneralCatalog) to the new TableInfo-based API. 
--- .../connector/catalog/MetadataOnlyTable.java | 204 ++---------------- .../catalog/TableCatalogCapability.java | 17 +- .../sql/connector/catalog/TableInfo.java | 70 +++++- .../spark/sql/connector/catalog/V1Table.scala | 51 +++-- .../DataSourceV2MetadataOnlyTableSuite.scala | 44 ++-- 5 files changed, 170 insertions(+), 216 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java index f19fcfa5bcefa..5ba68feaba538 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java @@ -21,224 +21,60 @@ import java.util.Set; import org.apache.spark.annotation.Evolving; -import org.apache.spark.sql.catalyst.util.QuotingUtils; import org.apache.spark.sql.connector.expressions.Transform; -import org.apache.spark.sql.types.StructType; /** * A concrete {@code Table} implementation that only contains the table metadata without * implementing read/write directly. It represents a general Spark data source table or * a Spark view, and relies on Spark to interpret the table metadata, resolve the table * provider into a data source, or read it as a view. + *
<p>
+ * Catalogs build the metadata via {@link TableInfo.Builder} (which provides convenience + * setters for reserved properties such as {@link TableCatalog#PROP_PROVIDER}, + * {@link TableCatalog#PROP_LOCATION}, {@link TableCatalog#PROP_VIEW_TEXT}, etc.) and wrap + * the resulting {@link TableInfo} in a {@code MetadataOnlyTable} to return from + * {@link TableCatalog#loadTable(Identifier)}. * * @since 4.1.0 */ @Evolving public class MetadataOnlyTable implements Table { - private final Builder builder; + private static final String DEFAULT_NAME = "data_source_table_or_view"; - private MetadataOnlyTable(Builder builder) { - this.builder = builder; - } - - public static class Builder { - private final Column[] columns; - private String name = "data_source_table_or_view"; - private String provider = null; - private String location = null; - private String tableType = null; - private String owner = null; - private String comment = null; - private String collation = null; - private String viewText = null; - private String currentCatalog = null; - private String[] currentNamespace = null; - private String createVersion = ""; - private long createTime = 0; - private Map tableProps = Map.ofEntries(); - private Transform[] partitioning = new Transform[0]; - - public Builder(Column[] columns) { - assert columns != null; - this.columns = columns; - } - - public Builder(StructType schema) { - assert schema != null; - this.columns = CatalogV2Util.structTypeToV2Columns(schema); - } - - public Builder withName(String name) { - this.name = name; - return this; - } - - public Builder withProvider(String provider) { - this.provider = provider; - return this; - } - - public Builder withLocation(String location) { - this.location = location; - return this; - } - - public Builder withTableType(String tableType) { - this.tableType = tableType; - return this; - } - - public Builder withOwner(String owner) { - this.owner = owner; - return this; - } - - public Builder withComment(String 
comment) { - this.comment = comment; - return this; - } - - public Builder withCollation(String collation) { - this.collation = collation; - return this; - } - - public Builder withViewText(String viewText) { - this.viewText = viewText; - this.tableType = TableSummary.VIEW_TABLE_TYPE; - return this; - } - - public Builder withCurrentCatalog(String currentCatalog) { - this.currentCatalog = currentCatalog; - return this; - } - - public Builder withCurrentNamespace(String[] currentNamespace) { - this.currentNamespace = currentNamespace; - return this; - } - - public Builder withCreateVersion(String createVersion) { - this.createVersion = createVersion; - return this; - } - - public Builder withCreateTime(long createTime) { - this.createTime = createTime; - return this; - } - - public Builder withTableProps(Map tableProps) { - this.tableProps = tableProps; - return this; - } - - public Builder withPartitioning(Transform[] partitioning) { - this.partitioning = partitioning; - return this; - } - - public MetadataOnlyTable build() { - return new MetadataOnlyTable(this); - } - } - - public String getProvider() { - return builder.provider; - } - - public String getLocation() { - return builder.location; - } - - public String getTableType() { - return builder.tableType; - } - - public String getOwner() { - return builder.owner; - } - - public String getComment() { - return builder.comment; - } - - public String getCollation() { - return builder.collation; - } - - public String getViewText() { - return builder.viewText; - } - - public String getCurrentCatalog() { - return builder.currentCatalog; - } - - public String[] getCurrentNamespace() { - return builder.currentNamespace; - } + private final TableInfo info; + private final String name; - public String getCreateVersion() { - return builder.createVersion; + public MetadataOnlyTable(TableInfo info) { + this(info, DEFAULT_NAME); } - public long getCreateTime() { - return builder.createTime; + public 
MetadataOnlyTable(TableInfo info, String name) { + this.info = info; + this.name = name; } - public Map getTableProps() { - return builder.tableProps; + public TableInfo getTableInfo() { + return info; } @Override public Column[] columns() { - return builder.columns; + return info.columns(); } @Override public Map properties() { - Map props = new java.util.HashMap<>(builder.tableProps); - if (getProvider() != null) { - props.put(TableCatalog.PROP_PROVIDER, getProvider()); - } - if (getLocation() != null) { - props.put(TableCatalog.PROP_LOCATION, getLocation()); - } - if (getTableType() != null) { - props.put(TableCatalog.PROP_TABLE_TYPE, getTableType()); - } - if (getOwner() != null) { - props.put(TableCatalog.PROP_OWNER, getOwner()); - } - if (getComment() != null) { - props.put(TableCatalog.PROP_COMMENT, getComment()); - } - if (getCollation() != null) { - props.put(TableCatalog.PROP_COLLATION, getCollation()); - } - if (getViewText() != null) { - props.put(TableCatalog.PROP_VIEW_TEXT, getViewText()); - } - if (getCurrentCatalog() != null) { - props.put(TableCatalog.PROP_VIEW_CURRENT_CATALOG, getCurrentCatalog()); - } - if (getCurrentNamespace() != null && getCurrentNamespace().length > 0) { - props.put(TableCatalog.PROP_VIEW_CURRENT_NAMESPACE, - QuotingUtils.quoted(getCurrentNamespace())); - } - return props; + return info.properties(); } @Override public Transform[] partitioning() { - return builder.partitioning; + return info.partitions(); } @Override public String name() { - return builder.name; + return name; } @Override diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalogCapability.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalogCapability.java index a60c827d5ace1..63a726ff8a8c3 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalogCapability.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalogCapability.java 
@@ -92,5 +92,20 @@ public enum TableCatalogCapability { * {@link TableCatalog#createTable}. * See {@link Column#identityColumnSpec()}. */ - SUPPORTS_CREATE_TABLE_WITH_IDENTITY_COLUMNS + SUPPORTS_CREATE_TABLE_WITH_IDENTITY_COLUMNS, + + /** + * Signals that the TableCatalog supports creating views via {@link TableCatalog#createTable} + * by accepting a {@link TableInfo} whose properties include {@link TableCatalog#PROP_VIEW_TEXT} + * (and related view keys: {@link TableCatalog#PROP_VIEW_CURRENT_CATALOG}, + * {@link TableCatalog#PROP_VIEW_CURRENT_NAMESPACE}, and + * {@link TableCatalog#VIEW_CONF_PREFIX}-prefixed SQL configs). + *
<p>
+ * Catalogs declaring this capability must round-trip those properties and return a + * {@link MetadataOnlyTable} from {@link TableCatalog#loadTable} so Spark's view resolution + * path can expand the view text. Without this capability, Spark rejects {@code CREATE VIEW} + * statements targeting the catalog up front rather than letting the catalog silently persist + * a table entry that cannot be read as a view. + */ + SUPPORTS_CREATE_VIEW } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableInfo.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableInfo.java index 9870a3b0fa45d..74e460d9879af 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableInfo.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableInfo.java @@ -20,6 +20,7 @@ import java.util.Map; import java.util.Objects; +import org.apache.spark.sql.catalyst.util.QuotingUtils; import org.apache.spark.sql.connector.catalog.constraints.Constraint; import org.apache.spark.sql.connector.expressions.Transform; import org.apache.spark.sql.types.StructType; @@ -71,8 +72,13 @@ public Builder withColumns(Column[] columns) { return this; } + public Builder withSchema(StructType schema) { + this.columns = CatalogV2Util.structTypeToV2Columns(schema); + return this; + } + public Builder withProperties(Map properties) { - this.properties = properties; + this.properties = new HashMap<>(properties); return this; } @@ -86,6 +92,68 @@ public Builder withConstraints(Constraint[] constraints) { return this; } + // Convenience setters that write reserved keys into `properties`. These mutate the current + // properties map, so call them after any `withProperties(...)` that replaces the map. 
+ + public Builder withProvider(String provider) { + properties.put(TableCatalog.PROP_PROVIDER, provider); + return this; + } + + public Builder withLocation(String location) { + properties.put(TableCatalog.PROP_LOCATION, location); + return this; + } + + public Builder withComment(String comment) { + properties.put(TableCatalog.PROP_COMMENT, comment); + return this; + } + + public Builder withCollation(String collation) { + properties.put(TableCatalog.PROP_COLLATION, collation); + return this; + } + + public Builder withOwner(String owner) { + properties.put(TableCatalog.PROP_OWNER, owner); + return this; + } + + public Builder withTableType(String tableType) { + properties.put(TableCatalog.PROP_TABLE_TYPE, tableType); + return this; + } + + /** + * Sets the view SQL text and marks this TableInfo as a view by setting + * {@link TableCatalog#PROP_TABLE_TYPE} to {@link TableSummary#VIEW_TABLE_TYPE}. + */ + public Builder withViewText(String viewText) { + properties.put(TableCatalog.PROP_VIEW_TEXT, viewText); + properties.put(TableCatalog.PROP_TABLE_TYPE, TableSummary.VIEW_TABLE_TYPE); + return this; + } + + public Builder withCurrentCatalog(String currentCatalog) { + properties.put(TableCatalog.PROP_VIEW_CURRENT_CATALOG, currentCatalog); + return this; + } + + /** + * Sets the current namespace of a view, encoded as a quoted multi-part identifier string + * (see {@link TableCatalog#PROP_VIEW_CURRENT_NAMESPACE}). An empty array clears the property. 
+ */ + public Builder withCurrentNamespace(String[] currentNamespace) { + if (currentNamespace != null && currentNamespace.length > 0) { + properties.put(TableCatalog.PROP_VIEW_CURRENT_NAMESPACE, + QuotingUtils.quoted(currentNamespace)); + } else { + properties.remove(TableCatalog.PROP_VIEW_CURRENT_NAMESPACE); + } + return this; + } + public TableInfo build() { Objects.requireNonNull(columns, "columns should not be null"); return new TableInfo(this); diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala index 017c31688f94c..a89b437c4abcc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala @@ -24,6 +24,7 @@ import scala.jdk.CollectionConverters._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType, CatalogUtils, ClusterBySpec} +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.connector.catalog.V1Table.addV2TableProperties import org.apache.spark.sql.connector.expressions.{LogicalExpressions, Transform} @@ -110,26 +111,46 @@ private[sql] object V1Table { } } + // Reserved keys that are promoted to first-class fields on CatalogTable and must be stripped + // from the user-visible properties map so they're not double-persisted. 
+ private val METADATA_ONLY_RESERVED_KEYS = Set( + TableCatalog.PROP_PROVIDER, + TableCatalog.PROP_LOCATION, + TableCatalog.PROP_TABLE_TYPE, + TableCatalog.PROP_OWNER, + TableCatalog.PROP_COMMENT, + TableCatalog.PROP_COLLATION, + TableCatalog.PROP_VIEW_TEXT, + TableCatalog.PROP_VIEW_CURRENT_CATALOG, + TableCatalog.PROP_VIEW_CURRENT_NAMESPACE + ) + def toCatalogTable( catalog: CatalogPlugin, ident: Identifier, t: MetadataOnlyTable): CatalogTable = { - val tableType = t.getTableType() match { - case TableSummary.VIEW_TABLE_TYPE => CatalogTableType.VIEW - case TableSummary.MANAGED_TABLE_TYPE => CatalogTableType.MANAGED + val info = t.getTableInfo + val props = info.properties.asScala.toMap + val tableType = props.get(TableCatalog.PROP_TABLE_TYPE) match { + case Some(TableSummary.VIEW_TABLE_TYPE) => CatalogTableType.VIEW + case Some(TableSummary.MANAGED_TABLE_TYPE) => CatalogTableType.MANAGED case _ => CatalogTableType.EXTERNAL } - val viewText = Option(t.getViewText()) - val (serdeProps, tableProps) = t.getTableProps().asScala.toSeq + val viewText = props.get(TableCatalog.PROP_VIEW_TEXT) + val userProps = props -- METADATA_ONLY_RESERVED_KEYS + val (serdeProps, tableProps) = userProps.toSeq .partition(_._1.startsWith(TableCatalog.OPTION_PREFIX)) val tablePropsMap = tableProps.toMap - val (partCols, bucketSpec, clusterBySpec) = t.partitioning().toSeq.convertTransforms + val (partCols, bucketSpec, clusterBySpec) = info.partitions.toSeq.convertTransforms // For views, translate the V2 view context (currentCatalog / currentNamespace) into V1's // viewCatalogAndNamespace properties so the V1 view resolution path can expand unqualified // identifiers in the view text. 
val viewContextProps = if (tableType == CatalogTableType.VIEW) { - val currentCatalog = Option(t.getCurrentCatalog()) - val currentNamespace = Option(t.getCurrentNamespace()).map(_.toSeq).getOrElse(Seq.empty) + val currentCatalog = props.get(TableCatalog.PROP_VIEW_CURRENT_CATALOG) + val currentNamespace = props.get(TableCatalog.PROP_VIEW_CURRENT_NAMESPACE) match { + case Some(s) if s.nonEmpty => CatalystSqlParser.parseMultipartIdentifier(s) + case _ => Seq.empty[String] + } if (currentCatalog.isDefined || currentNamespace.nonEmpty) { CatalogTable.catalogAndNamespaceToProps( currentCatalog.getOrElse(catalog.name()), @@ -147,24 +168,22 @@ private[sql] object V1Table { catalog = Some(catalog.name())), tableType = tableType, storage = CatalogStorageFormat.empty.copy( - locationUri = Option(t.getLocation()).map(CatalogUtils.stringToURI), + locationUri = props.get(TableCatalog.PROP_LOCATION).map(CatalogUtils.stringToURI), // v2 table properties should be put into the serde properties as well in case // it contains data source options. 
properties = tablePropsMap ++ serdeProps.map { case (k, v) => k.drop(TableCatalog.OPTION_PREFIX.length) -> v } ), - schema = CatalogV2Util.v2ColumnsToStructType(t.columns()), - provider = Option(t.getProvider), + schema = CatalogV2Util.v2ColumnsToStructType(info.columns), + provider = props.get(TableCatalog.PROP_PROVIDER), partitionColumnNames = partCols, bucketSpec = bucketSpec, - owner = Option(t.getOwner()).getOrElse("unknown"), - createTime = t.getCreateTime(), - createVersion = t.getCreateVersion(), + owner = props.getOrElse(TableCatalog.PROP_OWNER, "unknown"), viewText = viewText, viewOriginalText = viewText, - comment = Option(t.getComment()), - collation = Option(t.getCollation()), + comment = props.get(TableCatalog.PROP_COMMENT), + collation = props.get(TableCatalog.PROP_COLLATION), properties = tablePropsMap ++ clusterBySpec.map(ClusterBySpec.toPropertyWithoutValidation) ++ viewContextProps diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala index 10ccfc8b0cb5f..4ffc17177ca77 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.connector import org.apache.spark.SparkConf import org.apache.spark.sql.{QueryTest, Row} import org.apache.spark.sql.catalyst.analysis.NoSuchTableException -import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, Table, TableCatalog, TableChange, TableSummary} +import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, Table, TableCatalog, TableChange, TableInfo, TableSummary} import org.apache.spark.sql.connector.expressions.LogicalExpressions import org.apache.spark.sql.internal.SQLConf import 
org.apache.spark.sql.test.SharedSparkSession @@ -97,27 +97,33 @@ class DataSourceV2MetadataOnlyTableSuite extends QueryTest with SharedSparkSessi } test("view current catalog/namespace are serialized into table properties") { - val table = new MetadataOnlyTable.Builder(new StructType().add("col", "string")) + val info = new TableInfo.Builder() + .withSchema(new StructType().add("col", "string")) .withViewText("SELECT * FROM t") .withCurrentCatalog("spark_catalog") .withCurrentNamespace(Array("default")) .build() + val table = new MetadataOnlyTable(info) assert(table.properties().get(TableCatalog.PROP_VIEW_CURRENT_CATALOG) == "spark_catalog") assert(table.properties().get(TableCatalog.PROP_VIEW_CURRENT_NAMESPACE) == "default") } test("view current namespace quotes multi-part names with dots") { - val table = new MetadataOnlyTable.Builder(new StructType().add("col", "string")) + val info = new TableInfo.Builder() + .withSchema(new StructType().add("col", "string")) .withViewText("SELECT * FROM t") .withCurrentNamespace(Array("weird.db", "normal")) .build() + val table = new MetadataOnlyTable(info) assert(table.properties().get(TableCatalog.PROP_VIEW_CURRENT_NAMESPACE) == "`weird.db`.normal") } test("view with no current catalog/namespace omits the properties") { - val table = new MetadataOnlyTable.Builder(new StructType().add("col", "string")) + val info = new TableInfo.Builder() + .withSchema(new StructType().add("col", "string")) .withViewText("SELECT * FROM spark_catalog.default.t") .build() + val table = new MetadataOnlyTable(info) assert(!table.properties().containsKey(TableCatalog.PROP_VIEW_CURRENT_CATALOG)) assert(!table.properties().containsKey(TableCatalog.PROP_VIEW_CURRENT_NAMESPACE)) } @@ -128,38 +134,48 @@ class TestingGeneralCatalog extends TableCatalog { override def loadTable(ident: Identifier): Table = { ident.name() match { case "test_json" => - new MetadataOnlyTable.Builder(new StructType().add("col", "string")) + val info = new TableInfo.Builder() 
+ .withSchema(new StructType().add("col", "string")) .withProvider("json") .withLocation(ident.namespace().head) .withTableType(TableSummary.EXTERNAL_TABLE_TYPE) .build() + new MetadataOnlyTable(info) case "test_partitioned_json" => val partitioning = LogicalExpressions.identity(LogicalExpressions.reference(Seq("c2"))) - new MetadataOnlyTable.Builder(new StructType().add("c1", "int").add("c2", "int")) + val info = new TableInfo.Builder() + .withSchema(new StructType().add("c1", "int").add("c2", "int")) .withProvider("json") .withLocation(ident.namespace().head) .withTableType(TableSummary.EXTERNAL_TABLE_TYPE) - .withPartitioning(Array(partitioning)) + .withPartitions(Array(partitioning)) .build() + new MetadataOnlyTable(info) case "test_v2" => - new MetadataOnlyTable.Builder(FakeV2Provider.schema) + val info = new TableInfo.Builder() + .withSchema(FakeV2Provider.schema) .withProvider(classOf[FakeV2Provider].getName) .build() + new MetadataOnlyTable(info) case "test_view" => - val viewProps = java.util.Map.of( + val viewProps = new java.util.HashMap[String, String]() + viewProps.put( TableCatalog.VIEW_CONF_PREFIX + SQLConf.ANSI_ENABLED.key, - (ident.namespace().head == "ansi").toString - ) - new MetadataOnlyTable.Builder(new StructType().add("col", "string").add("i", "int")) + (ident.namespace().head == "ansi").toString) + val info = new TableInfo.Builder() + .withSchema(new StructType().add("col", "string").add("i", "int")) + .withProperties(viewProps) .withViewText("SELECT col, col::int AS i FROM spark_catalog.default.t WHERE col = 'b'") - .withTableProps(viewProps) .build() + new MetadataOnlyTable(info) case "test_unqualified_view" => - new MetadataOnlyTable.Builder(new StructType().add("col", "string")) + val info = new TableInfo.Builder() + .withSchema(new StructType().add("col", "string")) .withViewText("SELECT col FROM t WHERE col = 'b'") .withCurrentCatalog("spark_catalog") .withCurrentNamespace(Array("default")) .build() + new MetadataOnlyTable(info) case _ 
=> throw new NoSuchTableException(ident) } } From 65276225e4fc03f3458e0c5cf786e5d189c6a046 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 22 Apr 2026 05:14:38 +0000 Subject: [PATCH 10/59] make MetadataOnlyTable.properties() return an immutable view The previous implementation returned the backing TableInfo's mutable HashMap directly, so a caller that did table.properties().put(...) would corrupt internal state. V1Table.properties (and the Table default, which is Collections.emptyMap()) are effectively immutable; make MetadataOnlyTable consistent by wrapping with Collections.unmodifiableMap. --- .../apache/spark/sql/connector/catalog/MetadataOnlyTable.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java index 5ba68feaba538..2d0db6ab24f9f 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java @@ -17,6 +17,7 @@ package org.apache.spark.sql.connector.catalog; +import java.util.Collections; import java.util.Map; import java.util.Set; @@ -64,7 +65,7 @@ public Column[] columns() { @Override public Map properties() { - return info.properties(); + return Collections.unmodifiableMap(info.properties()); } @Override From 0a86bcffb583dc36b2ba38deba6ef84dd25af1ee Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 22 Apr 2026 08:15:51 +0000 Subject: [PATCH 11/59] consolidate reserved-key list into CatalogV2Util.TABLE_RESERVED_PROPERTIES Instead of a V1Table-private METADATA_ONLY_RESERVED_KEYS duplicate, add the three new view-related keys (PROP_VIEW_TEXT, PROP_VIEW_CURRENT_CATALOG, PROP_VIEW_CURRENT_NAMESPACE) to the existing canonical list and reuse it in V1Table.toCatalogTable. 
This keeps "what is a reserved table property" in one place and, as a side effect, fixes the gap where PROP_EXTERNAL and PROP_IS_MANAGED_LOCATION were not stripped from the metadata-only read path (they were already in TABLE_RESERVED_PROPERTIES but missing from my local copy). Downstream filters (ALTER TABLE SET TBLPROPERTIES, DESCRIBE TABLE, SHOW TBLPROPERTIES, SHOW CREATE TABLE) now also reject / hide the view-related keys, which matches their reserved semantics. --- .../sql/connector/catalog/CatalogV2Util.scala | 5 ++++- .../spark/sql/connector/catalog/V1Table.scala | 18 +++--------------- 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala index b29d0b3eabe56..8ca0cd2053630 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala @@ -61,7 +61,10 @@ private[sql] object CatalogV2Util { TableCatalog.PROP_OWNER, TableCatalog.PROP_EXTERNAL, TableCatalog.PROP_IS_MANAGED_LOCATION, - TableCatalog.PROP_TABLE_TYPE) + TableCatalog.PROP_TABLE_TYPE, + TableCatalog.PROP_VIEW_TEXT, + TableCatalog.PROP_VIEW_CURRENT_CATALOG, + TableCatalog.PROP_VIEW_CURRENT_NAMESPACE) /** * The list of reserved namespace properties, which can not be removed or changed directly by diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala index a89b437c4abcc..6a723c76ea68a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala @@ -111,20 +111,6 @@ private[sql] object V1Table { } } - // Reserved keys that are promoted to first-class fields 
on CatalogTable and must be stripped - // from the user-visible properties map so they're not double-persisted. - private val METADATA_ONLY_RESERVED_KEYS = Set( - TableCatalog.PROP_PROVIDER, - TableCatalog.PROP_LOCATION, - TableCatalog.PROP_TABLE_TYPE, - TableCatalog.PROP_OWNER, - TableCatalog.PROP_COMMENT, - TableCatalog.PROP_COLLATION, - TableCatalog.PROP_VIEW_TEXT, - TableCatalog.PROP_VIEW_CURRENT_CATALOG, - TableCatalog.PROP_VIEW_CURRENT_NAMESPACE - ) - def toCatalogTable( catalog: CatalogPlugin, ident: Identifier, @@ -137,7 +123,9 @@ private[sql] object V1Table { case _ => CatalogTableType.EXTERNAL } val viewText = props.get(TableCatalog.PROP_VIEW_TEXT) - val userProps = props -- METADATA_ONLY_RESERVED_KEYS + // Reserved keys are promoted to first-class CatalogTable fields; strip them from the + // user-visible properties map so they're not double-persisted or leaked into the serde bag. + val userProps = props -- CatalogV2Util.TABLE_RESERVED_PROPERTIES val (serdeProps, tableProps) = userProps.toSeq .partition(_._1.startsWith(TableCatalog.OPTION_PREFIX)) val tablePropsMap = tableProps.toMap From 4bf3bfc9e84fc0a07d13dc8fd4aca9567333e9dc Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 22 Apr 2026 08:42:33 +0000 Subject: [PATCH 12/59] collapse view currentCatalog/currentNamespace into a single property MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace PROP_VIEW_CURRENT_CATALOG + PROP_VIEW_CURRENT_NAMESPACE with a single PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE key whose value is a quoted multi-part identifier (first part is the catalog, remaining parts are the namespace). 
Matches the conceptual model v1 already uses (CatalogTable.viewCatalogAndNamespace returns Seq[String] with catalog at index 0), keeps the Spark idiom of quoted multi-part identifiers, and simplifies both the API (one setter: withCurrentCatalogAndNamespace) and the v2-to-v1 translation in V1Table.toCatalogTable (parse once, head as catalog, tail as namespace — no fallback defaulting). Also drops the last two-key-specific tests and merges them into one serialization test plus one quoting test. --- .../sql/connector/catalog/TableCatalog.java | 24 ++++++---------- .../catalog/TableCatalogCapability.java | 3 +- .../sql/connector/catalog/TableInfo.java | 28 ++++++++++--------- .../sql/connector/catalog/CatalogV2Util.scala | 3 +- .../spark/sql/connector/catalog/V1Table.scala | 20 ++++++------- .../DataSourceV2MetadataOnlyTableSuite.scala | 25 ++++++++--------- 6 files changed, 46 insertions(+), 57 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java index f9bce48545b99..25886f73317be 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java @@ -90,26 +90,20 @@ public interface TableCatalog extends CatalogPlugin { /** * A reserved property to specify the view text of a general table that represents * a SQL view. Unqualified identifiers in the view text are resolved against - * {@link #PROP_VIEW_CURRENT_CATALOG} and {@link #PROP_VIEW_CURRENT_NAMESPACE} at read time. + * {@link #PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE} at read time. */ String PROP_VIEW_TEXT = "view_text"; /** - * A reserved property to specify the current catalog at the time the view was created. - * Unqualified identifiers in the view text are resolved relative to this catalog and - * {@link #PROP_VIEW_CURRENT_NAMESPACE}. 
+ * A reserved property that captures the current catalog and namespace at the time the view + * was created. The value is a Spark multi-part identifier string: parts are joined with + * {@code "."} and any part that isn't a simple identifier is backtick-quoted (see + * {@code QuotingUtils.quoted}). The first part is the catalog; the remaining parts are the + * namespace. For example, {@code my_catalog.db1.db2} or {@code my_catalog.`weird.db`.normal}. + * The value is parsed with {@code ParserInterface.parseMultipartIdentifier}. An absent or + * empty value means the view was created with no captured resolution context. */ - String PROP_VIEW_CURRENT_CATALOG = "view.currentCatalog"; - - /** - * A reserved property to specify the current namespace at the time the view was created. - * The value is a Spark multi-part identifier string: parts are joined with {@code "."}, - * and a part is backtick-quoted only when it is not a simple identifier (e.g. when it - * contains a {@code "."}). For example, {@code db1.db2} or {@code `weird.db`.normal}. - * The value is parsed with {@code ParserInterface.parseMultipartIdentifier}. An absent - * or empty value means the view was created with no current namespace. - */ - String PROP_VIEW_CURRENT_NAMESPACE = "view.currentNamespace"; + String PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE = "view.currentCatalogAndNamespace"; /** * A prefix used to specify the Spark SQL configurations for reading this view. 
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalogCapability.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalogCapability.java index 63a726ff8a8c3..bbec0a3b3ba77 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalogCapability.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalogCapability.java @@ -97,8 +97,7 @@ public enum TableCatalogCapability { /** * Signals that the TableCatalog supports creating views via {@link TableCatalog#createTable} * by accepting a {@link TableInfo} whose properties include {@link TableCatalog#PROP_VIEW_TEXT} - * (and related view keys: {@link TableCatalog#PROP_VIEW_CURRENT_CATALOG}, - * {@link TableCatalog#PROP_VIEW_CURRENT_NAMESPACE}, and + * (and related view keys: {@link TableCatalog#PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE} and * {@link TableCatalog#VIEW_CONF_PREFIX}-prefixed SQL configs). *

* Catalogs declaring this capability must round-trip those properties and return a diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableInfo.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableInfo.java index 74e460d9879af..25a4859a39a78 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableInfo.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableInfo.java @@ -135,22 +135,24 @@ public Builder withViewText(String viewText) { return this; } - public Builder withCurrentCatalog(String currentCatalog) { - properties.put(TableCatalog.PROP_VIEW_CURRENT_CATALOG, currentCatalog); - return this; - } - /** - * Sets the current namespace of a view, encoded as a quoted multi-part identifier string - * (see {@link TableCatalog#PROP_VIEW_CURRENT_NAMESPACE}). An empty array clears the property. + * Sets the current catalog and namespace at view creation time, encoded as a single quoted + * multi-part identifier string (see + * {@link TableCatalog#PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE}). The first part is the + * catalog; remaining parts are the namespace. Passing a null or empty catalog clears the + * property. */ - public Builder withCurrentNamespace(String[] currentNamespace) { - if (currentNamespace != null && currentNamespace.length > 0) { - properties.put(TableCatalog.PROP_VIEW_CURRENT_NAMESPACE, - QuotingUtils.quoted(currentNamespace)); - } else { - properties.remove(TableCatalog.PROP_VIEW_CURRENT_NAMESPACE); + public Builder withCurrentCatalogAndNamespace(String catalog, String[] namespace) { + if (catalog == null || catalog.isEmpty()) { + properties.remove(TableCatalog.PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE); + return this; } + String[] ns = namespace == null ? 
new String[0] : namespace; + String[] parts = new String[ns.length + 1]; + parts[0] = catalog; + System.arraycopy(ns, 0, parts, 1, ns.length); + properties.put(TableCatalog.PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE, + QuotingUtils.quoted(parts)); return this; } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala index 8ca0cd2053630..6e41cd64332b6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala @@ -63,8 +63,7 @@ private[sql] object CatalogV2Util { TableCatalog.PROP_IS_MANAGED_LOCATION, TableCatalog.PROP_TABLE_TYPE, TableCatalog.PROP_VIEW_TEXT, - TableCatalog.PROP_VIEW_CURRENT_CATALOG, - TableCatalog.PROP_VIEW_CURRENT_NAMESPACE) + TableCatalog.PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE) /** * The list of reserved namespace properties, which can not be removed or changed directly by diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala index 6a723c76ea68a..d2343cf53211b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala @@ -130,21 +130,17 @@ private[sql] object V1Table { .partition(_._1.startsWith(TableCatalog.OPTION_PREFIX)) val tablePropsMap = tableProps.toMap val (partCols, bucketSpec, clusterBySpec) = info.partitions.toSeq.convertTransforms - // For views, translate the V2 view context (currentCatalog / currentNamespace) into V1's + // For views, translate the V2 view context (PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE, a + // single quoted multi-part identifier whose first part is the catalog) into V1's numbered // viewCatalogAndNamespace 
properties so the V1 view resolution path can expand unqualified // identifiers in the view text. val viewContextProps = if (tableType == CatalogTableType.VIEW) { - val currentCatalog = props.get(TableCatalog.PROP_VIEW_CURRENT_CATALOG) - val currentNamespace = props.get(TableCatalog.PROP_VIEW_CURRENT_NAMESPACE) match { - case Some(s) if s.nonEmpty => CatalystSqlParser.parseMultipartIdentifier(s) - case _ => Seq.empty[String] - } - if (currentCatalog.isDefined || currentNamespace.nonEmpty) { - CatalogTable.catalogAndNamespaceToProps( - currentCatalog.getOrElse(catalog.name()), - currentNamespace) - } else { - Map.empty[String, String] + props.get(TableCatalog.PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE) match { + case Some(s) if s.nonEmpty => + val parts = CatalystSqlParser.parseMultipartIdentifier(s) + CatalogTable.catalogAndNamespaceToProps(parts.head, parts.tail) + case _ => + Map.empty[String, String] } } else { Map.empty[String, String] diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala index 4ffc17177ca77..598310c0cc787 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala @@ -96,36 +96,36 @@ class DataSourceV2MetadataOnlyTableSuite extends QueryTest with SharedSparkSessi } } - test("view current catalog/namespace are serialized into table properties") { + test("view current catalog/namespace are serialized into a single property") { val info = new TableInfo.Builder() .withSchema(new StructType().add("col", "string")) .withViewText("SELECT * FROM t") - .withCurrentCatalog("spark_catalog") - .withCurrentNamespace(Array("default")) + .withCurrentCatalogAndNamespace("spark_catalog", Array("default")) .build() val table = new MetadataOnlyTable(info) - 
assert(table.properties().get(TableCatalog.PROP_VIEW_CURRENT_CATALOG) == "spark_catalog") - assert(table.properties().get(TableCatalog.PROP_VIEW_CURRENT_NAMESPACE) == "default") + assert(table.properties().get(TableCatalog.PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE) == + "spark_catalog.default") } - test("view current namespace quotes multi-part names with dots") { + test("view current catalog/namespace quotes multi-part names with dots") { val info = new TableInfo.Builder() .withSchema(new StructType().add("col", "string")) .withViewText("SELECT * FROM t") - .withCurrentNamespace(Array("weird.db", "normal")) + .withCurrentCatalogAndNamespace("spark_catalog", Array("weird.db", "normal")) .build() val table = new MetadataOnlyTable(info) - assert(table.properties().get(TableCatalog.PROP_VIEW_CURRENT_NAMESPACE) == "`weird.db`.normal") + assert(table.properties().get(TableCatalog.PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE) == + "spark_catalog.`weird.db`.normal") } - test("view with no current catalog/namespace omits the properties") { + test("view with no current catalog/namespace omits the property") { val info = new TableInfo.Builder() .withSchema(new StructType().add("col", "string")) .withViewText("SELECT * FROM spark_catalog.default.t") .build() val table = new MetadataOnlyTable(info) - assert(!table.properties().containsKey(TableCatalog.PROP_VIEW_CURRENT_CATALOG)) - assert(!table.properties().containsKey(TableCatalog.PROP_VIEW_CURRENT_NAMESPACE)) + assert(!table.properties().containsKey( + TableCatalog.PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE)) } } @@ -172,8 +172,7 @@ class TestingGeneralCatalog extends TableCatalog { val info = new TableInfo.Builder() .withSchema(new StructType().add("col", "string")) .withViewText("SELECT col FROM t WHERE col = 'b'") - .withCurrentCatalog("spark_catalog") - .withCurrentNamespace(Array("default")) + .withCurrentCatalogAndNamespace("spark_catalog", Array("default")) .build() new MetadataOnlyTable(info) case _ => throw new 
NoSuchTableException(ident) From a642356f0207b9ed25bc4175e0e571bedc0225be Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 22 Apr 2026 08:52:11 +0000 Subject: [PATCH 13/59] implement DS v2 CREATE VIEW via TableCatalog.createTable - DataSourceV2Strategy: new case for CreateView(ResolvedIdentifier, ...). Checks SUPPORTS_CREATE_VIEW capability (reuses the existing MISSING_CATALOG_ABILITY.VIEWS error) and dispatches to CreateV2ViewExec (plain TableCatalog, non-atomic drop+create) or AtomicCreateV2ViewExec (StagingTableCatalog, stage+commit) - same shape as REPLACE TABLE. - CreateV2ViewExec / AtomicCreateV2ViewExec: build a TableInfo via the TableInfo.Builder convenience setters (withSchema / withViewText / withCurrentCatalogAndNamespace / withComment / withCollation / ...) and delegate to catalog.createTable. Temp-view and temp-variable reference rejection is handled via the shared ViewHelper methods. Temp-function rejection is skipped for now because v2 CreateView does not extend AnalysisOnlyCommand (TODO). - ViewHelper.generateViewProperties: add optional catalogAndNamespaceEncoder parameter (default preserves v1 behaviour) so v2 can emit the single PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE key instead of v1's numbered catalogAndNamespace.* keys, while sharing the rest of the properties-bag construction (query column names, SQL configs, temp refs, schema mode). - ResolveSessionCatalog: drop the throw for non-session-catalog CreateView; the strategy now handles it. - TestingGeneralCatalog: declare SUPPORTS_CREATE_VIEW, implement createTable + dropTable + tableExists backed by an in-memory map so round-trip works. - Tests: CREATE VIEW, CREATE VIEW IF NOT EXISTS (no-op), CREATE VIEW on existing (failure), CREATE OR REPLACE VIEW (replace), CREATE VIEW on a catalog without SUPPORTS_CREATE_VIEW (MISSING_CATALOG_ABILITY.VIEWS). 
--- .../analysis/ResolveSessionCatalog.scala | 3 - .../spark/sql/execution/command/views.scala | 6 +- .../datasources/v2/CreateV2ViewExec.scala | 197 ++++++++++++++++++ .../datasources/v2/DataSourceV2Strategy.scala | 20 +- .../DataSourceV2MetadataOnlyTableSuite.scala | 192 ++++++++++++----- 5 files changed, 365 insertions(+), 53 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index d7d943a2eedb2..b1cb4badf032a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -532,9 +532,6 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) viewType = PersistedView, viewSchemaMode = viewSchemaMode) - case CreateView(ResolvedIdentifier(catalog, _), _, _, _, _, _, _, _, _, _) => - throw QueryCompilationErrors.missingCatalogViewsAbilityError(catalog) - case ShowViews(ns: ResolvedNamespace, pattern, output) => ns match { case ResolvedDatabaseInSessionCatalog(db) => ShowViewsCommand(db, pattern, output) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index 95d76c72d2951..42ef6e7a40ba5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -483,7 +483,9 @@ object ViewHelper extends SQLConfHelper with Logging with CapturesConfig { viewSchemaMode: ViewSchemaMode, tempViewNames: Seq[Seq[String]] = Seq.empty, tempFunctionNames: Seq[String] = Seq.empty, - tempVariableNames: Seq[Seq[String]] = Seq.empty): Map[String, String] = { 
+ tempVariableNames: Seq[Seq[String]] = Seq.empty, + catalogAndNamespaceEncoder: (String, Seq[String]) => Map[String, String] = + catalogAndNamespaceToProps): Map[String, String] = { val conf = session.sessionState.conf @@ -502,7 +504,7 @@ object ViewHelper extends SQLConfHelper with Logging with CapturesConfig { // Generate the view default catalog and namespace, as well as captured SQL configs. val manager = session.sessionState.catalogManager removeReferredTempNames(removeSQLConfigs(removeQueryColumnNames(properties))) ++ - catalogAndNamespaceToProps( + catalogAndNamespaceEncoder( manager.currentCatalog.name, manager.currentNamespace.toImmutableArraySeq) ++ sqlConfigsToProps(conf, VIEW_SQL_CONFIG_PREFIX) ++ queryColumnNameProps ++ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala new file mode 100644 index 0000000000000..d3587d65a649d --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala @@ -0,0 +1,197 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.datasources.v2 + +import scala.jdk.CollectionConverters._ + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, TableAlreadyExistsException} +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, ViewSchemaMode} +import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, QuotingUtils, SchemaUtils} +import org.apache.spark.sql.connector.catalog.{Identifier, StagedTable, StagingTableCatalog, TableCatalog, TableInfo} +import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.execution.command.{CommandUtils, ViewHelper} +import org.apache.spark.sql.execution.metric.SQLMetric +import org.apache.spark.util.Utils + +/** + * Shared validation + TableInfo construction for v2 CREATE VIEW execs. + */ +private[v2] trait V2ViewPreparation extends LeafV2CommandExec { + def catalog: TableCatalog + def identifier: Identifier + def userSpecifiedColumns: Seq[(String, Option[String])] + def comment: Option[String] + def collation: Option[String] + def userProperties: Map[String, String] + def originalText: String + def query: LogicalPlan + def viewSchemaMode: ViewSchemaMode + + // Build a synthetic v1 TableIdentifier for error messages and for ViewHelper methods that + // accept it purely for rendering. This carries no semantic weight - the v2 Identifier is the + // actual target. 
+ protected lazy val legacyName: TableIdentifier = TableIdentifier( + table = identifier.name(), + database = identifier.namespace().lastOption, + catalog = Some(catalog.name())) + + override def output: Seq[Attribute] = Seq.empty + + protected def buildTableInfo(): TableInfo = { + import ViewHelper._ + import TableCatalog._ + + if (userSpecifiedColumns.nonEmpty) { + if (userSpecifiedColumns.length > query.output.length) { + throw QueryCompilationErrors.cannotCreateViewNotEnoughColumnsError( + legacyName, userSpecifiedColumns.map(_._1), query) + } else if (userSpecifiedColumns.length < query.output.length) { + throw QueryCompilationErrors.cannotCreateViewTooManyColumnsError( + legacyName, userSpecifiedColumns.map(_._1), query) + } + } + + // Reject permanent views referencing temporary objects. Temp-function references are not + // detected here because v2 `CreateView` does not extend AnalysisOnlyCommand and therefore + // does not capture `referredTempFunctionNames` from AnalysisContext; temp views and temp + // variables are still caught via plan inspection. TODO(SPARK-...): capture temp functions. + verifyTemporaryObjectsNotExists( + isTemporary = false, legacyName, query, referredTempFunctions = Nil) + verifyAutoGeneratedAliasesNotExists(query, isTemporary = false, legacyName) + SchemaUtils.checkIndeterminateCollationInSchema(query.schema) + + val aliasedSchema = CharVarcharUtils.getRawSchema( + aliasPlan(session, query, userSpecifiedColumns).schema, session.sessionState.conf) + + val tempViews = collectTemporaryViews(query) + val tempVars = collectTemporaryVariables(query) + + // Emit PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE (single quoted multi-part identifier, + // catalog as first part) instead of v1's numbered view.catalogAndNamespace.* keys. 
+ val v2Encoder: (String, Seq[String]) => Map[String, String] = { (cat, ns) => + val parts = (cat +: ns).toArray + Map(PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE -> QuotingUtils.quoted(parts)) + } + + val viewProps = generateViewProperties( + properties = userProperties, + session = session, + queryOutput = query.output.map(_.name).toArray, + fieldNames = aliasedSchema.fieldNames, + viewSchemaMode = viewSchemaMode, + tempViewNames = tempViews, + tempFunctionNames = Nil, + tempVariableNames = tempVars, + catalogAndNamespaceEncoder = v2Encoder) + + val builder = new TableInfo.Builder() + .withSchema(aliasedSchema) + .withProperties(viewProps.asJava) + .withViewText(originalText) + comment.foreach(builder.withComment) + collation.foreach(builder.withCollation) + builder.build() + } + + protected def viewAlreadyExists(): Throwable = + QueryCompilationErrors.viewAlreadyExistsError(legacyName) +} + +/** + * Physical plan node for CREATE VIEW on a v2 `TableCatalog` that does NOT support staging. + * REPLACE is implemented as a non-atomic drop + create. + */ +case class CreateV2ViewExec( + catalog: TableCatalog, + identifier: Identifier, + userSpecifiedColumns: Seq[(String, Option[String])], + comment: Option[String], + collation: Option[String], + userProperties: Map[String, String], + originalText: String, + query: LogicalPlan, + allowExisting: Boolean, + replace: Boolean, + viewSchemaMode: ViewSchemaMode) extends V2ViewPreparation { + + override protected def run(): Seq[InternalRow] = { + val info = buildTableInfo() + + if (catalog.tableExists(identifier)) { + if (allowExisting) { + return Seq.empty + } + if (!replace) throw viewAlreadyExists() + ViewHelper.checkCyclicViewReference(query, Seq(legacyName), legacyName) + CommandUtils.uncacheTableOrView(session, legacyName) + catalog.dropTable(identifier) + } + catalog.createTable(identifier, info) + Seq.empty + } +} + +/** + * Physical plan node for CREATE VIEW on a v2 `StagingTableCatalog`. 
Uses the staging API to + * commit the metadata swap atomically. + */ +case class AtomicCreateV2ViewExec( + catalog: StagingTableCatalog, + identifier: Identifier, + userSpecifiedColumns: Seq[(String, Option[String])], + comment: Option[String], + collation: Option[String], + userProperties: Map[String, String], + originalText: String, + query: LogicalPlan, + allowExisting: Boolean, + replace: Boolean, + viewSchemaMode: ViewSchemaMode) extends V2ViewPreparation { + + override val metrics: Map[String, SQLMetric] = + DataSourceV2Utils.commitMetrics(sparkContext, catalog) + + override protected def run(): Seq[InternalRow] = { + if (allowExisting && catalog.tableExists(identifier)) { + return Seq.empty + } + val info = buildTableInfo() + val staged: StagedTable = if (replace) { + if (catalog.tableExists(identifier)) { + ViewHelper.checkCyclicViewReference(query, Seq(legacyName), legacyName) + CommandUtils.uncacheTableOrView(session, legacyName) + } + catalog.stageCreateOrReplace(identifier, info) + } else { + try { + catalog.stageCreate(identifier, info) + } catch { + case _: TableAlreadyExistsException => throw viewAlreadyExists() + } + } + Utils.tryWithSafeFinallyAndFailureCallbacks({ + DataSourceV2Utils.commitStagedChanges(sparkContext, staged, metrics) + })(catchBlock = { + staged.abortStagedChanges() + }) + Seq.empty + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 66ce4a637d51d..29ea49129dbca 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.trees.TreePattern.SCALAR_SUBQUERY import 
org.apache.spark.sql.catalyst.util.{toPrettySQL, GeneratedColumn, IdentityColumn, ResolveDefaultColumns, ResolveTableConstraints, V2ExpressionBuilder} import org.apache.spark.sql.classic.SparkSession -import org.apache.spark.sql.connector.catalog.{Identifier, StagingTableCatalog, SupportsDeleteV2, SupportsNamespaces, SupportsPartitionManagement, SupportsWrite, TableCapability, TableCatalog, TruncatableTable, V1Table} +import org.apache.spark.sql.connector.catalog.{Identifier, StagingTableCatalog, SupportsDeleteV2, SupportsNamespaces, SupportsPartitionManagement, SupportsWrite, TableCapability, TableCatalog, TableCatalogCapability, TruncatableTable, V1Table} import org.apache.spark.sql.connector.catalog.TableChange import org.apache.spark.sql.connector.catalog.index.SupportsIndex import org.apache.spark.sql.connector.expressions.{FieldReference, LiteralValue} @@ -301,6 +301,24 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat qualifyLocInTableSpec(tableSpec), orCreate = orCreate, invalidateCache) :: Nil } + case CreateView(ResolvedIdentifier(catalog, ident), userSpecifiedColumns, comment, + collation, properties, originalText, child, allowExisting, replace, viewSchemaMode) => + val tableCatalog = catalog.asTableCatalog + if (!tableCatalog.capabilities().contains(TableCatalogCapability.SUPPORTS_CREATE_VIEW)) { + throw QueryCompilationErrors.missingCatalogViewsAbilityError(tableCatalog) + } + val sqlText = originalText.getOrElse { + throw QueryCompilationErrors.createPersistedViewFromDatasetAPINotAllowedError() + } + tableCatalog match { + case staging: StagingTableCatalog => + AtomicCreateV2ViewExec(staging, ident, userSpecifiedColumns, comment, collation, + properties, sqlText, child, allowExisting, replace, viewSchemaMode) :: Nil + case _ => + CreateV2ViewExec(tableCatalog, ident, userSpecifiedColumns, comment, collation, + properties, sqlText, child, allowExisting, replace, viewSchemaMode) :: Nil + } + case 
ReplaceTableAsSelect(ResolvedIdentifier(catalog, ident), parts, query, tableSpec: TableSpec, options, orCreate, true) => catalog match { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala index 598310c0cc787..429f4378ea147 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala @@ -18,9 +18,9 @@ package org.apache.spark.sql.connector import org.apache.spark.SparkConf -import org.apache.spark.sql.{QueryTest, Row} +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.catalyst.analysis.NoSuchTableException -import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, Table, TableCatalog, TableChange, TableInfo, TableSummary} +import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, Table, TableCatalog, TableCatalogCapability, TableChange, TableInfo, TableSummary} import org.apache.spark.sql.connector.expressions.LogicalExpressions import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession @@ -127,63 +127,144 @@ class DataSourceV2MetadataOnlyTableSuite extends QueryTest with SharedSparkSessi assert(!table.properties().containsKey( TableCatalog.PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE)) } + + test("CREATE VIEW on a v2 catalog") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW general_catalog.default.my_view AS " + + "SELECT x FROM spark_catalog.default.t WHERE x > 1") + checkAnswer(spark.table("general_catalog.default.my_view"), Seq(Row(2), Row(3))) + } + } + + test("CREATE VIEW IF NOT EXISTS is a no-op when the view exists") { + withTable("spark_catalog.default.t") { + 
Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW general_catalog.default.v_ifne AS " + + "SELECT x FROM spark_catalog.default.t") + // Re-running with IF NOT EXISTS should not fail and should not change the view. + sql("CREATE VIEW IF NOT EXISTS general_catalog.default.v_ifne AS " + + "SELECT x + 100 AS x FROM spark_catalog.default.t") + checkAnswer(spark.table("general_catalog.default.v_ifne"), + Seq(Row(1), Row(2), Row(3))) + } + } + + test("CREATE VIEW without IF NOT EXISTS fails when the view exists") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW general_catalog.default.v_dup AS " + + "SELECT x FROM spark_catalog.default.t") + intercept[AnalysisException] { + sql("CREATE VIEW general_catalog.default.v_dup AS " + + "SELECT x FROM spark_catalog.default.t") + } + } + } + + test("CREATE OR REPLACE VIEW replaces an existing view") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW general_catalog.default.v_replace AS " + + "SELECT x FROM spark_catalog.default.t WHERE x > 10") + checkAnswer(spark.table("general_catalog.default.v_replace"), Seq.empty[Row]) + sql("CREATE OR REPLACE VIEW general_catalog.default.v_replace AS " + + "SELECT x FROM spark_catalog.default.t WHERE x > 1") + checkAnswer(spark.table("general_catalog.default.v_replace"), Seq(Row(2), Row(3))) + } + } + + test("CREATE VIEW on a catalog without SUPPORTS_CREATE_VIEW fails") { + withSQLConf( + "spark.sql.catalog.no_view_catalog" -> classOf[TestingTableOnlyCatalog].getName) { + val ex = intercept[AnalysisException] { + sql("CREATE VIEW no_view_catalog.default.v AS SELECT 1") + } + assert(ex.getCondition == "MISSING_CATALOG_ABILITY.VIEWS") + } + } } class TestingGeneralCatalog extends TableCatalog { + // Holds views created via createTable within the session. Keyed by (namespace, name). 
+ private val createdViews = + new java.util.concurrent.ConcurrentHashMap[(Seq[String], String), TableInfo]() + + override def capabilities(): java.util.Set[TableCatalogCapability] = + java.util.Collections.singleton(TableCatalogCapability.SUPPORTS_CREATE_VIEW) + override def loadTable(ident: Identifier): Table = { - ident.name() match { - case "test_json" => - val info = new TableInfo.Builder() - .withSchema(new StructType().add("col", "string")) - .withProvider("json") - .withLocation(ident.namespace().head) - .withTableType(TableSummary.EXTERNAL_TABLE_TYPE) - .build() - new MetadataOnlyTable(info) - case "test_partitioned_json" => - val partitioning = LogicalExpressions.identity(LogicalExpressions.reference(Seq("c2"))) - val info = new TableInfo.Builder() - .withSchema(new StructType().add("c1", "int").add("c2", "int")) - .withProvider("json") - .withLocation(ident.namespace().head) - .withTableType(TableSummary.EXTERNAL_TABLE_TYPE) - .withPartitions(Array(partitioning)) - .build() - new MetadataOnlyTable(info) - case "test_v2" => - val info = new TableInfo.Builder() - .withSchema(FakeV2Provider.schema) - .withProvider(classOf[FakeV2Provider].getName) - .build() - new MetadataOnlyTable(info) - case "test_view" => - val viewProps = new java.util.HashMap[String, String]() - viewProps.put( - TableCatalog.VIEW_CONF_PREFIX + SQLConf.ANSI_ENABLED.key, - (ident.namespace().head == "ansi").toString) - val info = new TableInfo.Builder() - .withSchema(new StructType().add("col", "string").add("i", "int")) - .withProperties(viewProps) - .withViewText("SELECT col, col::int AS i FROM spark_catalog.default.t WHERE col = 'b'") - .build() - new MetadataOnlyTable(info) - case "test_unqualified_view" => - val info = new TableInfo.Builder() - .withSchema(new StructType().add("col", "string")) - .withViewText("SELECT col FROM t WHERE col = 'b'") - .withCurrentCatalogAndNamespace("spark_catalog", Array("default")) - .build() - new MetadataOnlyTable(info) - case _ => throw new 
NoSuchTableException(ident) + val key = (ident.namespace().toSeq, ident.name()) + Option(createdViews.get(key)).map(new MetadataOnlyTable(_)).getOrElse { + ident.name() match { + case "test_json" => + val info = new TableInfo.Builder() + .withSchema(new StructType().add("col", "string")) + .withProvider("json") + .withLocation(ident.namespace().head) + .withTableType(TableSummary.EXTERNAL_TABLE_TYPE) + .build() + new MetadataOnlyTable(info) + case "test_partitioned_json" => + val partitioning = LogicalExpressions.identity(LogicalExpressions.reference(Seq("c2"))) + val info = new TableInfo.Builder() + .withSchema(new StructType().add("c1", "int").add("c2", "int")) + .withProvider("json") + .withLocation(ident.namespace().head) + .withTableType(TableSummary.EXTERNAL_TABLE_TYPE) + .withPartitions(Array(partitioning)) + .build() + new MetadataOnlyTable(info) + case "test_v2" => + val info = new TableInfo.Builder() + .withSchema(FakeV2Provider.schema) + .withProvider(classOf[FakeV2Provider].getName) + .build() + new MetadataOnlyTable(info) + case "test_view" => + val viewProps = new java.util.HashMap[String, String]() + viewProps.put( + TableCatalog.VIEW_CONF_PREFIX + SQLConf.ANSI_ENABLED.key, + (ident.namespace().head == "ansi").toString) + val info = new TableInfo.Builder() + .withSchema(new StructType().add("col", "string").add("i", "int")) + .withProperties(viewProps) + .withViewText( + "SELECT col, col::int AS i FROM spark_catalog.default.t WHERE col = 'b'") + .build() + new MetadataOnlyTable(info) + case "test_unqualified_view" => + val info = new TableInfo.Builder() + .withSchema(new StructType().add("col", "string")) + .withViewText("SELECT col FROM t WHERE col = 'b'") + .withCurrentCatalogAndNamespace("spark_catalog", Array("default")) + .build() + new MetadataOnlyTable(info) + case _ => throw new NoSuchTableException(ident) + } } } + override def tableExists(ident: Identifier): Boolean = { + val key = (ident.namespace().toSeq, ident.name()) + 
createdViews.containsKey(key) || super.tableExists(ident) + } + + override def createTable(ident: Identifier, info: TableInfo): Table = { + val key = (ident.namespace().toSeq, ident.name()) + if (createdViews.putIfAbsent(key, info) != null) { + throw new org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException(ident) + } + new MetadataOnlyTable(info) + } + override def alterTable(ident: Identifier, changes: TableChange*): Table = { throw new RuntimeException("shouldn't be called") } override def dropTable(ident: Identifier): Boolean = { - throw new RuntimeException("shouldn't be called") + val key = (ident.namespace().toSeq, ident.name()) + createdViews.remove(key) != null } override def renameTable(oldIdent: Identifier, newIdent: Identifier): Unit = { throw new RuntimeException("shouldn't be called") @@ -198,3 +279,20 @@ class TestingGeneralCatalog extends TableCatalog { } override def name(): String = catalogName } + +/** A v2 catalog that does not declare SUPPORTS_CREATE_VIEW. Used to exercise the capability + * gate in `DataSourceV2Strategy`. 
*/ +class TestingTableOnlyCatalog extends TableCatalog { + override def loadTable(ident: Identifier): Table = throw new NoSuchTableException(ident) + override def alterTable(ident: Identifier, changes: TableChange*): Table = + throw new RuntimeException("shouldn't be called") + override def dropTable(ident: Identifier): Boolean = false + override def renameTable(oldIdent: Identifier, newIdent: Identifier): Unit = + throw new RuntimeException("shouldn't be called") + override def listTables(namespace: Array[String]): Array[Identifier] = Array.empty + private var catalogName = "" + override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = { + catalogName = name + } + override def name(): String = catalogName +} From ed0896f141b26ccb04996278f664e7913a0ada05 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 22 Apr 2026 09:03:45 +0000 Subject: [PATCH 14/59] make CreateView an AnalysisOnlyCommand to capture referredTempFunctions CreateView previously extended BinaryCommand (left = target identifier, right = view query). On the v1 path this was fine because ResolveSessionCatalog rewrote CreateView into the AnalysisOnlyCommand CreateViewCommand, which then captured referredTempFunctions via HandleSpecialCommand.markAsAnalyzed. On the new v2 path CreateView flows straight to DataSourceV2Strategy without rewriting, so there was no opportunity to capture temp-function references - v2 CREATE VIEW silently accepted views referencing session-scoped temp functions. Change CreateView to extend AnalysisOnlyCommand (mirroring V2CreateTableAsSelectPlan): drop the BinaryLike inheritance, add isAnalyzed + referredTempFunctions fields with default values, and implement childrenToAnalyze / markAsAnalyzed / withNewChildrenInternal manually. 
This makes HandleSpecialCommand fire on CreateView in the v2 path and populate referredTempFunctions before the strategy runs; CreateV2ViewExec + AtomicCreateV2ViewExec now thread that list into ViewHelper.verifyTemporaryObjectsNotExists and ViewHelper.generateViewProperties so v2 CREATE VIEW rejects and serializes temp-function references the same way v1 does. Pattern-match sites updated for the new 12-field arity: ApplyDefaultCollation, ResolveSessionCatalog, DataSourceV2Strategy. The v1 rewriting path ignores isAnalyzed / referredTempFunctions with _ patterns; CreateViewCommand continues to capture them via its own markAsAnalyzed. --- .../analysis/ApplyDefaultCollation.scala | 2 +- .../catalyst/plans/logical/v2Commands.scala | 34 +++++++++++++++---- .../analysis/ResolveSessionCatalog.scala | 3 +- .../datasources/v2/CreateV2ViewExec.scala | 17 +++++----- .../datasources/v2/DataSourceV2Strategy.scala | 9 +++-- 5 files changed, 46 insertions(+), 19 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollation.scala index 67d5b70b30a33..1a76b68289a72 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollation.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollation.scala @@ -197,7 +197,7 @@ object ApplyDefaultCollation extends Rule[LogicalPlan] { collation = getCollationFromSchemaMetadata(catalog, identifier.namespace()))) case createView@CreateView(ResolvedIdentifier( - catalog: SupportsNamespaces, identifier), _, _, _, _, _, _, _, _, _) + catalog: SupportsNamespaces, identifier), _, _, _, _, _, _, _, _, _, _, _) if createView.collation.isEmpty => val newCreateView = CurrentOrigin.withOrigin(createView.origin) { createView.copy( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index b857a360544e3..dc10c20a92ec2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -1731,6 +1731,11 @@ case class AlterViewSchemaBinding( /** * The logical plan of the CREATE VIEW ... command. + * + * Extends [[AnalysisOnlyCommand]] so that [[Analyzer.HandleSpecialCommand]] captures + * `referredTempFunctions` from the [[AnalysisContext]] after the child query is analyzed; + * this list is needed for `verifyTemporaryObjectsNotExists`-style checks on downstream + * execution paths. */ case class CreateView( child: LogicalPlan, @@ -1742,15 +1747,32 @@ case class CreateView( query: LogicalPlan, allowExisting: Boolean, replace: Boolean, - viewSchemaMode: ViewSchemaMode) extends BinaryCommand with CTEInChildren { - override def left: LogicalPlan = child - override def right: LogicalPlan = query + viewSchemaMode: ViewSchemaMode, + isAnalyzed: Boolean = false, + referredTempFunctions: Seq[String] = Seq.empty) + extends Command with AnalysisOnlyCommand with CTEInChildren { + + override def childrenToAnalyze: Seq[LogicalPlan] = Seq(child, query) + + override def markAsAnalyzed(analysisContext: AnalysisContext): LogicalPlan = copy( + isAnalyzed = true, + referredTempFunctions = analysisContext.referredTempFunctionNames.toSeq) + override protected def withNewChildrenInternal( - newLeft: LogicalPlan, newRight: LogicalPlan): LogicalPlan = - copy(child = newLeft, query = newRight) + newChildren: IndexedSeq[LogicalPlan]): LogicalPlan = { + assert(!isAnalyzed) + newChildren match { + case Seq(newChild, newQuery) => + copy(child = newChild, query = newQuery) + case others => + throw new SparkIllegalArgumentException( + errorClass = "_LEGACY_ERROR_TEMP_3218", + messageParameters = Map("others" -> others.toString())) + } + } override def 
withCTEDefs(cteDefs: Seq[CTERelationDef]): LogicalPlan = { - withNewChildren(Seq(child, WithCTE(query, cteDefs))) + copy(query = WithCTE(query, cteDefs)) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index b1cb4badf032a..3bd5017cd7ecd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -518,7 +518,8 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) AlterViewSchemaBindingCommand(ident, viewSchemaMode) case CreateView(CreateViewInSessionCatalog(ident), userSpecifiedColumns, comment, - collation, properties, originalText, child, allowExisting, replace, viewSchemaMode) => + collation, properties, originalText, child, allowExisting, replace, viewSchemaMode, + _, _) => CreateViewCommand( name = ident, userSpecifiedColumns = userSpecifiedColumns, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala index d3587d65a649d..c2781bb32ea53 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala @@ -44,6 +44,7 @@ private[v2] trait V2ViewPreparation extends LeafV2CommandExec { def originalText: String def query: LogicalPlan def viewSchemaMode: ViewSchemaMode + def referredTempFunctions: Seq[String] // Build a synthetic v1 TableIdentifier for error messages and for ViewHelper methods that // accept it purely for rendering. 
This carries no semantic weight - the v2 Identifier is the @@ -69,12 +70,10 @@ private[v2] trait V2ViewPreparation extends LeafV2CommandExec { } } - // Reject permanent views referencing temporary objects. Temp-function references are not - // detected here because v2 `CreateView` does not extend AnalysisOnlyCommand and therefore - // does not capture `referredTempFunctionNames` from AnalysisContext; temp views and temp - // variables are still caught via plan inspection. TODO(SPARK-...): capture temp functions. + // Reject permanent views referencing temporary objects. `referredTempFunctions` is captured + // from AnalysisContext via `CreateView.markAsAnalyzed` (CreateView extends AnalysisOnlyCommand). verifyTemporaryObjectsNotExists( - isTemporary = false, legacyName, query, referredTempFunctions = Nil) + isTemporary = false, legacyName, query, referredTempFunctions) verifyAutoGeneratedAliasesNotExists(query, isTemporary = false, legacyName) SchemaUtils.checkIndeterminateCollationInSchema(query.schema) @@ -98,7 +97,7 @@ private[v2] trait V2ViewPreparation extends LeafV2CommandExec { fieldNames = aliasedSchema.fieldNames, viewSchemaMode = viewSchemaMode, tempViewNames = tempViews, - tempFunctionNames = Nil, + tempFunctionNames = referredTempFunctions, tempVariableNames = tempVars, catalogAndNamespaceEncoder = v2Encoder) @@ -130,7 +129,8 @@ case class CreateV2ViewExec( query: LogicalPlan, allowExisting: Boolean, replace: Boolean, - viewSchemaMode: ViewSchemaMode) extends V2ViewPreparation { + viewSchemaMode: ViewSchemaMode, + referredTempFunctions: Seq[String]) extends V2ViewPreparation { override protected def run(): Seq[InternalRow] = { val info = buildTableInfo() @@ -164,7 +164,8 @@ case class AtomicCreateV2ViewExec( query: LogicalPlan, allowExisting: Boolean, replace: Boolean, - viewSchemaMode: ViewSchemaMode) extends V2ViewPreparation { + viewSchemaMode: ViewSchemaMode, + referredTempFunctions: Seq[String]) extends V2ViewPreparation { override val metrics: 
Map[String, SQLMetric] = DataSourceV2Utils.commitMetrics(sparkContext, catalog) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 29ea49129dbca..fa3eff2853ad6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -302,7 +302,8 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat } case CreateView(ResolvedIdentifier(catalog, ident), userSpecifiedColumns, comment, - collation, properties, originalText, child, allowExisting, replace, viewSchemaMode) => + collation, properties, originalText, child, allowExisting, replace, viewSchemaMode, + _, referredTempFunctions) => val tableCatalog = catalog.asTableCatalog if (!tableCatalog.capabilities().contains(TableCatalogCapability.SUPPORTS_CREATE_VIEW)) { throw QueryCompilationErrors.missingCatalogViewsAbilityError(tableCatalog) @@ -313,10 +314,12 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat tableCatalog match { case staging: StagingTableCatalog => AtomicCreateV2ViewExec(staging, ident, userSpecifiedColumns, comment, collation, - properties, sqlText, child, allowExisting, replace, viewSchemaMode) :: Nil + properties, sqlText, child, allowExisting, replace, viewSchemaMode, + referredTempFunctions) :: Nil case _ => CreateV2ViewExec(tableCatalog, ident, userSpecifiedColumns, comment, collation, - properties, sqlText, child, allowExisting, replace, viewSchemaMode) :: Nil + properties, sqlText, child, allowExisting, replace, viewSchemaMode, + referredTempFunctions) :: Nil } case ReplaceTableAsSelect(ResolvedIdentifier(catalog, ident), From e9f834a67a9cb7d3208866144a17ed18975347f7 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 22 Apr 
2026 12:08:48 +0000 Subject: [PATCH 15/59] address self-review findings for v2 CREATE VIEW - CheckViewReferences: new post-analysis rule (wired into BaseSessionStateBuilder) catches permanent views that reference temp objects and auto-generated aliases for the v2 CreateView path; v1 CreateViewCommand keeps its exec-time safety net. - CreateV2ViewExec: drop redundant verify/collect calls from buildTableInfo (fixes compile error on private ViewHelper.collectTemporaryViews); wrap TableAlreadyExistsException as viewAlreadyExists in the non-atomic path; reorder atomic run to validate first (match v1 and the non-atomic exec); add SchemaEvolution + user-column-list guard; expand V2ViewPreparation doc with the v1 mirror list. - VIEW_SQL_CONFIG_PREFIX: derive from public TableCatalog.VIEW_CONF_PREFIX to bind the two constants and avoid drift. - V1Table.toCatalogTable: drop magic "root" default for empty namespaces (use None); fix grammar in serde-props comment. - ResolveSessionCatalog, RelationResolution: add explanatory comments for the CreateView rewrite drop and the new MetadataOnlyTable branch. - MetadataOnlyTable: @since 4.1.0 -> 4.2.0. - Tests: exercise AtomicCreateV2ViewExec via a new TestingStagingCatalog; reject user columns + SchemaEvolution; reject too-few/too-many columns; reject temp-function and temp-view references; verify collation propagation; verify withCurrentCatalogAndNamespace clears on null/empty catalog. 
Co-authored-by: Isaac --- .../connector/catalog/MetadataOnlyTable.java | 2 +- .../analysis/RelationResolution.scala | 3 + .../sql/catalyst/catalog/interface.scala | 4 +- .../spark/sql/connector/catalog/V1Table.scala | 9 +- .../analysis/ResolveSessionCatalog.scala | 3 + .../spark/sql/execution/command/views.scala | 29 ++- .../datasources/v2/CreateV2ViewExec.scala | 43 ++-- .../internal/BaseSessionStateBuilder.scala | 3 +- .../DataSourceV2MetadataOnlyTableSuite.scala | 201 +++++++++++++++++- 9 files changed, 271 insertions(+), 26 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java index 2d0db6ab24f9f..6a07b7b5f072e 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java @@ -36,7 +36,7 @@ * the resulting {@link TableInfo} in a {@code MetadataOnlyTable} to return from * {@link TableCatalog#loadTable(Identifier)}. 
* - * @since 4.1.0 + * @since 4.2.0 */ @Evolving public class MetadataOnlyTable implements Table { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala index 357b3571d0acb..493ed2b2dbf9d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala @@ -344,6 +344,9 @@ class RelationResolution( || !v1Table.catalogTable.tracksPartitionsInCatalog => createDataSourceV1Scan(v1Table.v1Table) + // MetadataOnlyTable is a sentinel meaning "interpret via v1", so unlike the V1Table + // case above we apply no session-catalog / tracksPartitionsInCatalog guard — any catalog + // returning MetadataOnlyTable has opted into v1 read semantics. case t: MetadataOnlyTable => createDataSourceV1Scan(V1Table.toCatalogTable(catalog, ident, t)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index eaee334a01cbd..cc5c6ba90bd29 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -43,7 +43,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.plans.logical.statsEstimation.EstimationUtils import org.apache.spark.sql.catalyst.types.DataTypeUtils import org.apache.spark.sql.catalyst.util._ -import org.apache.spark.sql.connector.catalog.CatalogManager +import org.apache.spark.sql.connector.catalog.{CatalogManager, TableCatalog} import org.apache.spark.sql.connector.expressions.{ClusterByTransform, FieldReference, NamedReference, Transform} import org.apache.spark.sql.errors.QueryCompilationErrors import 
org.apache.spark.sql.internal.SQLConf @@ -753,7 +753,7 @@ object CatalogTable { props.toMap } - val VIEW_SQL_CONFIG_PREFIX = VIEW_PREFIX + "sqlConfig." + val VIEW_SQL_CONFIG_PREFIX = TableCatalog.VIEW_CONF_PREFIX val VIEW_QUERY_OUTPUT_PREFIX = VIEW_PREFIX + "query.out." val VIEW_QUERY_OUTPUT_NUM_COLUMNS = VIEW_QUERY_OUTPUT_PREFIX + "numCols" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala index d2343cf53211b..8486fa1d5f89a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala @@ -146,15 +146,20 @@ private[sql] object V1Table { Map.empty[String, String] } CatalogTable( + // CatalogTable.identifier uses a single-string database; for multi-part namespaces we + // preserve only the last part. The view-expansion path does not rely on this — it reads + // the captured catalog+namespace from PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE (translated + // below into V1's numbered keys) — so the narrowing only affects identifier rendering in + // error messages. identifier = TableIdentifier( table = ident.name(), - database = Some(ident.namespace().lastOption.getOrElse("root")), + database = ident.namespace().lastOption, catalog = Some(catalog.name())), tableType = tableType, storage = CatalogStorageFormat.empty.copy( locationUri = props.get(TableCatalog.PROP_LOCATION).map(CatalogUtils.stringToURI), // v2 table properties should be put into the serde properties as well in case - // it contains data source options. + // they contain data source options. 
properties = tablePropsMap ++ serdeProps.map { case (k, v) => k.drop(TableCatalog.OPTION_PREFIX.length) -> v } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 3bd5017cd7ecd..26955b4a849ad 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -517,6 +517,9 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) case AlterViewSchemaBinding(ResolvedViewIdentifier(ident), viewSchemaMode) => AlterViewSchemaBindingCommand(ident, viewSchemaMode) + // The final `_, _` are CreateView.isAnalyzed and referredTempFunctions. We drop both: + // CreateViewCommand is a separate AnalysisOnlyCommand and gets its own markAsAnalyzed pass + // from HandleSpecialCommand after this rewrite. case CreateView(CreateViewInSessionCatalog(ident), userSpecifiedColumns, comment, collation, properties, originalText, child, allowExisting, replace, viewSchemaMode, _, _) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index 42ef6e7a40ba5..eb9e63e8614d5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -26,11 +26,11 @@ import org.apache.spark.SparkException import org.apache.spark.internal.Logging import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.{CapturesConfig, SQLConfHelper, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.{AnalysisContext, GlobalTempView, LocalTempView, SchemaEvolution, SchemaUnsupported, ViewSchemaMode, ViewType} +import org.apache.spark.sql.catalyst.analysis.{AnalysisContext, 
GlobalTempView, LocalTempView, ResolvedIdentifier, SchemaEvolution, SchemaUnsupported, ViewSchemaMode, ViewType} import org.apache.spark.sql.catalyst.analysis.V2TableReference import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType, TemporaryViewRelation} import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, SubqueryExpression, VariableReference} -import org.apache.spark.sql.catalyst.plans.logical.{AnalysisOnlyCommand, CreateTempView, CTEInChildren, CTERelationDef, LogicalPlan, Project, View, WithCTE} +import org.apache.spark.sql.catalyst.plans.logical.{AnalysisOnlyCommand, CreateTempView, CreateView, CTEInChildren, CTERelationDef, LogicalPlan, Project, View, WithCTE} import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.classic.ClassicConversions.castToImpl import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.NamespaceHelper @@ -842,3 +842,28 @@ object ViewHelper extends SQLConfHelper with Logging with CapturesConfig { } } } + +/** + * Post-analysis check for v2 CREATE VIEW: rejects permanent views that reference temporary + * objects and rejects view bodies with auto-generated aliases. `referredTempFunctions` is + * captured by [[CreateView.markAsAnalyzed]] before this rule runs. The v1 counterpart + * [[CreateViewCommand]] keeps its existing exec-time checks — Dataset-built commands bypass + * the analyzer's re-capture path, so the exec-time safety net must stay for v1. 
+ */ +object CheckViewReferences extends (LogicalPlan => Unit) { + import ViewHelper._ + + override def apply(plan: LogicalPlan): Unit = plan.foreach { + case cv: CreateView if cv.isAnalyzed => + val ident = cv.child.asInstanceOf[ResolvedIdentifier] + val legacyName = TableIdentifier( + table = ident.identifier.name(), + database = ident.identifier.namespace().lastOption, + catalog = Some(ident.catalog.name())) + verifyTemporaryObjectsNotExists( + isTemporary = false, legacyName, cv.query, cv.referredTempFunctions) + verifyAutoGeneratedAliasesNotExists(cv.query, isTemporary = false, legacyName) + + case _ => + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala index c2781bb32ea53..77c1641a663af 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala @@ -19,9 +19,10 @@ package org.apache.spark.sql.execution.datasources.v2 import scala.jdk.CollectionConverters._ +import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, TableAlreadyExistsException} +import org.apache.spark.sql.catalyst.analysis.{SchemaEvolution, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, ViewSchemaMode} import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, QuotingUtils, SchemaUtils} @@ -33,6 +34,12 @@ import org.apache.spark.util.Utils /** * Shared validation + TableInfo construction for v2 CREATE VIEW execs. 
+ * + * Mirrors the persistent-view portion of v1 [[ViewHelper.prepareTable]] + the execution-time + * checks in [[CreateViewCommand.run]]. Any future addition on the v1 side — new view-specific + * reserved property, new validation, new schema-mode handling — must be mirrored here. + * Post-analysis checks for temp-object references and auto-generated aliases run for the v2 + * path in [[CheckViewReferences]]; v1 [[CreateViewCommand]] keeps its own exec-time checks. */ private[v2] trait V2ViewPreparation extends LeafV2CommandExec { def catalog: TableCatalog @@ -68,21 +75,17 @@ private[v2] trait V2ViewPreparation extends LeafV2CommandExec { throw QueryCompilationErrors.cannotCreateViewTooManyColumnsError( legacyName, userSpecifiedColumns.map(_._1), query) } + if (viewSchemaMode == SchemaEvolution) { + throw SparkException.internalError( + "View with user column list has viewSchemaMode EVOLUTION") + } } - // Reject permanent views referencing temporary objects. `referredTempFunctions` is captured - // from AnalysisContext via `CreateView.markAsAnalyzed` (CreateView extends AnalysisOnlyCommand). - verifyTemporaryObjectsNotExists( - isTemporary = false, legacyName, query, referredTempFunctions) - verifyAutoGeneratedAliasesNotExists(query, isTemporary = false, legacyName) SchemaUtils.checkIndeterminateCollationInSchema(query.schema) val aliasedSchema = CharVarcharUtils.getRawSchema( aliasPlan(session, query, userSpecifiedColumns).schema, session.sessionState.conf) - val tempViews = collectTemporaryViews(query) - val tempVars = collectTemporaryVariables(query) - // Emit PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE (single quoted multi-part identifier, // catalog as first part) instead of v1's numbered view.catalogAndNamespace.* keys.
val v2Encoder: (String, Seq[String]) => Map[String, String] = { (cat, ns) => @@ -90,15 +93,16 @@ private[v2] trait V2ViewPreparation extends LeafV2CommandExec { Map(PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE -> QuotingUtils.quoted(parts)) } + // Temp-object collection arguments are omitted: persistent-view semantics are enforced by + // CheckViewReferences before this runs, so any referenced temp view/function/variable has + // already caused analysis to fail. This matches v1 ViewHelper.prepareTable, which also + // calls generateViewProperties without them on the persistent-view path. val viewProps = generateViewProperties( properties = userProperties, session = session, queryOutput = query.output.map(_.name).toArray, fieldNames = aliasedSchema.fieldNames, viewSchemaMode = viewSchemaMode, - tempViewNames = tempViews, - tempFunctionNames = referredTempFunctions, - tempVariableNames = tempVars, catalogAndNamespaceEncoder = v2Encoder) val builder = new TableInfo.Builder() @@ -144,7 +148,14 @@ case class CreateV2ViewExec( CommandUtils.uncacheTableOrView(session, legacyName) catalog.dropTable(identifier) } - catalog.createTable(identifier, info) + // TOCTOU: if another writer creates the table between tableExists and createTable, a bare + // TableAlreadyExistsException is unhelpful; present the same viewAlreadyExists error the + // atomic path uses. + try { + catalog.createTable(identifier, info) + } catch { + case _: TableAlreadyExistsException => throw viewAlreadyExists() + } Seq.empty } } @@ -171,10 +182,14 @@ case class AtomicCreateV2ViewExec( DataSourceV2Utils.commitMetrics(sparkContext, catalog) override protected def run(): Seq[InternalRow] = { + // Validate first (mirrors v1 CreateViewCommand.run and the non-atomic exec above) so a + // CREATE VIEW IF NOT EXISTS v AS [malformed query] with existing v fails the same way in + // both execs: the malformed view body is rejected even when the allow-existing short- + // circuit would otherwise skip creation.
+ val info = buildTableInfo() if (allowExisting && catalog.tableExists(identifier)) { return Seq.empty } - val info = buildTableInfo() val staged: StagedTable = if (replace) { if (catalog.tableExists(identifier)) { ViewHelper.checkCyclicViewReference(query, Seq(legacyName), legacyName) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala index 9bd68cbe72a07..d8fe14a0664c1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.execution.{ColumnarRule, CommandExecutionMode, Query import org.apache.spark.sql.execution.adaptive.AdaptiveRulesHolder import org.apache.spark.sql.execution.aggregate.{ResolveEncodersInScalaAgg, ScalaUDAF} import org.apache.spark.sql.execution.analysis.DetectAmbiguousSelfJoin -import org.apache.spark.sql.execution.command.CommandCheck +import org.apache.spark.sql.execution.command.{CheckViewReferences, CommandCheck} import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.execution.datasources.v2.{TableCapabilityCheck, V2SessionCatalog} import org.apache.spark.sql.execution.streaming.runtime.ResolveWriteToStream @@ -259,6 +259,7 @@ abstract class BaseSessionStateBuilder( HiveOnlyCheck +: TableCapabilityCheck +: CommandCheck +: + CheckViewReferences +: ViewSyncSchemaToMetaStore +: customCheckRules } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala index 429f4378ea147..d22ffa748acb2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala @@ -17,10 +17,10 @@ package org.apache.spark.sql.connector -import org.apache.spark.SparkConf +import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.sql.{AnalysisException, QueryTest, Row} -import org.apache.spark.sql.catalyst.analysis.NoSuchTableException -import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, Table, TableCatalog, TableCatalogCapability, TableChange, TableInfo, TableSummary} +import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, TableAlreadyExistsException} +import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, StagedTable, StagingTableCatalog, Table, TableCatalog, TableCatalogCapability, TableChange, TableInfo, TableSummary} import org.apache.spark.sql.connector.expressions.LogicalExpressions import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession @@ -183,6 +183,127 @@ class DataSourceV2MetadataOnlyTableSuite extends QueryTest with SharedSparkSessi assert(ex.getCondition == "MISSING_CATALOG_ABILITY.VIEWS") } } + + test("CREATE VIEW rejects user column list with SCHEMA EVOLUTION") { + withTable("spark_catalog.default.t") { + Seq(1 -> 10).toDF("x", "y").write.saveAsTable("spark_catalog.default.t") + // The parser either rejects `v(a, b) WITH SCHEMA EVOLUTION` outright or lets it through + // to the exec, where `buildTableInfo` throws an internal error. Either is acceptable. 
+ val ex = intercept[Exception] { + sql("CREATE VIEW general_catalog.default.v_evo (a, b) WITH SCHEMA EVOLUTION AS " + + "SELECT x, y FROM spark_catalog.default.t") + } + assert(ex.isInstanceOf[AnalysisException] || ex.isInstanceOf[SparkException]) + } + } + + test("CREATE VIEW rejects too-few / too-many user-specified columns") { + withTable("spark_catalog.default.t") { + Seq(1 -> 10).toDF("x", "y").write.saveAsTable("spark_catalog.default.t") + intercept[AnalysisException] { + sql("CREATE VIEW general_catalog.default.v_few (a) AS " + + "SELECT x, y FROM spark_catalog.default.t") + } + intercept[AnalysisException] { + sql("CREATE VIEW general_catalog.default.v_many (a, b, c) AS " + + "SELECT x, y FROM spark_catalog.default.t") + } + } + } + + test("CREATE VIEW rejects reference to a temporary function") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + spark.udf.register("temp_udf", (i: Int) => i + 1) + val ex = intercept[AnalysisException] { + sql("CREATE VIEW general_catalog.default.v_tempfn AS " + + "SELECT temp_udf(x) FROM spark_catalog.default.t") + } + assert(ex.getMessage.toLowerCase.contains("temporary")) + } + } + + test("CREATE VIEW rejects reference to a temporary view") { + withTempView("tv") { + spark.range(3).createOrReplaceTempView("tv") + val ex = intercept[AnalysisException] { + sql("CREATE VIEW general_catalog.default.v_tempview AS SELECT id FROM tv") + } + assert(ex.getMessage.toLowerCase.contains("temporary")) + } + } + + test("CREATE VIEW propagates DEFAULT COLLATION to TableInfo") { + withTable("spark_catalog.default.t") { + Seq("a", "b").toDF("col").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW general_catalog.default.v_coll DEFAULT COLLATION UTF8_BINARY AS " + + "SELECT col FROM spark_catalog.default.t") + // TestingGeneralCatalog stores the TableInfo verbatim, so the collation property is + // observable via the catalog-stored builder output. 
+ val catalog = spark.sessionState.catalogManager.catalog("general_catalog") + .asInstanceOf[TestingGeneralCatalog] + val info = catalog.getStoredView(Array("default"), "v_coll") + assert(info.properties().get(TableCatalog.PROP_COLLATION) == "UTF8_BINARY") + } + } + + test("withCurrentCatalogAndNamespace clears the property when catalog is null or empty") { + val infoNull = new TableInfo.Builder() + .withSchema(new StructType().add("col", "string")) + .withViewText("SELECT 1 AS col") + .withCurrentCatalogAndNamespace("spark_catalog", Array("default")) + .withCurrentCatalogAndNamespace(null, Array("ignored")) + .build() + assert(!infoNull.properties().containsKey( + TableCatalog.PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE)) + + val infoEmpty = new TableInfo.Builder() + .withSchema(new StructType().add("col", "string")) + .withViewText("SELECT 1 AS col") + .withCurrentCatalogAndNamespace("spark_catalog", Array("default")) + .withCurrentCatalogAndNamespace("", Array("ignored")) + .build() + assert(!infoEmpty.properties().containsKey( + TableCatalog.PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE)) + } + + test("CREATE VIEW on a StagingTableCatalog uses the atomic exec") { + withSQLConf( + "spark.sql.catalog.staging_catalog" -> classOf[TestingStagingCatalog].getName) { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + + // Plain CREATE — exercises stageCreate. + sql("CREATE VIEW staging_catalog.default.v_atomic AS " + + "SELECT x FROM spark_catalog.default.t WHERE x > 1") + checkAnswer( + spark.table("staging_catalog.default.v_atomic"), + Seq(Row(2), Row(3))) + + // Second CREATE without IF NOT EXISTS — should surface viewAlreadyExistsError + // (TestingStagingCatalog's stageCreate throws TableAlreadyExistsException, which the + // exec wraps). 
+ val ex = intercept[AnalysisException] { + sql("CREATE VIEW staging_catalog.default.v_atomic AS " + + "SELECT x FROM spark_catalog.default.t WHERE x > 1") + } + assert(ex.getMessage.toLowerCase.contains("already exists")) + + // CREATE OR REPLACE — exercises stageCreateOrReplace. + sql("CREATE OR REPLACE VIEW staging_catalog.default.v_atomic AS " + + "SELECT x FROM spark_catalog.default.t WHERE x > 2") + checkAnswer(spark.table("staging_catalog.default.v_atomic"), Row(3)) + + // CREATE IF NOT EXISTS on an existing view — no-op. After the PR reorders atomic to + // validate first, this should still succeed (the body is valid); the earlier behavior + // where a broken body was silently skipped no longer applies. + sql("CREATE VIEW IF NOT EXISTS staging_catalog.default.v_atomic AS " + + "SELECT x + 100 AS x FROM spark_catalog.default.t") + // Value unchanged — IF NOT EXISTS was a no-op. + checkAnswer(spark.table("staging_catalog.default.v_atomic"), Row(3)) + } + } + } } class TestingGeneralCatalog extends TableCatalog { @@ -254,11 +375,18 @@ class TestingGeneralCatalog extends TableCatalog { override def createTable(ident: Identifier, info: TableInfo): Table = { val key = (ident.namespace().toSeq, ident.name()) if (createdViews.putIfAbsent(key, info) != null) { - throw new org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException(ident) + throw new TableAlreadyExistsException(ident) } new MetadataOnlyTable(info) } + /** Test-only accessor: returns the stored TableInfo for a created view. 
*/ + def getStoredView(namespace: Array[String], name: String): TableInfo = { + Option(createdViews.get((namespace.toSeq, name))).getOrElse { + throw new NoSuchTableException(Identifier.of(namespace, name)) + } + } + override def alterTable(ident: Identifier, changes: TableChange*): Table = { throw new RuntimeException("shouldn't be called") } @@ -280,6 +408,71 @@ class TestingGeneralCatalog extends TableCatalog { override def name(): String = catalogName } +/** + * A minimal [[StagingTableCatalog]] used to drive `AtomicCreateV2ViewExec`. Views are stored + * in a local map; staging commits write through, aborts discard. Supports SUPPORTS_CREATE_VIEW. + */ +class TestingStagingCatalog extends StagingTableCatalog { + + private val views = + new java.util.concurrent.ConcurrentHashMap[(Seq[String], String), TableInfo]() + + override def capabilities(): java.util.Set[TableCatalogCapability] = + java.util.Collections.singleton(TableCatalogCapability.SUPPORTS_CREATE_VIEW) + + private def keyOf(ident: Identifier): (Seq[String], String) = + (ident.namespace().toSeq, ident.name()) + + override def loadTable(ident: Identifier): Table = { + Option(views.get(keyOf(ident))).map(new MetadataOnlyTable(_)) + .getOrElse(throw new NoSuchTableException(ident)) + } + + override def tableExists(ident: Identifier): Boolean = views.containsKey(keyOf(ident)) + + override def createTable(ident: Identifier, info: TableInfo): Table = { + if (views.putIfAbsent(keyOf(ident), info) != null) { + throw new TableAlreadyExistsException(ident) + } + new MetadataOnlyTable(info) + } + + override def stageCreate(ident: Identifier, info: TableInfo): StagedTable = { + if (views.containsKey(keyOf(ident))) throw new TableAlreadyExistsException(ident) + new RecordingStagedTable(info, () => views.put(keyOf(ident), info), () => ()) + } + + override def stageReplace(ident: Identifier, info: TableInfo): StagedTable = { + if (!views.containsKey(keyOf(ident))) throw new NoSuchTableException(ident) + new 
RecordingStagedTable(info, () => views.put(keyOf(ident), info), () => ()) + } + + override def stageCreateOrReplace(ident: Identifier, info: TableInfo): StagedTable = { + new RecordingStagedTable(info, () => views.put(keyOf(ident), info), () => ()) + } + + override def alterTable(ident: Identifier, changes: TableChange*): Table = + throw new RuntimeException("shouldn't be called") + override def dropTable(ident: Identifier): Boolean = views.remove(keyOf(ident)) != null + override def renameTable(oldIdent: Identifier, newIdent: Identifier): Unit = + throw new RuntimeException("shouldn't be called") + override def listTables(namespace: Array[String]): Array[Identifier] = Array.empty + + private var catalogName = "" + override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = { + catalogName = name + } + override def name(): String = catalogName +} + +private class RecordingStagedTable( + info: TableInfo, + onCommit: () => Unit, + onAbort: () => Unit) extends MetadataOnlyTable(info) with StagedTable { + override def commitStagedChanges(): Unit = onCommit() + override def abortStagedChanges(): Unit = onAbort() +} + /** A v2 catalog that does not declare SUPPORTS_CREATE_VIEW. Used to exercise the capability * gate in `DataSourceV2Strategy`. */ class TestingTableOnlyCatalog extends TableCatalog { From 710b97ff452cbb4be9cb2032a81b71cee5a77cd9 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 22 Apr 2026 12:26:21 +0000 Subject: [PATCH 16/59] add ALTER VIEW support for DS v2 catalogs - AlterViewAs: extend AnalysisOnlyCommand so referredTempFunctions is captured for the non-session path (mirrors the CreateView shape). - AlterV2ViewExec + AtomicAlterV2ViewExec: new physical plans. Non-atomic path loads the existing MetadataOnlyTable, builds a replacement TableInfo preserving user TBLPROPERTIES / comment / collation / schema-binding mode, then drop+create. Staging path uses stageReplace for atomic commit. 
- DataSourceV2Strategy: dispatch AlterViewAs(ResolvedPersistentView,...) to the new execs; gate on SUPPORTS_CREATE_VIEW (ALTER shares the capability with CREATE).
- ResolvedViewIdentifier: replace the session-catalog assert with a guard so non-session persistent views fall through to v2 strategies instead of firing AssertionError. This was a latent bug ever since the CREATE VIEW PR enabled non-session ResolvedPersistentView.
- Analyzer.lookupTableOrView: drop the viewOnly non-session-catalog rejection; v2 catalogs can now expose views via MetadataOnlyTable, so the blanket rejection is wrong. Non-view results are still rejected downstream by UnresolvedView's expectViewNotTableError case.
- CheckViewReferences: extend to AlterViewAs as well as CreateView; share a legacyNameFor helper that handles both ResolvedIdentifier (CreateView) and ResolvedPersistentView (AlterViewAs) children.
- TableCatalogCapability.SUPPORTS_CREATE_VIEW: doc now covers ALTER VIEW.
- Tests: ALTER VIEW end-to-end on both TableCatalog and StagingTableCatalog; missing-view error; temp-function rejection; TBLPROPERTIES preservation.

Out of scope (follow-ups): ALTER VIEW SET/UNSET TBLPROPERTIES, RENAME, WITH SCHEMA BINDING for v2 catalogs.
Co-authored-by: Isaac --- .../catalog/TableCatalogCapability.java | 16 +- .../sql/catalyst/analysis/Analyzer.scala | 7 +- .../analysis/ApplyDefaultCollation.scala | 2 +- .../catalyst/plans/logical/v2Commands.scala | 35 +++- .../analysis/ResolveSessionCatalog.scala | 11 +- .../spark/sql/execution/command/views.scala | 43 +++-- .../datasources/v2/AlterV2ViewExec.scala | 150 ++++++++++++++++++ .../datasources/v2/DataSourceV2Strategy.scala | 17 ++ .../DataSourceV2MetadataOnlyTableSuite.scala | 84 ++++++++++ 9 files changed, 334 insertions(+), 31 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalogCapability.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalogCapability.java index bbec0a3b3ba77..0d63512b995fb 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalogCapability.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalogCapability.java @@ -95,16 +95,20 @@ public enum TableCatalogCapability { SUPPORTS_CREATE_TABLE_WITH_IDENTITY_COLUMNS, /** - * Signals that the TableCatalog supports creating views via {@link TableCatalog#createTable} - * by accepting a {@link TableInfo} whose properties include {@link TableCatalog#PROP_VIEW_TEXT} - * (and related view keys: {@link TableCatalog#PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE} and + * Signals that the TableCatalog supports creating and altering views via + * {@link TableCatalog#createTable} by accepting a {@link TableInfo} whose properties include + * {@link TableCatalog#PROP_VIEW_TEXT} (and related view keys: + * {@link TableCatalog#PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE} and * {@link TableCatalog#VIEW_CONF_PREFIX}-prefixed SQL configs). *
<p>
* Catalogs declaring this capability must round-trip those properties and return a * {@link MetadataOnlyTable} from {@link TableCatalog#loadTable} so Spark's view resolution - * path can expand the view text. Without this capability, Spark rejects {@code CREATE VIEW} - * statements targeting the catalog up front rather than letting the catalog silently persist - * a table entry that cannot be read as a view. + * path can expand the view text. {@code ALTER VIEW ... AS} is implemented as a + * {@code dropTable} + {@code createTable} on a plain {@code TableCatalog}, or as + * {@link StagingTableCatalog#stageReplace} when the catalog also implements + * {@link StagingTableCatalog}. Without this capability, Spark rejects {@code CREATE VIEW} + * and {@code ALTER VIEW} statements targeting the catalog up front rather than letting the + * catalog silently persist a table entry that cannot be read as a view. */ SUPPORTS_CREATE_VIEW } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 1d031d58e2ae4..a71ac481f6fe7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1110,9 +1110,10 @@ class Analyzer( }.orElse { relationResolution.expandIdentifier(identifier) match { case CatalogAndIdentifier(catalog, ident) => - if (viewOnly && !CatalogV2Util.isSessionCatalog(catalog)) { - throw QueryCompilationErrors.catalogOperationNotSupported(catalog, "views") - } + // Previously view-only lookups rejected non-session catalogs outright. With + // `MetadataOnlyTable`, non-session catalogs can now expose views, so instead we + // let the lookup proceed and rely on the downstream match — a non-view result is + // converted into the standard `expectViewNotTableError` by UnresolvedView's caller. 
CatalogV2Util.loadTable(catalog, ident).map { case v1Table: V1Table if CatalogV2Util.isSessionCatalog(catalog) && v1Table.v1Table.tableType == CatalogTableType.VIEW => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollation.scala index 1a76b68289a72..3e8b507e4f6c0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollation.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollation.scala @@ -209,7 +209,7 @@ object ApplyDefaultCollation extends Rule[LogicalPlan] { // We match against ResolvedPersistentView because temporary views don't have a // schema/catalog. case alterViewAs@AlterViewAs(resolvedPersistentView@ResolvedPersistentView( - catalog: SupportsNamespaces, identifier, _), _, _) + catalog: SupportsNamespaces, identifier, _), _, _, _, _) if resolvedPersistentView.metadata.collation.isEmpty => val newResolvedPersistentView = resolvedPersistentView.copy( metadata = resolvedPersistentView.metadata.copy( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index dc10c20a92ec2..cafffdc7db823 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -1702,19 +1702,42 @@ case class RepairTable( /** * The logical plan of the ALTER VIEW ... AS command. + * + * Extends [[AnalysisOnlyCommand]] so [[Analyzer.HandleSpecialCommand]] captures + * `referredTempFunctions` from [[AnalysisContext]]; this list is needed by + * [[CheckViewReferences]] and by the v2 execs when the target is a non-session catalog. 
+ * Session-catalog targets are still rewritten to [[AlterViewAsCommand]] by + * `ResolveSessionCatalog` and the captured value is dropped there (the v1 command re-captures). */ case class AlterViewAs( child: LogicalPlan, originalText: String, - query: LogicalPlan) extends BinaryCommand with CTEInChildren { - override def left: LogicalPlan = child - override def right: LogicalPlan = query + query: LogicalPlan, + isAnalyzed: Boolean = false, + referredTempFunctions: Seq[String] = Seq.empty) + extends Command with AnalysisOnlyCommand with CTEInChildren { + + override def childrenToAnalyze: Seq[LogicalPlan] = Seq(child, query) + + override def markAsAnalyzed(analysisContext: AnalysisContext): LogicalPlan = copy( + isAnalyzed = true, + referredTempFunctions = analysisContext.referredTempFunctionNames.toSeq) + override protected def withNewChildrenInternal( - newLeft: LogicalPlan, newRight: LogicalPlan): LogicalPlan = - copy(child = newLeft, query = newRight) + newChildren: IndexedSeq[LogicalPlan]): LogicalPlan = { + assert(!isAnalyzed) + newChildren match { + case Seq(newChild, newQuery) => + copy(child = newChild, query = newQuery) + case others => + throw new SparkIllegalArgumentException( + errorClass = "_LEGACY_ERROR_TEMP_3218", + messageParameters = Map("others" -> others.toString())) + } + } override def withCTEDefs(cteDefs: Seq[CTERelationDef]): LogicalPlan = { - withNewChildren(Seq(child, WithCTE(query, cteDefs))) + copy(query = WithCTE(query, cteDefs)) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 26955b4a849ad..4707b7f30b3f7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -511,7 +511,10 @@ class ResolveSessionCatalog(val catalogManager: 
CatalogManager) location) => AlterTableSetLocationCommand(ident, Some(partitionSpec), location) - case AlterViewAs(ResolvedViewIdentifier(ident), originalText, query) => + // The final `_, _` are AlterViewAs.isAnalyzed and referredTempFunctions. We drop both: + // AlterViewAsCommand is a separate AnalysisOnlyCommand and gets its own markAsAnalyzed pass + // from HandleSpecialCommand after this rewrite. + case AlterViewAs(ResolvedViewIdentifier(ident), originalText, query, _, _) => AlterViewAsCommand(ident, originalText, query) case AlterViewSchemaBinding(ResolvedViewIdentifier(ident), viewSchemaMode) => @@ -767,9 +770,11 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) } object ResolvedViewIdentifier { + // Only matches session-catalog persistent views. Non-session-catalog persistent views + // (produced for `MetadataOnlyTable`) fall through so they can be picked up by v2 strategies + // rather than silently collapsed to a v1 `TableIdentifier`. def unapply(resolved: LogicalPlan): Option[TableIdentifier] = resolved match { - case ResolvedPersistentView(catalog, ident, _) => - assert(isSessionCatalog(catalog)) + case ResolvedPersistentView(catalog, ident, _) if isSessionCatalog(catalog) => Some(ident.asTableIdentifier.copy(catalog = Some(catalog.name))) case ResolvedTempView(ident, _) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index eb9e63e8614d5..d8bb4e1ea8fac 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -26,11 +26,11 @@ import org.apache.spark.SparkException import org.apache.spark.internal.Logging import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.{CapturesConfig, SQLConfHelper, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.{AnalysisContext, 
GlobalTempView, LocalTempView, ResolvedIdentifier, SchemaEvolution, SchemaUnsupported, ViewSchemaMode, ViewType} +import org.apache.spark.sql.catalyst.analysis.{AnalysisContext, GlobalTempView, LocalTempView, ResolvedIdentifier, ResolvedPersistentView, SchemaEvolution, SchemaUnsupported, ViewSchemaMode, ViewType} import org.apache.spark.sql.catalyst.analysis.V2TableReference import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType, TemporaryViewRelation} import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, SubqueryExpression, VariableReference} -import org.apache.spark.sql.catalyst.plans.logical.{AnalysisOnlyCommand, CreateTempView, CreateView, CTEInChildren, CTERelationDef, LogicalPlan, Project, View, WithCTE} +import org.apache.spark.sql.catalyst.plans.logical.{AlterViewAs, AnalysisOnlyCommand, CreateTempView, CreateView, CTEInChildren, CTERelationDef, LogicalPlan, Project, View, WithCTE} import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.classic.ClassicConversions.castToImpl import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.NamespaceHelper @@ -844,26 +844,45 @@ object ViewHelper extends SQLConfHelper with Logging with CapturesConfig { } /** - * Post-analysis check for v2 CREATE VIEW: rejects permanent views that reference temporary - * objects and rejects view bodies with auto-generated aliases. `referredTempFunctions` is - * captured by [[CreateView.markAsAnalyzed]] before this rule runs. The v1 counterpart - * [[CreateViewCommand]] keeps its existing exec-time checks — Dataset-built commands bypass - * the analyzer's re-capture path, so the exec-time safety net must stay for v1. + * Post-analysis check for v2 CREATE VIEW / ALTER VIEW: rejects permanent views that reference + * temporary objects and rejects view bodies with auto-generated aliases. `referredTempFunctions` + * is captured by the command's `markAsAnalyzed` before this rule runs. 
The v1 counterparts + * [[CreateViewCommand]] and [[AlterViewAsCommand]] keep their existing exec-time checks — + * Dataset-built commands bypass the analyzer's re-capture path, so the exec-time safety net + * must stay for v1. */ object CheckViewReferences extends (LogicalPlan => Unit) { import ViewHelper._ + private def legacyNameFor(resolved: LogicalPlan): TableIdentifier = resolved match { + case ri: ResolvedIdentifier => + TableIdentifier( + table = ri.identifier.name(), + database = ri.identifier.namespace().lastOption, + catalog = Some(ri.catalog.name())) + case rpv: ResolvedPersistentView => + TableIdentifier( + table = rpv.identifier.name(), + database = rpv.identifier.namespace().lastOption, + catalog = Some(rpv.catalog.name())) + case other => + throw SparkException.internalError( + s"Unexpected child of view command: ${other.getClass.getName}") + } + override def apply(plan: LogicalPlan): Unit = plan.foreach { case cv: CreateView if cv.isAnalyzed => - val ident = cv.child.asInstanceOf[ResolvedIdentifier] - val legacyName = TableIdentifier( - table = ident.identifier.name(), - database = ident.identifier.namespace().lastOption, - catalog = Some(ident.catalog.name())) + val legacyName = legacyNameFor(cv.child) verifyTemporaryObjectsNotExists( isTemporary = false, legacyName, cv.query, cv.referredTempFunctions) verifyAutoGeneratedAliasesNotExists(cv.query, isTemporary = false, legacyName) + case av: AlterViewAs if av.isAnalyzed => + val legacyName = legacyNameFor(av.child) + verifyTemporaryObjectsNotExists( + isTemporary = false, legacyName, av.query, av.referredTempFunctions) + verifyAutoGeneratedAliasesNotExists(av.query, isTemporary = false, legacyName) + case _ => } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala new file mode 100644 index 0000000000000..e6174b2684a3e --- /dev/null +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.v2 + +import scala.jdk.CollectionConverters._ + +import org.apache.spark.SparkException +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, SchemaBinding, SchemaCompensation, SchemaEvolution, SchemaTypeEvolution, SchemaUnsupported, TableAlreadyExistsException, ViewSchemaMode} +import org.apache.spark.sql.catalyst.catalog.CatalogTable.VIEW_SCHEMA_MODE +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Identifier, MetadataOnlyTable, StagedTable, StagingTableCatalog, TableCatalog, TableInfo} +import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.execution.command.{CommandUtils, ViewHelper} +import org.apache.spark.sql.execution.metric.SQLMetric +import org.apache.spark.util.Utils + +/** + * Shared bits for the v2 ALTER VIEW ... AS execs. 
Loads the existing view once via + * `existingInfo` and uses its properties to preserve user-set properties, comment, collation, + * and schema-binding mode when constructing the replacement `TableInfo`. A v2 identifier that + * does not resolve to a [[MetadataOnlyTable]] is rejected — the connector contract for catalogs + * with `SUPPORTS_CREATE_VIEW` is to round-trip `MetadataOnlyTable` from `loadTable`. + * + * `generateViewProperties` (invoked from `buildTableInfo`) strips the transient view keys + * (SQL configs, query column names, referred-temp names) from the inherited properties and + * re-emits them from the current session, matching v1 `AlterViewAsCommand.alterPermanentView`. + */ +private[v2] trait V2AlterViewPreparation extends V2ViewPreparation { + protected lazy val existingInfo: TableInfo = { + val table = try { + catalog.loadTable(identifier) + } catch { + case _: NoSuchTableException => + throw QueryCompilationErrors.noSuchTableError(catalog.name(), identifier) + } + table match { + case mot: MetadataOnlyTable => mot.getTableInfo + case other => + // SUPPORTS_CREATE_VIEW requires catalogs to round-trip MetadataOnlyTable; getting + // anything else back is a catalog contract violation. + throw SparkException.internalError( + s"Expected MetadataOnlyTable from $catalog for $identifier, " + + s"got ${other.getClass.getName}") + } + } + + private def existingProp(key: String): Option[String] = + Option(existingInfo.properties.get(key)) + + // ALTER VIEW ... AS does not accept a user column list. + override def userSpecifiedColumns: Seq[(String, Option[String])] = Seq.empty + override def comment: Option[String] = existingProp(TableCatalog.PROP_COMMENT) + override def collation: Option[String] = existingProp(TableCatalog.PROP_COLLATION) + // Strip reserved keys; those become first-class `TableInfo` / `CatalogTable` fields or are + // re-emitted by `buildTableInfo` (view text, current-catalog-namespace, comment, collation). 
+ // User TBLPROPERTIES and view.sqlConfig.* / view.query.out.* / view.referredTempNames / + // view.schemaMode pass through — generateViewProperties handles their cleanup + re-emit. + override def userProperties: Map[String, String] = + existingInfo.properties.asScala.toMap -- CatalogV2Util.TABLE_RESERVED_PROPERTIES + + override def viewSchemaMode: ViewSchemaMode = { + existingProp(VIEW_SCHEMA_MODE) match { + case Some(s) if s == SchemaBinding.toString => SchemaBinding + case Some(s) if s == SchemaEvolution.toString => SchemaEvolution + case Some(s) if s == SchemaTypeEvolution.toString => SchemaTypeEvolution + case Some(s) if s == SchemaCompensation.toString => SchemaCompensation + case _ => SchemaUnsupported + } + } +} + +/** + * Non-atomic ALTER VIEW for a plain [[TableCatalog]]: load existing, build replacement, + * check cyclic reference, uncache, drop, create. Between drop and create the view does not + * exist — catalogs that need atomicity should also implement [[StagingTableCatalog]]. + */ +case class AlterV2ViewExec( + catalog: TableCatalog, + identifier: Identifier, + originalText: String, + query: LogicalPlan, + referredTempFunctions: Seq[String]) extends V2AlterViewPreparation { + + override protected def run(): Seq[InternalRow] = { + // Force the lazy to load before building; surfaces NoSuchTableException as a proper error. + val _ = existingInfo + val info = buildTableInfo() + ViewHelper.checkCyclicViewReference(query, Seq(legacyName), legacyName) + CommandUtils.uncacheTableOrView(session, legacyName) + catalog.dropTable(identifier) + try { + catalog.createTable(identifier, info) + } catch { + case _: TableAlreadyExistsException => throw viewAlreadyExists() + } + Seq.empty + } +} + +/** + * Atomic ALTER VIEW for a [[StagingTableCatalog]]: uses `stageReplace` + commit so the view + * metadata swap is atomic against concurrent readers. 
`stageReplace` throws + * [[NoSuchTableException]] when the view does not exist; we surface that as the standard + * no-such-table error. + */ +case class AtomicAlterV2ViewExec( + catalog: StagingTableCatalog, + identifier: Identifier, + originalText: String, + query: LogicalPlan, + referredTempFunctions: Seq[String]) extends V2AlterViewPreparation { + + override val metrics: Map[String, SQLMetric] = + DataSourceV2Utils.commitMetrics(sparkContext, catalog) + + override protected def run(): Seq[InternalRow] = { + val _ = existingInfo + val info = buildTableInfo() + ViewHelper.checkCyclicViewReference(query, Seq(legacyName), legacyName) + CommandUtils.uncacheTableOrView(session, legacyName) + val staged: StagedTable = try { + catalog.stageReplace(identifier, info) + } catch { + case _: NoSuchTableException => + throw QueryCompilationErrors.noSuchTableError(catalog.name(), identifier) + } + Utils.tryWithSafeFinallyAndFailureCallbacks({ + DataSourceV2Utils.commitStagedChanges(sparkContext, staged, metrics) + })(catchBlock = { + staged.abortStagedChanges() + }) + Seq.empty + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index fa3eff2853ad6..bba8f514eaed5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -322,6 +322,23 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat referredTempFunctions) :: Nil } + case AlterViewAs(ResolvedPersistentView(catalog, ident, _), originalText, query, + _, referredTempFunctions) => + val tableCatalog = catalog.asTableCatalog + // Re-use the CREATE VIEW capability — a catalog able to create views via createTable + // must also be able to replace them via dropTable+createTable or 
stageReplace. + if (!tableCatalog.capabilities().contains(TableCatalogCapability.SUPPORTS_CREATE_VIEW)) { + throw QueryCompilationErrors.missingCatalogViewsAbilityError(tableCatalog) + } + tableCatalog match { + case staging: StagingTableCatalog => + AtomicAlterV2ViewExec( + staging, ident, originalText, query, referredTempFunctions) :: Nil + case _ => + AlterV2ViewExec( + tableCatalog, ident, originalText, query, referredTempFunctions) :: Nil + } + case ReplaceTableAsSelect(ResolvedIdentifier(catalog, ident), parts, query, tableSpec: TableSpec, options, orCreate, true) => catalog match { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala index d22ffa748acb2..7b992c0df4a2c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala @@ -267,6 +267,90 @@ class DataSourceV2MetadataOnlyTableSuite extends QueryTest with SharedSparkSessi TableCatalog.PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE)) } + test("ALTER VIEW ... AS updates the view body on a v2 catalog") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW general_catalog.default.v_alter AS " + + "SELECT x FROM spark_catalog.default.t WHERE x > 10") + checkAnswer(spark.table("general_catalog.default.v_alter"), Seq.empty[Row]) + + sql("ALTER VIEW general_catalog.default.v_alter AS " + + "SELECT x FROM spark_catalog.default.t WHERE x > 1") + checkAnswer(spark.table("general_catalog.default.v_alter"), Seq(Row(2), Row(3))) + } + } + + test("ALTER VIEW on a missing view fails at analysis") { + // UnresolvedView resolves through lookupTableOrView and the missing view surfaces as an + // AnalysisException before we ever reach the v2 exec. 
The exact error condition (e.g. + // TABLE_OR_VIEW_NOT_FOUND) varies across Spark versions; we just assert we fail cleanly. + intercept[AnalysisException] { + sql("ALTER VIEW general_catalog.default.does_not_exist AS SELECT 1 AS x") + } + } + + test("ALTER VIEW rejects reference to a temporary function") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW general_catalog.default.v_alter_tempfn AS " + + "SELECT x FROM spark_catalog.default.t") + spark.udf.register("temp_udf_alter", (i: Int) => i + 1) + val ex = intercept[AnalysisException] { + sql("ALTER VIEW general_catalog.default.v_alter_tempfn AS " + + "SELECT temp_udf_alter(x) FROM spark_catalog.default.t") + } + assert(ex.getMessage.toLowerCase.contains("temporary")) + } + } + + test("ALTER VIEW preserves user-set TBLPROPERTIES") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW general_catalog.default.v_preserve " + + "TBLPROPERTIES ('mykey'='myvalue') AS " + + "SELECT x FROM spark_catalog.default.t") + sql("ALTER VIEW general_catalog.default.v_preserve AS " + + "SELECT x + 1 AS x FROM spark_catalog.default.t") + + val catalog = spark.sessionState.catalogManager.catalog("general_catalog") + .asInstanceOf[TestingGeneralCatalog] + val info = catalog.getStoredView(Array("default"), "v_preserve") + assert(info.properties().get("mykey") == "myvalue") + } + } + + test("ALTER VIEW on a StagingTableCatalog uses the atomic exec (stageReplace)") { + withSQLConf( + "spark.sql.catalog.staging_catalog" -> classOf[TestingStagingCatalog].getName) { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW staging_catalog.default.v_atomic_alter AS " + + "SELECT x FROM spark_catalog.default.t WHERE x > 10") + checkAnswer(spark.table("staging_catalog.default.v_atomic_alter"), Seq.empty[Row]) + + 
sql("ALTER VIEW staging_catalog.default.v_atomic_alter AS " + + "SELECT x FROM spark_catalog.default.t WHERE x > 1") + checkAnswer( + spark.table("staging_catalog.default.v_atomic_alter"), + Seq(Row(2), Row(3))) + } + } + } + + test("ALTER VIEW on a catalog without SUPPORTS_CREATE_VIEW fails") { + // An identifier the TestingTableOnlyCatalog can't find — we never get past the view + // lookup stage, so the error here is the no-such-table / not-a-view path. The capability + // gate in DataSourceV2Strategy is only reachable once the existing view is resolvable, + // which this catalog can't do; the capability rejection is already exercised by the + // CREATE VIEW test above. + withSQLConf( + "spark.sql.catalog.no_view_catalog" -> classOf[TestingTableOnlyCatalog].getName) { + intercept[AnalysisException] { + sql("ALTER VIEW no_view_catalog.default.v AS SELECT 1") + } + } + } + test("CREATE VIEW on a StagingTableCatalog uses the atomic exec") { withSQLConf( "spark.sql.catalog.staging_catalog" -> classOf[TestingStagingCatalog].getName) { From eee8c494bfa10b1f04214c583d1d335f463121bb Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 22 Apr 2026 12:34:15 +0000 Subject: [PATCH 17/59] rename SUPPORTS_CREATE_VIEW to SUPPORTS_VIEW The capability now gates both CREATE VIEW and ALTER VIEW, so the create-only name misrepresents the feature set. "SUPPORTS_VIEW" reads like the other TableCatalogCapability entries (SUPPORTS_CREATE_TABLE_* are about creation only; view support is the full lifecycle). 
Co-authored-by: Isaac --- .../connector/catalog/TableCatalogCapability.java | 4 ++-- .../execution/datasources/v2/AlterV2ViewExec.scala | 4 ++-- .../datasources/v2/DataSourceV2Strategy.scala | 4 ++-- .../DataSourceV2MetadataOnlyTableSuite.scala | 12 ++++++------ 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalogCapability.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalogCapability.java index 0d63512b995fb..46afd1f1476f0 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalogCapability.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalogCapability.java @@ -95,7 +95,7 @@ public enum TableCatalogCapability { SUPPORTS_CREATE_TABLE_WITH_IDENTITY_COLUMNS, /** - * Signals that the TableCatalog supports creating and altering views via + * Signals that the TableCatalog supports views. Views are created and altered via * {@link TableCatalog#createTable} by accepting a {@link TableInfo} whose properties include * {@link TableCatalog#PROP_VIEW_TEXT} (and related view keys: * {@link TableCatalog#PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE} and @@ -110,5 +110,5 @@ public enum TableCatalogCapability { * and {@code ALTER VIEW} statements targeting the catalog up front rather than letting the * catalog silently persist a table entry that cannot be read as a view. 
*/ - SUPPORTS_CREATE_VIEW + SUPPORTS_VIEW } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala index e6174b2684a3e..132a3df18512f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala @@ -35,7 +35,7 @@ import org.apache.spark.util.Utils * `existingInfo` and uses its properties to preserve user-set properties, comment, collation, * and schema-binding mode when constructing the replacement `TableInfo`. A v2 identifier that * does not resolve to a [[MetadataOnlyTable]] is rejected — the connector contract for catalogs - * with `SUPPORTS_CREATE_VIEW` is to round-trip `MetadataOnlyTable` from `loadTable`. + * with `SUPPORTS_VIEW` is to round-trip `MetadataOnlyTable` from `loadTable`. * * `generateViewProperties` (invoked from `buildTableInfo`) strips the transient view keys * (SQL configs, query column names, referred-temp names) from the inherited properties and @@ -52,7 +52,7 @@ private[v2] trait V2AlterViewPreparation extends V2ViewPreparation { table match { case mot: MetadataOnlyTable => mot.getTableInfo case other => - // SUPPORTS_CREATE_VIEW requires catalogs to round-trip MetadataOnlyTable; getting + // SUPPORTS_VIEW requires catalogs to round-trip MetadataOnlyTable; getting // anything else back is a catalog contract violation. 
throw SparkException.internalError( s"Expected MetadataOnlyTable from $catalog for $identifier, " + diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index bba8f514eaed5..78a528454577c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -305,7 +305,7 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat collation, properties, originalText, child, allowExisting, replace, viewSchemaMode, _, referredTempFunctions) => val tableCatalog = catalog.asTableCatalog - if (!tableCatalog.capabilities().contains(TableCatalogCapability.SUPPORTS_CREATE_VIEW)) { + if (!tableCatalog.capabilities().contains(TableCatalogCapability.SUPPORTS_VIEW)) { throw QueryCompilationErrors.missingCatalogViewsAbilityError(tableCatalog) } val sqlText = originalText.getOrElse { @@ -327,7 +327,7 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat val tableCatalog = catalog.asTableCatalog // Re-use the CREATE VIEW capability — a catalog able to create views via createTable // must also be able to replace them via dropTable+createTable or stageReplace. 
- if (!tableCatalog.capabilities().contains(TableCatalogCapability.SUPPORTS_CREATE_VIEW)) { + if (!tableCatalog.capabilities().contains(TableCatalogCapability.SUPPORTS_VIEW)) { throw QueryCompilationErrors.missingCatalogViewsAbilityError(tableCatalog) } tableCatalog match { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala index 7b992c0df4a2c..39d98d6d10bcb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala @@ -174,7 +174,7 @@ class DataSourceV2MetadataOnlyTableSuite extends QueryTest with SharedSparkSessi } } - test("CREATE VIEW on a catalog without SUPPORTS_CREATE_VIEW fails") { + test("CREATE VIEW on a catalog without SUPPORTS_VIEW fails") { withSQLConf( "spark.sql.catalog.no_view_catalog" -> classOf[TestingTableOnlyCatalog].getName) { val ex = intercept[AnalysisException] { @@ -337,7 +337,7 @@ class DataSourceV2MetadataOnlyTableSuite extends QueryTest with SharedSparkSessi } } - test("ALTER VIEW on a catalog without SUPPORTS_CREATE_VIEW fails") { + test("ALTER VIEW on a catalog without SUPPORTS_VIEW fails") { // An identifier the TestingTableOnlyCatalog can't find — we never get past the view // lookup stage, so the error here is the no-such-table / not-a-view path. 
The capability // gate in DataSourceV2Strategy is only reachable once the existing view is resolvable, @@ -397,7 +397,7 @@ class TestingGeneralCatalog extends TableCatalog { new java.util.concurrent.ConcurrentHashMap[(Seq[String], String), TableInfo]() override def capabilities(): java.util.Set[TableCatalogCapability] = - java.util.Collections.singleton(TableCatalogCapability.SUPPORTS_CREATE_VIEW) + java.util.Collections.singleton(TableCatalogCapability.SUPPORTS_VIEW) override def loadTable(ident: Identifier): Table = { val key = (ident.namespace().toSeq, ident.name()) @@ -494,7 +494,7 @@ class TestingGeneralCatalog extends TableCatalog { /** * A minimal [[StagingTableCatalog]] used to drive `AtomicCreateV2ViewExec`. Views are stored - * in a local map; staging commits write through, aborts discard. Supports SUPPORTS_CREATE_VIEW. + * in a local map; staging commits write through, aborts discard. Supports SUPPORTS_VIEW. */ class TestingStagingCatalog extends StagingTableCatalog { @@ -502,7 +502,7 @@ class TestingStagingCatalog extends StagingTableCatalog { new java.util.concurrent.ConcurrentHashMap[(Seq[String], String), TableInfo]() override def capabilities(): java.util.Set[TableCatalogCapability] = - java.util.Collections.singleton(TableCatalogCapability.SUPPORTS_CREATE_VIEW) + java.util.Collections.singleton(TableCatalogCapability.SUPPORTS_VIEW) private def keyOf(ident: Identifier): (Seq[String], String) = (ident.namespace().toSeq, ident.name()) @@ -557,7 +557,7 @@ private class RecordingStagedTable( override def abortStagedChanges(): Unit = onAbort() } -/** A v2 catalog that does not declare SUPPORTS_CREATE_VIEW. Used to exercise the capability +/** A v2 catalog that does not declare SUPPORTS_VIEW. Used to exercise the capability * gate in `DataSourceV2Strategy`. 
*/ class TestingTableOnlyCatalog extends TableCatalog { override def loadTable(ident: Identifier): Table = throw new NoSuchTableException(ident) From 3ae6e65935c4d655934509d095720b4566d50fb5 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 22 Apr 2026 14:16:44 +0000 Subject: [PATCH 18/59] address review findings: uncaching, viewSchemaMode, capability check, tests - uncacheTableOrView now uses ResolvedIdentifier overload so multi-level namespaces aren't narrowed to a single database part - V2AlterViewPreparation.viewSchemaMode delegates to CatalogTable.viewSchemaMode to match v1 defaults and honor viewSchemaBindingEnabled - drop unused referredTempFunctions field from V2 view execs - gate on TableCatalog + SUPPORTS_VIEW together in DataSourceV2Strategy so non-TableCatalog plugins still see MISSING_CATALOG_ABILITY.VIEWS - add tests for temp variable rejection and cyclic v2 view references - split DataSourceV2MetadataOnlyTableSuite into table-read and view suites - doc polish: PROP_VIEW_TEXT, MetadataOnlyTable javadoc, stale comments --- .../connector/catalog/MetadataOnlyTable.java | 7 +- .../sql/connector/catalog/TableCatalog.java | 6 +- .../sql/catalyst/analysis/Analyzer.scala | 2 +- .../analysis/RelationResolution.scala | 2 +- .../spark/sql/execution/command/views.scala | 2 +- .../datasources/v2/AlterV2ViewExec.scala | 53 +- .../datasources/v2/CreateV2ViewExec.scala | 22 +- .../datasources/v2/DataSourceV2Strategy.scala | 36 +- .../DataSourceV2MetadataOnlyTableSuite.scala | 544 ++--------------- .../DataSourceV2MetadataOnlyViewSuite.scala | 558 ++++++++++++++++++ 10 files changed, 674 insertions(+), 558 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java index 6a07b7b5f072e..b75e0fbe4101d 100644 
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java @@ -25,10 +25,9 @@ import org.apache.spark.sql.connector.expressions.Transform; /** - * A concrete {@code Table} implementation that only contains the table metadata without - * implementing read/write directly. It represents a general Spark data source table or - * a Spark view, and relies on Spark to interpret the table metadata, resolve the table - * provider into a data source, or read it as a view. + * A concrete {@code Table} implementation that contains only table metadata, deferring + * read/write to Spark. It represents a general Spark data source table or a Spark view; + * Spark resolves the table provider into a data source or expands the view text at read time. *

* Catalogs build the metadata via {@link TableInfo.Builder} (which provides convenience * setters for reserved properties such as {@link TableCatalog#PROP_PROVIDER}, diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java index 25886f73317be..e60bd5b496b0d 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java @@ -88,9 +88,9 @@ public interface TableCatalog extends CatalogPlugin { String PROP_OWNER = "owner"; /** - * A reserved property to specify the view text of a general table that represents - * a SQL view. Unqualified identifiers in the view text are resolved against - * {@link #PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE} at read time. + * A reserved property that holds the SQL text of a view. Unqualified identifiers in the + * view text are resolved against {@link #PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE} at read + * time. */ String PROP_VIEW_TEXT = "view_text"; diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index a71ac481f6fe7..b2672971cb5fb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1112,7 +1112,7 @@ class Analyzer( case CatalogAndIdentifier(catalog, ident) => // Previously view-only lookups rejected non-session catalogs outright. 
With // `MetadataOnlyTable`, non-session catalogs can now expose views, so instead we - // let the lookup proceed and rely on the downstream match — a non-view result is + // let the lookup proceed and rely on the downstream match -- a non-view result is // converted into the standard `expectViewNotTableError` by UnresolvedView's caller. CatalogV2Util.loadTable(catalog, ident).map { case v1Table: V1Table if CatalogV2Util.isSessionCatalog(catalog) && diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala index 493ed2b2dbf9d..c88ad26a92c25 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala @@ -345,7 +345,7 @@ class RelationResolution( createDataSourceV1Scan(v1Table.v1Table) // MetadataOnlyTable is a sentinel meaning "interpret via v1", so unlike the V1Table - // case above we apply no session-catalog / tracksPartitionsInCatalog guard — any catalog + // case above we apply no session-catalog / tracksPartitionsInCatalog guard -- any catalog // returning MetadataOnlyTable has opted into v1 read semantics. 
case t: MetadataOnlyTable => createDataSourceV1Scan(V1Table.toCatalogTable(catalog, ident, t)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index d8bb4e1ea8fac..be8da6ded25f7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -847,7 +847,7 @@ object ViewHelper extends SQLConfHelper with Logging with CapturesConfig { * Post-analysis check for v2 CREATE VIEW / ALTER VIEW: rejects permanent views that reference * temporary objects and rejects view bodies with auto-generated aliases. `referredTempFunctions` * is captured by the command's `markAsAnalyzed` before this rule runs. The v1 counterparts - * [[CreateViewCommand]] and [[AlterViewAsCommand]] keep their existing exec-time checks — + * [[CreateViewCommand]] and [[AlterViewAsCommand]] keep their existing exec-time checks -- * Dataset-built commands bypass the analyzer's re-capture path, so the exec-time safety net * must stay for v1. 
*/ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala index 132a3df18512f..a23eef393b4c8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala @@ -21,10 +21,10 @@ import scala.jdk.CollectionConverters._ import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, SchemaBinding, SchemaCompensation, SchemaEvolution, SchemaTypeEvolution, SchemaUnsupported, TableAlreadyExistsException, ViewSchemaMode} -import org.apache.spark.sql.catalyst.catalog.CatalogTable.VIEW_SCHEMA_MODE +import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, ResolvedIdentifier, TableAlreadyExistsException, ViewSchemaMode} +import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Identifier, MetadataOnlyTable, StagedTable, StagingTableCatalog, TableCatalog, TableInfo} +import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Identifier, MetadataOnlyTable, StagedTable, StagingTableCatalog, TableCatalog, TableInfo, V1Table} import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.command.{CommandUtils, ViewHelper} import org.apache.spark.sql.execution.metric.SQLMetric @@ -32,9 +32,9 @@ import org.apache.spark.util.Utils /** * Shared bits for the v2 ALTER VIEW ... AS execs. 
Loads the existing view once via - * `existingInfo` and uses its properties to preserve user-set properties, comment, collation, + * `existingTable` and uses its properties to preserve user-set properties, comment, collation, * and schema-binding mode when constructing the replacement `TableInfo`. A v2 identifier that - * does not resolve to a [[MetadataOnlyTable]] is rejected — the connector contract for catalogs + * does not resolve to a [[MetadataOnlyTable]] is rejected -- the connector contract for catalogs * with `SUPPORTS_VIEW` is to round-trip `MetadataOnlyTable` from `loadTable`. * * `generateViewProperties` (invoked from `buildTableInfo`) strips the transient view keys @@ -42,7 +42,7 @@ import org.apache.spark.util.Utils * re-emits them from the current session, matching v1 `AlterViewAsCommand.alterPermanentView`. */ private[v2] trait V2AlterViewPreparation extends V2ViewPreparation { - protected lazy val existingInfo: TableInfo = { + protected lazy val existingTable: MetadataOnlyTable = { val table = try { catalog.loadTable(identifier) } catch { @@ -50,7 +50,7 @@ private[v2] trait V2AlterViewPreparation extends V2ViewPreparation { throw QueryCompilationErrors.noSuchTableError(catalog.name(), identifier) } table match { - case mot: MetadataOnlyTable => mot.getTableInfo + case mot: MetadataOnlyTable => mot case other => // SUPPORTS_VIEW requires catalogs to round-trip MetadataOnlyTable; getting // anything else back is a catalog contract violation. @@ -60,6 +60,14 @@ private[v2] trait V2AlterViewPreparation extends V2ViewPreparation { } } + protected lazy val existingInfo: TableInfo = existingTable.getTableInfo + + // Translate once through V1Table so we can delegate semantics like viewSchemaMode to the + // same logic the v1 read path uses (honors viewSchemaBindingEnabled, same default when the + // property is absent). 
+ protected lazy val existingCatalogTable: CatalogTable = + V1Table.toCatalogTable(catalog, identifier, existingTable) + private def existingProp(key: String): Option[String] = Option(existingInfo.properties.get(key)) @@ -70,39 +78,31 @@ private[v2] trait V2AlterViewPreparation extends V2ViewPreparation { // Strip reserved keys; those become first-class `TableInfo` / `CatalogTable` fields or are // re-emitted by `buildTableInfo` (view text, current-catalog-namespace, comment, collation). // User TBLPROPERTIES and view.sqlConfig.* / view.query.out.* / view.referredTempNames / - // view.schemaMode pass through — generateViewProperties handles their cleanup + re-emit. + // view.schemaMode pass through -- generateViewProperties handles their cleanup + re-emit. override def userProperties: Map[String, String] = existingInfo.properties.asScala.toMap -- CatalogV2Util.TABLE_RESERVED_PROPERTIES - override def viewSchemaMode: ViewSchemaMode = { - existingProp(VIEW_SCHEMA_MODE) match { - case Some(s) if s == SchemaBinding.toString => SchemaBinding - case Some(s) if s == SchemaEvolution.toString => SchemaEvolution - case Some(s) if s == SchemaTypeEvolution.toString => SchemaTypeEvolution - case Some(s) if s == SchemaCompensation.toString => SchemaCompensation - case _ => SchemaUnsupported - } - } + override def viewSchemaMode: ViewSchemaMode = existingCatalogTable.viewSchemaMode } /** * Non-atomic ALTER VIEW for a plain [[TableCatalog]]: load existing, build replacement, * check cyclic reference, uncache, drop, create. Between drop and create the view does not - * exist — catalogs that need atomicity should also implement [[StagingTableCatalog]]. + * exist -- catalogs that need atomicity should also implement [[StagingTableCatalog]]. 
*/ case class AlterV2ViewExec( catalog: TableCatalog, identifier: Identifier, originalText: String, - query: LogicalPlan, - referredTempFunctions: Seq[String]) extends V2AlterViewPreparation { + query: LogicalPlan) extends V2AlterViewPreparation { override protected def run(): Seq[InternalRow] = { - // Force the lazy to load before building; surfaces NoSuchTableException as a proper error. - val _ = existingInfo + // Force evaluation of the existingTable lazy val so NoSuchTableException surfaces before + // we do any other work. + val _ = existingTable val info = buildTableInfo() ViewHelper.checkCyclicViewReference(query, Seq(legacyName), legacyName) - CommandUtils.uncacheTableOrView(session, legacyName) + CommandUtils.uncacheTableOrView(session, ResolvedIdentifier(catalog, identifier)) catalog.dropTable(identifier) try { catalog.createTable(identifier, info) @@ -123,17 +123,16 @@ case class AtomicAlterV2ViewExec( catalog: StagingTableCatalog, identifier: Identifier, originalText: String, - query: LogicalPlan, - referredTempFunctions: Seq[String]) extends V2AlterViewPreparation { + query: LogicalPlan) extends V2AlterViewPreparation { override val metrics: Map[String, SQLMetric] = DataSourceV2Utils.commitMetrics(sparkContext, catalog) override protected def run(): Seq[InternalRow] = { - val _ = existingInfo + val _ = existingTable val info = buildTableInfo() ViewHelper.checkCyclicViewReference(query, Seq(legacyName), legacyName) - CommandUtils.uncacheTableOrView(session, legacyName) + CommandUtils.uncacheTableOrView(session, ResolvedIdentifier(catalog, identifier)) val staged: StagedTable = try { catalog.stageReplace(identifier, info) } catch { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala index 77c1641a663af..aeea54937e79e 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala @@ -22,22 +22,23 @@ import scala.jdk.CollectionConverters._ import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.analysis.{SchemaEvolution, TableAlreadyExistsException} +import org.apache.spark.sql.catalyst.analysis.{ResolvedIdentifier, SchemaEvolution, TableAlreadyExistsException, ViewSchemaMode} import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, ViewSchemaMode} -import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, QuotingUtils, SchemaUtils} +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, QuotingUtils} import org.apache.spark.sql.connector.catalog.{Identifier, StagedTable, StagingTableCatalog, TableCatalog, TableInfo} import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.command.{CommandUtils, ViewHelper} import org.apache.spark.sql.execution.metric.SQLMetric +import org.apache.spark.sql.util.SchemaUtils import org.apache.spark.util.Utils /** * Shared validation + TableInfo construction for v2 CREATE VIEW execs. * * Mirrors the persistent-view portion of v1 [[ViewHelper.prepareTable]] + the execution-time - * checks in [[CreateViewCommand.run]]. Any future addition on the v1 side — new view-specific - * reserved property, new validation, new schema-mode handling — must be mirrored here. + * checks in [[CreateViewCommand.run]]. Any future addition on the v1 side -- new view-specific + * reserved property, new validation, new schema-mode handling -- must be mirrored here. 
* Post-analysis checks for temp-object references and auto-generated aliases run once for both * v1 and v2 in [[CheckViewReferences]]. */ @@ -51,7 +52,6 @@ private[v2] trait V2ViewPreparation extends LeafV2CommandExec { def originalText: String def query: LogicalPlan def viewSchemaMode: ViewSchemaMode - def referredTempFunctions: Seq[String] // Build a synthetic v1 TableIdentifier for error messages and for ViewHelper methods that // accept it purely for rendering. This carries no semantic weight - the v2 Identifier is the @@ -133,8 +133,7 @@ case class CreateV2ViewExec( query: LogicalPlan, allowExisting: Boolean, replace: Boolean, - viewSchemaMode: ViewSchemaMode, - referredTempFunctions: Seq[String]) extends V2ViewPreparation { + viewSchemaMode: ViewSchemaMode) extends V2ViewPreparation { override protected def run(): Seq[InternalRow] = { val info = buildTableInfo() @@ -145,7 +144,7 @@ case class CreateV2ViewExec( } if (!replace) throw viewAlreadyExists() ViewHelper.checkCyclicViewReference(query, Seq(legacyName), legacyName) - CommandUtils.uncacheTableOrView(session, legacyName) + CommandUtils.uncacheTableOrView(session, ResolvedIdentifier(catalog, identifier)) catalog.dropTable(identifier) } // TOCTOU: if another writer creates the table between tableExists and createTable, a bare @@ -175,8 +174,7 @@ case class AtomicCreateV2ViewExec( query: LogicalPlan, allowExisting: Boolean, replace: Boolean, - viewSchemaMode: ViewSchemaMode, - referredTempFunctions: Seq[String]) extends V2ViewPreparation { + viewSchemaMode: ViewSchemaMode) extends V2ViewPreparation { override val metrics: Map[String, SQLMetric] = DataSourceV2Utils.commitMetrics(sparkContext, catalog) @@ -193,7 +191,7 @@ case class AtomicCreateV2ViewExec( val staged: StagedTable = if (replace) { if (catalog.tableExists(identifier)) { ViewHelper.checkCyclicViewReference(query, Seq(legacyName), legacyName) - CommandUtils.uncacheTableOrView(session, legacyName) + CommandUtils.uncacheTableOrView(session, 
ResolvedIdentifier(catalog, identifier)) } catalog.stageCreateOrReplace(identifier, info) } else { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 78a528454577c..55ec13eb394e4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -303,10 +303,14 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case CreateView(ResolvedIdentifier(catalog, ident), userSpecifiedColumns, comment, collation, properties, originalText, child, allowExisting, replace, viewSchemaMode, - _, referredTempFunctions) => - val tableCatalog = catalog.asTableCatalog - if (!tableCatalog.capabilities().contains(TableCatalogCapability.SUPPORTS_VIEW)) { - throw QueryCompilationErrors.missingCatalogViewsAbilityError(tableCatalog) + _, _) => + // Gate on TableCatalog + SUPPORTS_VIEW together so non-TableCatalog plugins still + // surface the VIEWS-specific error (instead of the generic TABLES error that + // asTableCatalog would throw). 
+ val tableCatalog = catalog match { + case tc: TableCatalog + if tc.capabilities().contains(TableCatalogCapability.SUPPORTS_VIEW) => tc + case _ => throw QueryCompilationErrors.missingCatalogViewsAbilityError(catalog) } val sqlText = originalText.getOrElse { throw QueryCompilationErrors.createPersistedViewFromDatasetAPINotAllowedError() @@ -314,29 +318,25 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat tableCatalog match { case staging: StagingTableCatalog => AtomicCreateV2ViewExec(staging, ident, userSpecifiedColumns, comment, collation, - properties, sqlText, child, allowExisting, replace, viewSchemaMode, - referredTempFunctions) :: Nil + properties, sqlText, child, allowExisting, replace, viewSchemaMode) :: Nil case _ => CreateV2ViewExec(tableCatalog, ident, userSpecifiedColumns, comment, collation, - properties, sqlText, child, allowExisting, replace, viewSchemaMode, - referredTempFunctions) :: Nil + properties, sqlText, child, allowExisting, replace, viewSchemaMode) :: Nil } - case AlterViewAs(ResolvedPersistentView(catalog, ident, _), originalText, query, - _, referredTempFunctions) => - val tableCatalog = catalog.asTableCatalog - // Re-use the CREATE VIEW capability — a catalog able to create views via createTable + case AlterViewAs(ResolvedPersistentView(catalog, ident, _), originalText, query, _, _) => + // Re-use the CREATE VIEW capability -- a catalog able to create views via createTable // must also be able to replace them via dropTable+createTable or stageReplace. 
- if (!tableCatalog.capabilities().contains(TableCatalogCapability.SUPPORTS_VIEW)) { - throw QueryCompilationErrors.missingCatalogViewsAbilityError(tableCatalog) + val tableCatalog = catalog match { + case tc: TableCatalog + if tc.capabilities().contains(TableCatalogCapability.SUPPORTS_VIEW) => tc + case _ => throw QueryCompilationErrors.missingCatalogViewsAbilityError(catalog) } tableCatalog match { case staging: StagingTableCatalog => - AtomicAlterV2ViewExec( - staging, ident, originalText, query, referredTempFunctions) :: Nil + AtomicAlterV2ViewExec(staging, ident, originalText, query) :: Nil case _ => - AlterV2ViewExec( - tableCatalog, ident, originalText, query, referredTempFunctions) :: Nil + AlterV2ViewExec(tableCatalog, ident, originalText, query) :: Nil } case ReplaceTableAsSelect(ResolvedIdentifier(catalog, ident), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala index 39d98d6d10bcb..4afa9f7ce97f3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala @@ -17,26 +17,32 @@ package org.apache.spark.sql.connector -import org.apache.spark.{SparkConf, SparkException} -import org.apache.spark.sql.{AnalysisException, QueryTest, Row} -import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, TableAlreadyExistsException} -import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, StagedTable, StagingTableCatalog, Table, TableCatalog, TableCatalogCapability, TableChange, TableInfo, TableSummary} +import org.apache.spark.SparkConf +import org.apache.spark.sql.{QueryTest, Row} +import org.apache.spark.sql.catalyst.analysis.NoSuchTableException +import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, Table, 
TableCatalog, TableChange, TableInfo, TableSummary} import org.apache.spark.sql.connector.expressions.LogicalExpressions -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap +/** + * Tests for the data-source-table side of [[MetadataOnlyTable]]: a v2 catalog returns + * metadata-only tables and Spark reads / writes them via the V1 data-source path. + * View-related paths live in [[DataSourceV2MetadataOnlyViewSuite]]. + */ class DataSourceV2MetadataOnlyTableSuite extends QueryTest with SharedSparkSession { import testImplicits._ override def sparkConf: SparkConf = super.sparkConf - .set("spark.sql.catalog.general_catalog", classOf[TestingGeneralCatalog].getName) + .set( + "spark.sql.catalog.table_catalog", + classOf[TestingDataSourceTableCatalog].getName) test("file source table") { withTempPath { path => val loc = path.getCanonicalPath - val tableName = s"general_catalog.`$loc`.test_json" + val tableName = s"table_catalog.`$loc`.test_json" spark.range(10).select($"id".cast("string").as("col")).write.json(loc) checkAnswer(spark.table(tableName), 0.until(10).map(i => Row(i.toString))) @@ -52,7 +58,7 @@ class DataSourceV2MetadataOnlyTableSuite extends QueryTest with SharedSparkSessi test("partitioned file source table") { withTempPath { path => val loc = path.getCanonicalPath - val tableName = s"general_catalog.`$loc`.test_partitioned_json" + val tableName = s"table_catalog.`$loc`.test_partitioned_json" Seq(1 -> 1, 2 -> 1).toDF("c1", "c2").write.partitionBy("c2").json(loc) checkAnswer(spark.table(tableName), Seq(Row(1, 1), Row(2, 1))) @@ -73,500 +79,56 @@ class DataSourceV2MetadataOnlyTableSuite extends QueryTest with SharedSparkSessi // TODO: move the v2 data source table handling from V2SessionCatalog to the analyzer ignore("v2 data source table") { - val tableName = "general_catalog.default.test_v2" + val tableName = 
"table_catalog.default.test_v2" checkAnswer(spark.table(tableName), 0.until(10).map(i => Row(i, -i))) } - - test("general table as view") { - // TODO: support creating views. - withTable("spark_catalog.default.t") { - Seq("a", "b").toDF("col").write.saveAsTable("spark_catalog.default.t") - // Make sure the view config applies correctly. - intercept[Exception](spark.table("general_catalog.ansi.test_view").collect()) - checkAnswer(spark.table("general_catalog.non_ansi.test_view"), Row("b", null)) - } - } - - test("general table as view with stored current catalog/namespace") { - withTable("spark_catalog.default.t") { - Seq("a", "b").toDF("col").write.saveAsTable("spark_catalog.default.t") - // View text uses the unqualified name `t`; it resolves via the stored - // current catalog / namespace properties. - checkAnswer(spark.table("general_catalog.ns.test_unqualified_view"), Row("b")) - } - } - - test("view current catalog/namespace are serialized into a single property") { - val info = new TableInfo.Builder() - .withSchema(new StructType().add("col", "string")) - .withViewText("SELECT * FROM t") - .withCurrentCatalogAndNamespace("spark_catalog", Array("default")) - .build() - val table = new MetadataOnlyTable(info) - assert(table.properties().get(TableCatalog.PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE) == - "spark_catalog.default") - } - - test("view current catalog/namespace quotes multi-part names with dots") { - val info = new TableInfo.Builder() - .withSchema(new StructType().add("col", "string")) - .withViewText("SELECT * FROM t") - .withCurrentCatalogAndNamespace("spark_catalog", Array("weird.db", "normal")) - .build() - val table = new MetadataOnlyTable(info) - assert(table.properties().get(TableCatalog.PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE) == - "spark_catalog.`weird.db`.normal") - } - - test("view with no current catalog/namespace omits the property") { - val info = new TableInfo.Builder() - .withSchema(new StructType().add("col", "string")) - 
.withViewText("SELECT * FROM spark_catalog.default.t") - .build() - val table = new MetadataOnlyTable(info) - assert(!table.properties().containsKey( - TableCatalog.PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE)) - } - - test("CREATE VIEW on a v2 catalog") { - withTable("spark_catalog.default.t") { - Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") - sql("CREATE VIEW general_catalog.default.my_view AS " + - "SELECT x FROM spark_catalog.default.t WHERE x > 1") - checkAnswer(spark.table("general_catalog.default.my_view"), Seq(Row(2), Row(3))) - } - } - - test("CREATE VIEW IF NOT EXISTS is a no-op when the view exists") { - withTable("spark_catalog.default.t") { - Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") - sql("CREATE VIEW general_catalog.default.v_ifne AS " + - "SELECT x FROM spark_catalog.default.t") - // Re-running with IF NOT EXISTS should not fail and should not change the view. - sql("CREATE VIEW IF NOT EXISTS general_catalog.default.v_ifne AS " + - "SELECT x + 100 AS x FROM spark_catalog.default.t") - checkAnswer(spark.table("general_catalog.default.v_ifne"), - Seq(Row(1), Row(2), Row(3))) - } - } - - test("CREATE VIEW without IF NOT EXISTS fails when the view exists") { - withTable("spark_catalog.default.t") { - Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") - sql("CREATE VIEW general_catalog.default.v_dup AS " + - "SELECT x FROM spark_catalog.default.t") - intercept[AnalysisException] { - sql("CREATE VIEW general_catalog.default.v_dup AS " + - "SELECT x FROM spark_catalog.default.t") - } - } - } - - test("CREATE OR REPLACE VIEW replaces an existing view") { - withTable("spark_catalog.default.t") { - Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") - sql("CREATE VIEW general_catalog.default.v_replace AS " + - "SELECT x FROM spark_catalog.default.t WHERE x > 10") - checkAnswer(spark.table("general_catalog.default.v_replace"), Seq.empty[Row]) - sql("CREATE OR REPLACE VIEW 
general_catalog.default.v_replace AS " + - "SELECT x FROM spark_catalog.default.t WHERE x > 1") - checkAnswer(spark.table("general_catalog.default.v_replace"), Seq(Row(2), Row(3))) - } - } - - test("CREATE VIEW on a catalog without SUPPORTS_VIEW fails") { - withSQLConf( - "spark.sql.catalog.no_view_catalog" -> classOf[TestingTableOnlyCatalog].getName) { - val ex = intercept[AnalysisException] { - sql("CREATE VIEW no_view_catalog.default.v AS SELECT 1") - } - assert(ex.getCondition == "MISSING_CATALOG_ABILITY.VIEWS") - } - } - - test("CREATE VIEW rejects user column list with SCHEMA EVOLUTION") { - withTable("spark_catalog.default.t") { - Seq(1 -> 10).toDF("x", "y").write.saveAsTable("spark_catalog.default.t") - // The parser either rejects `v(a, b) WITH SCHEMA EVOLUTION` outright or lets it through - // to the exec, where `buildTableInfo` throws an internal error. Either is acceptable. - val ex = intercept[Exception] { - sql("CREATE VIEW general_catalog.default.v_evo (a, b) WITH SCHEMA EVOLUTION AS " + - "SELECT x, y FROM spark_catalog.default.t") - } - assert(ex.isInstanceOf[AnalysisException] || ex.isInstanceOf[SparkException]) - } - } - - test("CREATE VIEW rejects too-few / too-many user-specified columns") { - withTable("spark_catalog.default.t") { - Seq(1 -> 10).toDF("x", "y").write.saveAsTable("spark_catalog.default.t") - intercept[AnalysisException] { - sql("CREATE VIEW general_catalog.default.v_few (a) AS " + - "SELECT x, y FROM spark_catalog.default.t") - } - intercept[AnalysisException] { - sql("CREATE VIEW general_catalog.default.v_many (a, b, c) AS " + - "SELECT x, y FROM spark_catalog.default.t") - } - } - } - - test("CREATE VIEW rejects reference to a temporary function") { - withTable("spark_catalog.default.t") { - Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") - spark.udf.register("temp_udf", (i: Int) => i + 1) - val ex = intercept[AnalysisException] { - sql("CREATE VIEW general_catalog.default.v_tempfn AS " + - "SELECT 
temp_udf(x) FROM spark_catalog.default.t") - } - assert(ex.getMessage.toLowerCase.contains("temporary")) - } - } - - test("CREATE VIEW rejects reference to a temporary view") { - withTempView("tv") { - spark.range(3).createOrReplaceTempView("tv") - val ex = intercept[AnalysisException] { - sql("CREATE VIEW general_catalog.default.v_tempview AS SELECT id FROM tv") - } - assert(ex.getMessage.toLowerCase.contains("temporary")) - } - } - - test("CREATE VIEW propagates DEFAULT COLLATION to TableInfo") { - withTable("spark_catalog.default.t") { - Seq("a", "b").toDF("col").write.saveAsTable("spark_catalog.default.t") - sql("CREATE VIEW general_catalog.default.v_coll DEFAULT COLLATION UTF8_BINARY AS " + - "SELECT col FROM spark_catalog.default.t") - // TestingGeneralCatalog stores the TableInfo verbatim, so the collation property is - // observable via the catalog-stored builder output. - val catalog = spark.sessionState.catalogManager.catalog("general_catalog") - .asInstanceOf[TestingGeneralCatalog] - val info = catalog.getStoredView(Array("default"), "v_coll") - assert(info.properties().get(TableCatalog.PROP_COLLATION) == "UTF8_BINARY") - } - } - - test("withCurrentCatalogAndNamespace clears the property when catalog is null or empty") { - val infoNull = new TableInfo.Builder() - .withSchema(new StructType().add("col", "string")) - .withViewText("SELECT 1 AS col") - .withCurrentCatalogAndNamespace("spark_catalog", Array("default")) - .withCurrentCatalogAndNamespace(null, Array("ignored")) - .build() - assert(!infoNull.properties().containsKey( - TableCatalog.PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE)) - - val infoEmpty = new TableInfo.Builder() - .withSchema(new StructType().add("col", "string")) - .withViewText("SELECT 1 AS col") - .withCurrentCatalogAndNamespace("spark_catalog", Array("default")) - .withCurrentCatalogAndNamespace("", Array("ignored")) - .build() - assert(!infoEmpty.properties().containsKey( - TableCatalog.PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE)) - } - - 
test("ALTER VIEW ... AS updates the view body on a v2 catalog") { - withTable("spark_catalog.default.t") { - Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") - sql("CREATE VIEW general_catalog.default.v_alter AS " + - "SELECT x FROM spark_catalog.default.t WHERE x > 10") - checkAnswer(spark.table("general_catalog.default.v_alter"), Seq.empty[Row]) - - sql("ALTER VIEW general_catalog.default.v_alter AS " + - "SELECT x FROM spark_catalog.default.t WHERE x > 1") - checkAnswer(spark.table("general_catalog.default.v_alter"), Seq(Row(2), Row(3))) - } - } - - test("ALTER VIEW on a missing view fails at analysis") { - // UnresolvedView resolves through lookupTableOrView and the missing view surfaces as an - // AnalysisException before we ever reach the v2 exec. The exact error condition (e.g. - // TABLE_OR_VIEW_NOT_FOUND) varies across Spark versions; we just assert we fail cleanly. - intercept[AnalysisException] { - sql("ALTER VIEW general_catalog.default.does_not_exist AS SELECT 1 AS x") - } - } - - test("ALTER VIEW rejects reference to a temporary function") { - withTable("spark_catalog.default.t") { - Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") - sql("CREATE VIEW general_catalog.default.v_alter_tempfn AS " + - "SELECT x FROM spark_catalog.default.t") - spark.udf.register("temp_udf_alter", (i: Int) => i + 1) - val ex = intercept[AnalysisException] { - sql("ALTER VIEW general_catalog.default.v_alter_tempfn AS " + - "SELECT temp_udf_alter(x) FROM spark_catalog.default.t") - } - assert(ex.getMessage.toLowerCase.contains("temporary")) - } - } - - test("ALTER VIEW preserves user-set TBLPROPERTIES") { - withTable("spark_catalog.default.t") { - Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") - sql("CREATE VIEW general_catalog.default.v_preserve " + - "TBLPROPERTIES ('mykey'='myvalue') AS " + - "SELECT x FROM spark_catalog.default.t") - sql("ALTER VIEW general_catalog.default.v_preserve AS " + - "SELECT x + 1 AS x 
FROM spark_catalog.default.t") - - val catalog = spark.sessionState.catalogManager.catalog("general_catalog") - .asInstanceOf[TestingGeneralCatalog] - val info = catalog.getStoredView(Array("default"), "v_preserve") - assert(info.properties().get("mykey") == "myvalue") - } - } - - test("ALTER VIEW on a StagingTableCatalog uses the atomic exec (stageReplace)") { - withSQLConf( - "spark.sql.catalog.staging_catalog" -> classOf[TestingStagingCatalog].getName) { - withTable("spark_catalog.default.t") { - Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") - sql("CREATE VIEW staging_catalog.default.v_atomic_alter AS " + - "SELECT x FROM spark_catalog.default.t WHERE x > 10") - checkAnswer(spark.table("staging_catalog.default.v_atomic_alter"), Seq.empty[Row]) - - sql("ALTER VIEW staging_catalog.default.v_atomic_alter AS " + - "SELECT x FROM spark_catalog.default.t WHERE x > 1") - checkAnswer( - spark.table("staging_catalog.default.v_atomic_alter"), - Seq(Row(2), Row(3))) - } - } - } - - test("ALTER VIEW on a catalog without SUPPORTS_VIEW fails") { - // An identifier the TestingTableOnlyCatalog can't find — we never get past the view - // lookup stage, so the error here is the no-such-table / not-a-view path. The capability - // gate in DataSourceV2Strategy is only reachable once the existing view is resolvable, - // which this catalog can't do; the capability rejection is already exercised by the - // CREATE VIEW test above. - withSQLConf( - "spark.sql.catalog.no_view_catalog" -> classOf[TestingTableOnlyCatalog].getName) { - intercept[AnalysisException] { - sql("ALTER VIEW no_view_catalog.default.v AS SELECT 1") - } - } - } - - test("CREATE VIEW on a StagingTableCatalog uses the atomic exec") { - withSQLConf( - "spark.sql.catalog.staging_catalog" -> classOf[TestingStagingCatalog].getName) { - withTable("spark_catalog.default.t") { - Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") - - // Plain CREATE — exercises stageCreate. 
- sql("CREATE VIEW staging_catalog.default.v_atomic AS " + - "SELECT x FROM spark_catalog.default.t WHERE x > 1") - checkAnswer( - spark.table("staging_catalog.default.v_atomic"), - Seq(Row(2), Row(3))) - - // Second CREATE without IF NOT EXISTS — should surface viewAlreadyExistsError - // (TestingStagingCatalog's stageCreate throws TableAlreadyExistsException, which the - // exec wraps). - val ex = intercept[AnalysisException] { - sql("CREATE VIEW staging_catalog.default.v_atomic AS " + - "SELECT x FROM spark_catalog.default.t WHERE x > 1") - } - assert(ex.getMessage.toLowerCase.contains("already exists")) - - // CREATE OR REPLACE — exercises stageCreateOrReplace. - sql("CREATE OR REPLACE VIEW staging_catalog.default.v_atomic AS " + - "SELECT x FROM spark_catalog.default.t WHERE x > 2") - checkAnswer(spark.table("staging_catalog.default.v_atomic"), Row(3)) - - // CREATE IF NOT EXISTS on an existing view — no-op. After the PR reorders atomic to - // validate first, this should still succeed (the body is valid); the earlier behavior - // where a broken body was silently skipped no longer applies. - sql("CREATE VIEW IF NOT EXISTS staging_catalog.default.v_atomic AS " + - "SELECT x + 100 AS x FROM spark_catalog.default.t") - // Value unchanged — IF NOT EXISTS was a no-op. - checkAnswer(spark.table("staging_catalog.default.v_atomic"), Row(3)) - } - } - } -} - -class TestingGeneralCatalog extends TableCatalog { - - // Holds views created via createTable within the session. Keyed by (namespace, name). 
- private val createdViews = - new java.util.concurrent.ConcurrentHashMap[(Seq[String], String), TableInfo]() - - override def capabilities(): java.util.Set[TableCatalogCapability] = - java.util.Collections.singleton(TableCatalogCapability.SUPPORTS_VIEW) - - override def loadTable(ident: Identifier): Table = { - val key = (ident.namespace().toSeq, ident.name()) - Option(createdViews.get(key)).map(new MetadataOnlyTable(_)).getOrElse { - ident.name() match { - case "test_json" => - val info = new TableInfo.Builder() - .withSchema(new StructType().add("col", "string")) - .withProvider("json") - .withLocation(ident.namespace().head) - .withTableType(TableSummary.EXTERNAL_TABLE_TYPE) - .build() - new MetadataOnlyTable(info) - case "test_partitioned_json" => - val partitioning = LogicalExpressions.identity(LogicalExpressions.reference(Seq("c2"))) - val info = new TableInfo.Builder() - .withSchema(new StructType().add("c1", "int").add("c2", "int")) - .withProvider("json") - .withLocation(ident.namespace().head) - .withTableType(TableSummary.EXTERNAL_TABLE_TYPE) - .withPartitions(Array(partitioning)) - .build() - new MetadataOnlyTable(info) - case "test_v2" => - val info = new TableInfo.Builder() - .withSchema(FakeV2Provider.schema) - .withProvider(classOf[FakeV2Provider].getName) - .build() - new MetadataOnlyTable(info) - case "test_view" => - val viewProps = new java.util.HashMap[String, String]() - viewProps.put( - TableCatalog.VIEW_CONF_PREFIX + SQLConf.ANSI_ENABLED.key, - (ident.namespace().head == "ansi").toString) - val info = new TableInfo.Builder() - .withSchema(new StructType().add("col", "string").add("i", "int")) - .withProperties(viewProps) - .withViewText( - "SELECT col, col::int AS i FROM spark_catalog.default.t WHERE col = 'b'") - .build() - new MetadataOnlyTable(info) - case "test_unqualified_view" => - val info = new TableInfo.Builder() - .withSchema(new StructType().add("col", "string")) - .withViewText("SELECT col FROM t WHERE col = 'b'") - 
.withCurrentCatalogAndNamespace("spark_catalog", Array("default")) - .build() - new MetadataOnlyTable(info) - case _ => throw new NoSuchTableException(ident) - } - } - } - - override def tableExists(ident: Identifier): Boolean = { - val key = (ident.namespace().toSeq, ident.name()) - createdViews.containsKey(key) || super.tableExists(ident) - } - - override def createTable(ident: Identifier, info: TableInfo): Table = { - val key = (ident.namespace().toSeq, ident.name()) - if (createdViews.putIfAbsent(key, info) != null) { - throw new TableAlreadyExistsException(ident) - } - new MetadataOnlyTable(info) - } - - /** Test-only accessor: returns the stored TableInfo for a created view. */ - def getStoredView(namespace: Array[String], name: String): TableInfo = { - Option(createdViews.get((namespace.toSeq, name))).getOrElse { - throw new NoSuchTableException(Identifier.of(namespace, name)) - } - } - - override def alterTable(ident: Identifier, changes: TableChange*): Table = { - throw new RuntimeException("shouldn't be called") - } - override def dropTable(ident: Identifier): Boolean = { - val key = (ident.namespace().toSeq, ident.name()) - createdViews.remove(key) != null - } - override def renameTable(oldIdent: Identifier, newIdent: Identifier): Unit = { - throw new RuntimeException("shouldn't be called") - } - override def listTables(namespace: Array[String]): Array[Identifier] = { - throw new RuntimeException("shouldn't be called") - } - - private var catalogName = "" - override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = { - catalogName = name - } - override def name(): String = catalogName } /** - * A minimal [[StagingTableCatalog]] used to drive `AtomicCreateV2ViewExec`. Views are stored - * in a local map; staging commits write through, aborts discard. Supports SUPPORTS_VIEW. + * A read-only [[TableCatalog]] that returns [[MetadataOnlyTable]] for a small set of canned + * table fixtures. 
Used to drive the data-source-table read path (file source + v2 provider) + * through Spark's V1 data-source machinery. */ -class TestingStagingCatalog extends StagingTableCatalog { - - private val views = - new java.util.concurrent.ConcurrentHashMap[(Seq[String], String), TableInfo]() - - override def capabilities(): java.util.Set[TableCatalogCapability] = - java.util.Collections.singleton(TableCatalogCapability.SUPPORTS_VIEW) - - private def keyOf(ident: Identifier): (Seq[String], String) = - (ident.namespace().toSeq, ident.name()) - - override def loadTable(ident: Identifier): Table = { - Option(views.get(keyOf(ident))).map(new MetadataOnlyTable(_)) - .getOrElse(throw new NoSuchTableException(ident)) - } - - override def tableExists(ident: Identifier): Boolean = views.containsKey(keyOf(ident)) - - override def createTable(ident: Identifier, info: TableInfo): Table = { - if (views.putIfAbsent(keyOf(ident), info) != null) { - throw new TableAlreadyExistsException(ident) - } - new MetadataOnlyTable(info) - } - - override def stageCreate(ident: Identifier, info: TableInfo): StagedTable = { - if (views.containsKey(keyOf(ident))) throw new TableAlreadyExistsException(ident) - new RecordingStagedTable(info, () => views.put(keyOf(ident), info), () => ()) - } - - override def stageReplace(ident: Identifier, info: TableInfo): StagedTable = { - if (!views.containsKey(keyOf(ident))) throw new NoSuchTableException(ident) - new RecordingStagedTable(info, () => views.put(keyOf(ident), info), () => ()) - } - - override def stageCreateOrReplace(ident: Identifier, info: TableInfo): StagedTable = { - new RecordingStagedTable(info, () => views.put(keyOf(ident), info), () => ()) - } - - override def alterTable(ident: Identifier, changes: TableChange*): Table = +class TestingDataSourceTableCatalog extends TableCatalog { + override def loadTable(ident: Identifier): Table = ident.name() match { + case "test_json" => + val info = new TableInfo.Builder() + .withSchema(new 
StructType().add("col", "string")) + .withProvider("json") + .withLocation(ident.namespace().head) + .withTableType(TableSummary.EXTERNAL_TABLE_TYPE) + .build() + new MetadataOnlyTable(info) + case "test_partitioned_json" => + val partitioning = LogicalExpressions.identity(LogicalExpressions.reference(Seq("c2"))) + val info = new TableInfo.Builder() + .withSchema(new StructType().add("c1", "int").add("c2", "int")) + .withProvider("json") + .withLocation(ident.namespace().head) + .withTableType(TableSummary.EXTERNAL_TABLE_TYPE) + .withPartitions(Array(partitioning)) + .build() + new MetadataOnlyTable(info) + case "test_v2" => + val info = new TableInfo.Builder() + .withSchema(FakeV2Provider.schema) + .withProvider(classOf[FakeV2Provider].getName) + .build() + new MetadataOnlyTable(info) + case _ => throw new NoSuchTableException(ident) + } + + override def createTable(ident: Identifier, info: TableInfo): Table = throw new RuntimeException("shouldn't be called") - override def dropTable(ident: Identifier): Boolean = views.remove(keyOf(ident)) != null - override def renameTable(oldIdent: Identifier, newIdent: Identifier): Unit = - throw new RuntimeException("shouldn't be called") - override def listTables(namespace: Array[String]): Array[Identifier] = Array.empty - - private var catalogName = "" - override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = { - catalogName = name - } - override def name(): String = catalogName -} - -private class RecordingStagedTable( - info: TableInfo, - onCommit: () => Unit, - onAbort: () => Unit) extends MetadataOnlyTable(info) with StagedTable { - override def commitStagedChanges(): Unit = onCommit() - override def abortStagedChanges(): Unit = onAbort() -} - -/** A v2 catalog that does not declare SUPPORTS_VIEW. Used to exercise the capability - * gate in `DataSourceV2Strategy`. 
*/ -class TestingTableOnlyCatalog extends TableCatalog { - override def loadTable(ident: Identifier): Table = throw new NoSuchTableException(ident) override def alterTable(ident: Identifier, changes: TableChange*): Table = throw new RuntimeException("shouldn't be called") - override def dropTable(ident: Identifier): Boolean = false + override def dropTable(ident: Identifier): Boolean = + throw new RuntimeException("shouldn't be called") override def renameTable(oldIdent: Identifier, newIdent: Identifier): Unit = throw new RuntimeException("shouldn't be called") - override def listTables(namespace: Array[String]): Array[Identifier] = Array.empty + override def listTables(namespace: Array[String]): Array[Identifier] = + throw new RuntimeException("shouldn't be called") + private var catalogName = "" override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = { catalogName = name diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala new file mode 100644 index 0000000000000..1d179685df222 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala @@ -0,0 +1,558 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector + +import org.apache.spark.SparkConf +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, TableAlreadyExistsException} +import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, StagedTable, StagingTableCatalog, Table, TableCatalog, TableCatalogCapability, TableChange, TableInfo} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.util.CaseInsensitiveStringMap + +/** + * Tests for the view side of [[MetadataOnlyTable]]: view-text expansion on read, and + * CREATE VIEW / ALTER VIEW ... AS going through the v2 write path + * (`CreateV2ViewExec` / `AlterV2ViewExec` and their atomic staging variants). + * Data-source-table read paths live in + * [[org.apache.spark.sql.connector.DataSourceV2MetadataOnlyTableSuite]]. 
+ */ +class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSession { + import testImplicits._ + + override def sparkConf: SparkConf = super.sparkConf + .set("spark.sql.catalog.view_catalog", classOf[TestingViewCatalog].getName) + + // --- View read path ----------------------------------------------------- + + test("read view expands SQL text and applies captured SQL configs") { + withTable("spark_catalog.default.t") { + Seq("a", "b").toDF("col").write.saveAsTable("spark_catalog.default.t") + // view_catalog.ansi.test_view stores view.sqlConfig.spark.sql.ansi.enabled=true; + // view_catalog.non_ansi.test_view stores it =false. The view body does + // `col::int` which errors in ANSI mode and yields NULL in non-ANSI mode. + intercept[Exception](spark.table("view_catalog.ansi.test_view").collect()) + checkAnswer(spark.table("view_catalog.non_ansi.test_view"), Row("b", null)) + } + } + + test("read view resolves unqualified refs via captured current catalog/namespace") { + withTable("spark_catalog.default.t") { + Seq("a", "b").toDF("col").write.saveAsTable("spark_catalog.default.t") + // View text uses the unqualified name `t`; it resolves via the stored + // current catalog / namespace properties. 
+ checkAnswer(spark.table("view_catalog.ns.test_unqualified_view"), Row("b")) + } + } + + // --- TableInfo.Builder unit tests for view-specific properties ---------- + + test("view current catalog/namespace are serialized into a single property") { + val info = new TableInfo.Builder() + .withSchema(new StructType().add("col", "string")) + .withViewText("SELECT * FROM t") + .withCurrentCatalogAndNamespace("spark_catalog", Array("default")) + .build() + val table = new MetadataOnlyTable(info) + assert(table.properties().get(TableCatalog.PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE) == + "spark_catalog.default") + } + + test("view current catalog/namespace quotes multi-part names with dots") { + val info = new TableInfo.Builder() + .withSchema(new StructType().add("col", "string")) + .withViewText("SELECT * FROM t") + .withCurrentCatalogAndNamespace("spark_catalog", Array("weird.db", "normal")) + .build() + val table = new MetadataOnlyTable(info) + assert(table.properties().get(TableCatalog.PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE) == + "spark_catalog.`weird.db`.normal") + } + + test("view with no current catalog/namespace omits the property") { + val info = new TableInfo.Builder() + .withSchema(new StructType().add("col", "string")) + .withViewText("SELECT * FROM spark_catalog.default.t") + .build() + val table = new MetadataOnlyTable(info) + assert(!table.properties().containsKey( + TableCatalog.PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE)) + } + + test("withCurrentCatalogAndNamespace clears the property when catalog is null or empty") { + val infoNull = new TableInfo.Builder() + .withSchema(new StructType().add("col", "string")) + .withViewText("SELECT 1 AS col") + .withCurrentCatalogAndNamespace("spark_catalog", Array("default")) + .withCurrentCatalogAndNamespace(null, Array("ignored")) + .build() + assert(!infoNull.properties().containsKey( + TableCatalog.PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE)) + + val infoEmpty = new TableInfo.Builder() + .withSchema(new 
StructType().add("col", "string")) + .withViewText("SELECT 1 AS col") + .withCurrentCatalogAndNamespace("spark_catalog", Array("default")) + .withCurrentCatalogAndNamespace("", Array("ignored")) + .build() + assert(!infoEmpty.properties().containsKey( + TableCatalog.PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE)) + } + + // --- CREATE VIEW on a plain TableCatalog -------------------------------- + + test("CREATE VIEW on a v2 catalog") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW view_catalog.default.my_view AS " + + "SELECT x FROM spark_catalog.default.t WHERE x > 1") + checkAnswer(spark.table("view_catalog.default.my_view"), Seq(Row(2), Row(3))) + } + } + + test("CREATE VIEW IF NOT EXISTS is a no-op when the view exists") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW view_catalog.default.v_ifne AS " + + "SELECT x FROM spark_catalog.default.t") + // Re-running with IF NOT EXISTS should not fail and should not change the view. 
+ sql("CREATE VIEW IF NOT EXISTS view_catalog.default.v_ifne AS " + + "SELECT x + 100 AS x FROM spark_catalog.default.t") + checkAnswer(spark.table("view_catalog.default.v_ifne"), + Seq(Row(1), Row(2), Row(3))) + } + } + + test("CREATE VIEW without IF NOT EXISTS fails when the view exists") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW view_catalog.default.v_dup AS " + + "SELECT x FROM spark_catalog.default.t") + intercept[AnalysisException] { + sql("CREATE VIEW view_catalog.default.v_dup AS " + + "SELECT x FROM spark_catalog.default.t") + } + } + } + + test("CREATE OR REPLACE VIEW replaces an existing view") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW view_catalog.default.v_replace AS " + + "SELECT x FROM spark_catalog.default.t WHERE x > 10") + checkAnswer(spark.table("view_catalog.default.v_replace"), Seq.empty[Row]) + sql("CREATE OR REPLACE VIEW view_catalog.default.v_replace AS " + + "SELECT x FROM spark_catalog.default.t WHERE x > 1") + checkAnswer(spark.table("view_catalog.default.v_replace"), Seq(Row(2), Row(3))) + } + } + + test("CREATE VIEW on a catalog without SUPPORTS_VIEW fails") { + withSQLConf( + "spark.sql.catalog.no_view_catalog" -> classOf[TestingTableOnlyCatalog].getName) { + val ex = intercept[AnalysisException] { + sql("CREATE VIEW no_view_catalog.default.v AS SELECT 1") + } + assert(ex.getCondition == "MISSING_CATALOG_ABILITY.VIEWS") + } + } + + test("CREATE VIEW rejects too-few / too-many user-specified columns") { + withTable("spark_catalog.default.t") { + Seq(1 -> 10).toDF("x", "y").write.saveAsTable("spark_catalog.default.t") + intercept[AnalysisException] { + sql("CREATE VIEW view_catalog.default.v_few (a) AS " + + "SELECT x, y FROM spark_catalog.default.t") + } + intercept[AnalysisException] { + sql("CREATE VIEW view_catalog.default.v_many (a, b, c) AS " + + "SELECT 
x, y FROM spark_catalog.default.t") + } + } + } + + test("CREATE VIEW rejects reference to a temporary function") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + spark.udf.register("temp_udf", (i: Int) => i + 1) + val ex = intercept[AnalysisException] { + sql("CREATE VIEW view_catalog.default.v_tempfn AS " + + "SELECT temp_udf(x) FROM spark_catalog.default.t") + } + assert(ex.getMessage.toLowerCase(java.util.Locale.ROOT).contains("temporary")) + } + } + + test("CREATE VIEW rejects reference to a temporary view") { + withTempView("tv") { + spark.range(3).createOrReplaceTempView("tv") + val ex = intercept[AnalysisException] { + sql("CREATE VIEW view_catalog.default.v_tempview AS SELECT id FROM tv") + } + assert(ex.getMessage.toLowerCase(java.util.Locale.ROOT).contains("temporary")) + } + } + + test("CREATE VIEW rejects reference to a temporary variable") { + withSessionVariable("temp_var") { + sql("DECLARE VARIABLE temp_var INT DEFAULT 1") + val ex = intercept[AnalysisException] { + sql("CREATE VIEW view_catalog.default.v_tempvar AS SELECT temp_var AS x") + } + assert(ex.getMessage.toLowerCase(java.util.Locale.ROOT).contains("temporary")) + } + } + + test("CREATE VIEW propagates DEFAULT COLLATION to TableInfo") { + withTable("spark_catalog.default.t") { + Seq("a", "b").toDF("col").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW view_catalog.default.v_coll DEFAULT COLLATION UTF8_BINARY AS " + + "SELECT col FROM spark_catalog.default.t") + // TestingViewCatalog stores the TableInfo verbatim, so the collation property is + // observable via the catalog-stored builder output. 
+ val catalog = spark.sessionState.catalogManager.catalog("view_catalog") + .asInstanceOf[TestingViewCatalog] + val info = catalog.getStoredView(Array("default"), "v_coll") + assert(info.properties().get(TableCatalog.PROP_COLLATION) == "UTF8_BINARY") + } + } + + test("CREATE OR REPLACE VIEW detects cyclic view references") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW view_catalog.default.v_cycle_a AS " + + "SELECT x FROM spark_catalog.default.t") + sql("CREATE VIEW view_catalog.default.v_cycle_b AS " + + "SELECT x FROM view_catalog.default.v_cycle_a") + val ex = intercept[AnalysisException] { + sql("CREATE OR REPLACE VIEW view_catalog.default.v_cycle_a AS " + + "SELECT x FROM view_catalog.default.v_cycle_b") + } + assert(ex.getCondition == "RECURSIVE_VIEW") + } + } + + // --- CREATE VIEW on a StagingTableCatalog ------------------------------- + + test("CREATE VIEW on a StagingTableCatalog uses the atomic exec") { + withSQLConf( + "spark.sql.catalog.staging_catalog" -> classOf[TestingStagingCatalog].getName) { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + + // Plain CREATE -- exercises stageCreate. + sql("CREATE VIEW staging_catalog.default.v_atomic AS " + + "SELECT x FROM spark_catalog.default.t WHERE x > 1") + checkAnswer( + spark.table("staging_catalog.default.v_atomic"), + Seq(Row(2), Row(3))) + + // Second CREATE without IF NOT EXISTS -- should surface viewAlreadyExistsError + // (TestingStagingCatalog's stageCreate throws TableAlreadyExistsException, which the + // exec wraps). + val ex = intercept[AnalysisException] { + sql("CREATE VIEW staging_catalog.default.v_atomic AS " + + "SELECT x FROM spark_catalog.default.t WHERE x > 1") + } + assert(ex.getMessage.toLowerCase(java.util.Locale.ROOT).contains("already exists")) + + // CREATE OR REPLACE -- exercises stageCreateOrReplace. 
+ sql("CREATE OR REPLACE VIEW staging_catalog.default.v_atomic AS " + + "SELECT x FROM spark_catalog.default.t WHERE x > 2") + checkAnswer(spark.table("staging_catalog.default.v_atomic"), Row(3)) + + // CREATE IF NOT EXISTS on an existing view -- no-op, but the body is still validated + // first (the atomic exec builds the TableInfo before the allow-existing short-circuit), + // so a malformed body is rejected even when creation is skipped. + sql("CREATE VIEW IF NOT EXISTS staging_catalog.default.v_atomic AS " + + "SELECT x + 100 AS x FROM spark_catalog.default.t") + // Value unchanged -- IF NOT EXISTS was a no-op. + checkAnswer(spark.table("staging_catalog.default.v_atomic"), Row(3)) + } + } + } + + // --- ALTER VIEW --------------------------------------------------------- + + test("ALTER VIEW ... AS updates the view body on a v2 catalog") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW view_catalog.default.v_alter AS " + + "SELECT x FROM spark_catalog.default.t WHERE x > 10") + checkAnswer(spark.table("view_catalog.default.v_alter"), Seq.empty[Row]) + + sql("ALTER VIEW view_catalog.default.v_alter AS " + + "SELECT x FROM spark_catalog.default.t WHERE x > 1") + checkAnswer(spark.table("view_catalog.default.v_alter"), Seq(Row(2), Row(3))) + } + } + + test("ALTER VIEW on a missing view fails at analysis") { + // UnresolvedView resolves through lookupTableOrView and the missing view surfaces as an + // AnalysisException before we ever reach the v2 exec. The exact error condition (e.g. + // TABLE_OR_VIEW_NOT_FOUND) varies across Spark versions; we just assert we fail cleanly. 
+ intercept[AnalysisException] { + sql("ALTER VIEW view_catalog.default.does_not_exist AS SELECT 1 AS x") + } + } + + test("ALTER VIEW rejects reference to a temporary function") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW view_catalog.default.v_alter_tempfn AS " + + "SELECT x FROM spark_catalog.default.t") + spark.udf.register("temp_udf_alter", (i: Int) => i + 1) + val ex = intercept[AnalysisException] { + sql("ALTER VIEW view_catalog.default.v_alter_tempfn AS " + + "SELECT temp_udf_alter(x) FROM spark_catalog.default.t") + } + assert(ex.getMessage.toLowerCase(java.util.Locale.ROOT).contains("temporary")) + } + } + + test("ALTER VIEW preserves user-set TBLPROPERTIES") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW view_catalog.default.v_preserve " + + "TBLPROPERTIES ('mykey'='myvalue') AS " + + "SELECT x FROM spark_catalog.default.t") + sql("ALTER VIEW view_catalog.default.v_preserve AS " + + "SELECT x + 1 AS x FROM spark_catalog.default.t") + + val catalog = spark.sessionState.catalogManager.catalog("view_catalog") + .asInstanceOf[TestingViewCatalog] + val info = catalog.getStoredView(Array("default"), "v_preserve") + assert(info.properties().get("mykey") == "myvalue") + } + } + + test("ALTER VIEW on a StagingTableCatalog uses the atomic exec (stageReplace)") { + withSQLConf( + "spark.sql.catalog.staging_catalog" -> classOf[TestingStagingCatalog].getName) { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW staging_catalog.default.v_atomic_alter AS " + + "SELECT x FROM spark_catalog.default.t WHERE x > 10") + checkAnswer(spark.table("staging_catalog.default.v_atomic_alter"), Seq.empty[Row]) + + sql("ALTER VIEW staging_catalog.default.v_atomic_alter AS " + + "SELECT x FROM spark_catalog.default.t WHERE x > 1") + 
checkAnswer( + spark.table("staging_catalog.default.v_atomic_alter"), + Seq(Row(2), Row(3))) + } + } + } + + test("ALTER VIEW on a catalog without SUPPORTS_VIEW fails") { + // An identifier the TestingTableOnlyCatalog can't find -- we never get past the view + // lookup stage, so the error here is the no-such-table / not-a-view path. The capability + // gate in DataSourceV2Strategy is only reachable once the existing view is resolvable, + // which this catalog can't do; the capability rejection is already exercised by the + // CREATE VIEW test above. + withSQLConf( + "spark.sql.catalog.no_view_catalog" -> classOf[TestingTableOnlyCatalog].getName) { + intercept[AnalysisException] { + sql("ALTER VIEW no_view_catalog.default.v AS SELECT 1") + } + } + } + + test("ALTER VIEW detects cyclic view references") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW view_catalog.default.v_alter_cycle_a AS " + + "SELECT x FROM spark_catalog.default.t") + sql("CREATE VIEW view_catalog.default.v_alter_cycle_b AS " + + "SELECT x FROM view_catalog.default.v_alter_cycle_a") + val ex = intercept[AnalysisException] { + sql("ALTER VIEW view_catalog.default.v_alter_cycle_a AS " + + "SELECT x FROM view_catalog.default.v_alter_cycle_b") + } + assert(ex.getCondition == "RECURSIVE_VIEW") + } + } +} + +/** + * A [[TableCatalog]] that supports SUPPORTS_VIEW: round-trips [[MetadataOnlyTable]] for created + * views (via `createTable` / `dropTable` / `tableExists`) and exposes two canned read-only + * fixtures (`test_view`, `test_unqualified_view`) used by the view-read tests. + */ +class TestingViewCatalog extends TableCatalog { + + // Holds views created via createTable within the session. Keyed by (namespace, name). 
+ private val createdViews = + new java.util.concurrent.ConcurrentHashMap[(Seq[String], String), TableInfo]() + + override def capabilities(): java.util.Set[TableCatalogCapability] = + java.util.Collections.singleton(TableCatalogCapability.SUPPORTS_VIEW) + + override def loadTable(ident: Identifier): Table = { + val key = (ident.namespace().toSeq, ident.name()) + Option(createdViews.get(key)).map(new MetadataOnlyTable(_)).getOrElse { + ident.name() match { + case "test_view" => + val viewProps = new java.util.HashMap[String, String]() + viewProps.put( + TableCatalog.VIEW_CONF_PREFIX + SQLConf.ANSI_ENABLED.key, + (ident.namespace().head == "ansi").toString) + val info = new TableInfo.Builder() + .withSchema(new StructType().add("col", "string").add("i", "int")) + .withProperties(viewProps) + .withViewText( + "SELECT col, col::int AS i FROM spark_catalog.default.t WHERE col = 'b'") + .build() + new MetadataOnlyTable(info) + case "test_unqualified_view" => + val info = new TableInfo.Builder() + .withSchema(new StructType().add("col", "string")) + .withViewText("SELECT col FROM t WHERE col = 'b'") + .withCurrentCatalogAndNamespace("spark_catalog", Array("default")) + .build() + new MetadataOnlyTable(info) + case _ => throw new NoSuchTableException(ident) + } + } + } + + override def tableExists(ident: Identifier): Boolean = { + val key = (ident.namespace().toSeq, ident.name()) + createdViews.containsKey(key) || super.tableExists(ident) + } + + override def createTable(ident: Identifier, info: TableInfo): Table = { + val key = (ident.namespace().toSeq, ident.name()) + if (createdViews.putIfAbsent(key, info) != null) { + throw new TableAlreadyExistsException(ident) + } + new MetadataOnlyTable(info) + } + + /** Test-only accessor: returns the stored TableInfo for a created view. 
*/ + def getStoredView(namespace: Array[String], name: String): TableInfo = { + Option(createdViews.get((namespace.toSeq, name))).getOrElse { + throw new NoSuchTableException(Identifier.of(namespace, name)) + } + } + + override def alterTable(ident: Identifier, changes: TableChange*): Table = { + throw new RuntimeException("shouldn't be called") + } + override def dropTable(ident: Identifier): Boolean = { + val key = (ident.namespace().toSeq, ident.name()) + createdViews.remove(key) != null + } + override def renameTable(oldIdent: Identifier, newIdent: Identifier): Unit = { + throw new RuntimeException("shouldn't be called") + } + override def listTables(namespace: Array[String]): Array[Identifier] = { + throw new RuntimeException("shouldn't be called") + } + + private var catalogName = "" + override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = { + catalogName = name + } + override def name(): String = catalogName +} + +/** + * A minimal [[StagingTableCatalog]] used to drive `AtomicCreateV2ViewExec`. Views are stored + * in a local map; staging commits write through, aborts discard. Supports SUPPORTS_VIEW. 
+ */ +class TestingStagingCatalog extends StagingTableCatalog { + + private val views = + new java.util.concurrent.ConcurrentHashMap[(Seq[String], String), TableInfo]() + + override def capabilities(): java.util.Set[TableCatalogCapability] = + java.util.Collections.singleton(TableCatalogCapability.SUPPORTS_VIEW) + + private def keyOf(ident: Identifier): (Seq[String], String) = + (ident.namespace().toSeq, ident.name()) + + override def loadTable(ident: Identifier): Table = { + Option(views.get(keyOf(ident))).map(new MetadataOnlyTable(_)) + .getOrElse(throw new NoSuchTableException(ident)) + } + + override def tableExists(ident: Identifier): Boolean = views.containsKey(keyOf(ident)) + + override def createTable(ident: Identifier, info: TableInfo): Table = { + if (views.putIfAbsent(keyOf(ident), info) != null) { + throw new TableAlreadyExistsException(ident) + } + new MetadataOnlyTable(info) + } + + override def stageCreate(ident: Identifier, info: TableInfo): StagedTable = { + if (views.containsKey(keyOf(ident))) throw new TableAlreadyExistsException(ident) + new RecordingStagedTable(info, () => views.put(keyOf(ident), info), () => ()) + } + + override def stageReplace(ident: Identifier, info: TableInfo): StagedTable = { + if (!views.containsKey(keyOf(ident))) throw new NoSuchTableException(ident) + new RecordingStagedTable(info, () => views.put(keyOf(ident), info), () => ()) + } + + override def stageCreateOrReplace(ident: Identifier, info: TableInfo): StagedTable = { + new RecordingStagedTable(info, () => views.put(keyOf(ident), info), () => ()) + } + + override def alterTable(ident: Identifier, changes: TableChange*): Table = + throw new RuntimeException("shouldn't be called") + override def dropTable(ident: Identifier): Boolean = views.remove(keyOf(ident)) != null + override def renameTable(oldIdent: Identifier, newIdent: Identifier): Unit = + throw new RuntimeException("shouldn't be called") + override def listTables(namespace: Array[String]): Array[Identifier] 
= Array.empty + + private var catalogName = "" + override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = { + catalogName = name + } + override def name(): String = catalogName +} + +private class RecordingStagedTable( + info: TableInfo, + onCommit: () => Unit, + onAbort: () => Unit) extends MetadataOnlyTable(info) with StagedTable { + override def commitStagedChanges(): Unit = onCommit() + override def abortStagedChanges(): Unit = onAbort() +} + +/** + * A v2 catalog that does not declare SUPPORTS_VIEW. Used to exercise the capability + * gate in `DataSourceV2Strategy`. + */ +class TestingTableOnlyCatalog extends TableCatalog { + override def loadTable(ident: Identifier): Table = throw new NoSuchTableException(ident) + override def alterTable(ident: Identifier, changes: TableChange*): Table = + throw new RuntimeException("shouldn't be called") + override def dropTable(ident: Identifier): Boolean = false + override def renameTable(oldIdent: Identifier, newIdent: Identifier): Unit = + throw new RuntimeException("shouldn't be called") + override def listTables(namespace: Array[String]): Array[Identifier] = Array.empty + private var catalogName = "" + override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = { + catalogName = name + } + override def name(): String = catalogName +} From 5d914808d8c8a22a43851f2162e0239eeaea6150 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 22 Apr 2026 17:57:42 +0000 Subject: [PATCH 19/59] fix multi-part namespace handling: fullIdent + cyclic-ref check in analyzer - Add CatalogTable.fullIdentOpt / fullIdent so v2 catalogs with multi-level namespaces (via MetadataOnlyTable) can carry the real [catalog, ns..., name] that v1 TableIdentifier can't represent. - V1Table.toCatalogTable populates fullIdentOpt from the v2 identifier. - SessionCatalog.getRelation uses fullIdentOpt for the SubqueryAlias qualifier, falling back to qualifyIdentifier for v1 session-catalog tables. 
Fixes fully-qualified column references against non-session v2 catalogs (qualifier was hardcoded to spark_catalog). - checkCyclicViewReference and recursiveViewDetectedError now take Seq[String] and compare via CatalogTable.fullIdent, so views in multi-level namespaces sharing the last segment (cat.ns1.a.v vs cat.ns2.a.v) no longer collide. - Move the v2-path cyclic-view check from the four exec sites into the new CheckViewReferences analyzer rule, gated on replace for CreateView. v1 keeps its exec-time check as the Dataset API safety net. - Replace two non-ASCII em-dashes in V1Table.scala comments with ASCII. - Tests: fully-qualified column reference on v2 catalog (TableSuite), cyclic detection across multi-level namespaces for both CREATE OR REPLACE and ALTER paths (ViewSuite). Co-authored-by: Isaac --- .../analysis/RelationResolution.scala | 2 +- .../sql/catalyst/catalog/SessionCatalog.scala | 12 ++-- .../sql/catalyst/catalog/interface.scala | 13 +++- .../spark/sql/connector/catalog/V1Table.scala | 10 ++-- .../sql/errors/QueryCompilationErrors.scala | 8 +-- .../spark/sql/execution/command/views.scala | 39 +++++++++--- .../datasources/v2/AlterV2ViewExec.scala | 6 +- .../datasources/v2/CreateV2ViewExec.scala | 4 +- .../DataSourceV2MetadataOnlyTableSuite.scala | 25 ++++++++ .../DataSourceV2MetadataOnlyViewSuite.scala | 59 +++++++++++++++++++ 10 files changed, 149 insertions(+), 29 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala index c88ad26a92c25..6913737b6c709 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala @@ -324,7 +324,7 @@ class RelationResolution( ) } SubqueryAlias( - catalog.name +: ident.asMultipartIdentifier, + v1Table.fullIdent, 
UnresolvedCatalogRelation(v1Table, options, isStreaming = true) ) } else { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index ff4a135b7d044..33a30e3cdcc11 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -1054,10 +1054,14 @@ class SessionCatalog( def getRelation( metadata: CatalogTable, options: CaseInsensitiveStringMap = CaseInsensitiveStringMap.empty()): LogicalPlan = { - val qualifiedIdent = qualifyIdentifier(metadata.identifier) - val db = qualifiedIdent.database.get - val table = qualifiedIdent.table - val multiParts = Seq(CatalogManager.SESSION_CATALOG_NAME, db, table) + // Prefer `fullIdentOpt` (set by non-session v2 catalogs via `V1Table.toCatalogTable`) so + // the SubqueryAlias qualifier reflects the real catalog + multi-part namespace. + // Fall back to `qualifyIdentifier` for v1 session-catalog tables: it defaults catalog to + // `SESSION_CATALOG_NAME` and database to the current database when either is missing. 
+ val multiParts = metadata.fullIdentOpt.getOrElse { + val qualifiedIdent = qualifyIdentifier(metadata.identifier) + qualifiedIdent.nameParts + } if (CatalogTable.isMetricView(metadata)) { parseMetricViewDefinition(metadata) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index cc5c6ba90bd29..384da3716f192 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -444,11 +444,22 @@ case class CatalogTable( tracksPartitionsInCatalog: Boolean = false, schemaPreservesCase: Boolean = true, ignoredProperties: Map[String, String] = Map.empty, - viewOriginalText: Option[String] = None) + viewOriginalText: Option[String] = None, + // Optional full multi-part identifier [catalog, namespace..., name]. Set when the v1 + // `identifier: TableIdentifier` (single-string database) cannot losslessly carry the real + // multi-level namespace -- e.g. a CatalogTable synthesized from a v2 MetadataOnlyTable + // whose v2 identifier has more than one namespace part. `None` for v1-native tables. + fullIdentOpt: Option[Seq[String]] = None) extends MetadataMapSupport { import CatalogTable._ + /** + * The fully-qualified multi-part identifier. Prefers `fullIdentOpt` when set (v2-sourced + * tables with multi-level namespaces); otherwise reconstructs from `identifier.nameParts`. 
+ */ + def fullIdent: Seq[String] = fullIdentOpt.getOrElse(identifier.nameParts) + /** * schema of this table's partition columns */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala index 8486fa1d5f89a..950f13b6d0ab5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala @@ -147,10 +147,9 @@ private[sql] object V1Table { } CatalogTable( // CatalogTable.identifier uses a single-string database; for multi-part namespaces we - // preserve only the last part. The view-expansion path does not rely on this — it reads - // the captured catalog+namespace from PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE (translated - // below into V1's numbered keys) — so the narrowing only affects identifier rendering in - // error messages. + // preserve only the last part here and record the full multi-part form in `fullIdentOpt` + // below. Callers needing the real fully-qualified name (e.g. cyclic view detection) + // should read `CatalogTable.fullIdent`. 
identifier = TableIdentifier( table = ident.name(), database = ident.namespace().lastOption, @@ -175,7 +174,8 @@ private[sql] object V1Table { collation = props.get(TableCatalog.PROP_COLLATION), properties = tablePropsMap ++ clusterBySpec.map(ClusterBySpec.toPropertyWithoutValidation) ++ - viewContextProps + viewContextProps, + fullIdentOpt = Some(catalog.name() +: ident.asMultipartIdentifier) ) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 94c76976f6cd1..21bd24abfced3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -3406,13 +3406,13 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat } def recursiveViewDetectedError( - viewIdent: TableIdentifier, - newPath: Seq[TableIdentifier]): Throwable = { + viewIdent: Seq[String], + newPath: Seq[Seq[String]]): Throwable = { new AnalysisException( errorClass = "RECURSIVE_VIEW", messageParameters = Map( - "viewIdent" -> toSQLId(viewIdent.nameParts), - "newPath" -> newPath.map(p => toSQLId(p.nameParts)).mkString(" -> "))) + "viewIdent" -> toSQLId(viewIdent), + "newPath" -> newPath.map(toSQLId).mkString(" -> "))) } def notAllowedToCreatePermanentViewWithoutAssigningAliasForExpressionError( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index be8da6ded25f7..5c106c1d3bb3a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -33,7 +33,7 @@ import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, SubqueryExpr import 
org.apache.spark.sql.catalyst.plans.logical.{AlterViewAs, AnalysisOnlyCommand, CreateTempView, CreateView, CTEInChildren, CTERelationDef, LogicalPlan, Project, View, WithCTE} import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.classic.ClassicConversions.castToImpl -import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.NamespaceHelper +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.{IdentifierHelper, NamespaceHelper} import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.internal.StaticSQLConf @@ -169,7 +169,8 @@ case class CreateViewCommand( } else if (replace) { // Detect cyclic view reference on CREATE OR REPLACE VIEW. val viewIdent = tableMetadata.identifier - checkCyclicViewReference(analyzedPlan, Seq(viewIdent), viewIdent) + val viewFullIdent = tableMetadata.fullIdent + checkCyclicViewReference(analyzedPlan, Seq(viewFullIdent), viewFullIdent) // uncache the cached data before replacing an exists view logDebug(s"Try to uncache ${viewIdent.quotedString} before replacing.") @@ -276,7 +277,8 @@ case class AlterViewAsCommand( // Detect cyclic view reference on ALTER VIEW. val viewIdent = viewMeta.identifier - checkCyclicViewReference(analyzedPlan, Seq(viewIdent), viewIdent) + val viewFullIdent = viewMeta.fullIdent + checkCyclicViewReference(analyzedPlan, Seq(viewFullIdent), viewFullIdent) logDebug(s"Try to uncache ${viewIdent.quotedString} before replacing.") CommandUtils.uncacheTableOrView(session, viewIdent) @@ -526,16 +528,16 @@ object ViewHelper extends SQLConfHelper with Logging with CapturesConfig { * * @param plan the logical plan we detect cyclic view references from. * @param path the path between the altered view and current node. - * @param viewIdent the table identifier of the altered view, we compare two views by the - * `desc.identifier`. 
+ * @param viewIdent the full multi-part identifier of the altered view. We compare two views by + * `desc.fullIdent` so multi-level namespaces (v2 catalogs) are distinguished. */ def checkCyclicViewReference( plan: LogicalPlan, - path: Seq[TableIdentifier], - viewIdent: TableIdentifier): Unit = { + path: Seq[Seq[String]], + viewIdent: Seq[String]): Unit = { plan match { case v: View => - val ident = v.desc.identifier + val ident = v.desc.fullIdent val newPath = path :+ ident // If the table identifier equals to the `viewIdent`, current view node is the same with // the altered view. We detect a view reference cycle, should throw an AnalysisException. @@ -671,7 +673,7 @@ object ViewHelper extends SQLConfHelper with Logging with CapturesConfig { if (!storeAnalyzedPlanForView) { // Skip cyclic check because when stored analyzed plan for view, the depended // view is already converted to the underlying tables. So no cyclic views. - checkCyclicViewReference(analyzedPlan, Seq(name), name) + checkCyclicViewReference(analyzedPlan, Seq(name.nameParts), name.nameParts) } CommandUtils.uncacheTableOrView(session, name) } @@ -870,18 +872,37 @@ object CheckViewReferences extends (LogicalPlan => Unit) { s"Unexpected child of view command: ${other.getClass.getName}") } + private def fullIdentFor(resolved: LogicalPlan): Seq[String] = resolved match { + case ri: ResolvedIdentifier => + ri.catalog.name() +: ri.identifier.asMultipartIdentifier + case rpv: ResolvedPersistentView => + rpv.catalog.name() +: rpv.identifier.asMultipartIdentifier + case other => + throw SparkException.internalError( + s"Unexpected child of view command: ${other.getClass.getName}") + } + override def apply(plan: LogicalPlan): Unit = plan.foreach { case cv: CreateView if cv.isAnalyzed => val legacyName = legacyNameFor(cv.child) verifyTemporaryObjectsNotExists( isTemporary = false, legacyName, cv.query, cv.referredTempFunctions) verifyAutoGeneratedAliasesNotExists(cv.query, isTemporary = false, legacyName) + 
// Cycles can only form when REPLACE'ing an existing view; a plain CREATE against an + // existing view fails earlier with `viewAlreadyExistsError` and against a non-existent + // view has nothing to cycle with. + if (cv.replace) { + val fullIdent = fullIdentFor(cv.child) + checkCyclicViewReference(cv.query, Seq(fullIdent), fullIdent) + } case av: AlterViewAs if av.isAnalyzed => val legacyName = legacyNameFor(av.child) verifyTemporaryObjectsNotExists( isTemporary = false, legacyName, av.query, av.referredTempFunctions) verifyAutoGeneratedAliasesNotExists(av.query, isTemporary = false, legacyName) + val fullIdent = fullIdentFor(av.child) + checkCyclicViewReference(av.query, Seq(fullIdent), fullIdent) case _ => } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala index a23eef393b4c8..45dc7bb1524fa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Identifier, MetadataOnlyTable, StagedTable, StagingTableCatalog, TableCatalog, TableInfo, V1Table} import org.apache.spark.sql.errors.QueryCompilationErrors -import org.apache.spark.sql.execution.command.{CommandUtils, ViewHelper} +import org.apache.spark.sql.execution.command.CommandUtils import org.apache.spark.sql.execution.metric.SQLMetric import org.apache.spark.util.Utils @@ -101,7 +101,7 @@ case class AlterV2ViewExec( // we do any other work. 
val _ = existingTable val info = buildTableInfo() - ViewHelper.checkCyclicViewReference(query, Seq(legacyName), legacyName) + // Cyclic reference detection is done at analysis time in CheckViewReferences. CommandUtils.uncacheTableOrView(session, ResolvedIdentifier(catalog, identifier)) catalog.dropTable(identifier) try { @@ -131,7 +131,7 @@ case class AtomicAlterV2ViewExec( override protected def run(): Seq[InternalRow] = { val _ = existingTable val info = buildTableInfo() - ViewHelper.checkCyclicViewReference(query, Seq(legacyName), legacyName) + // Cyclic reference detection is done at analysis time in CheckViewReferences. CommandUtils.uncacheTableOrView(session, ResolvedIdentifier(catalog, identifier)) val staged: StagedTable = try { catalog.stageReplace(identifier, info) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala index aeea54937e79e..444895c85aa03 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala @@ -143,7 +143,7 @@ case class CreateV2ViewExec( return Seq.empty } if (!replace) throw viewAlreadyExists() - ViewHelper.checkCyclicViewReference(query, Seq(legacyName), legacyName) + // Cyclic reference detection is done at analysis time in CheckViewReferences. CommandUtils.uncacheTableOrView(session, ResolvedIdentifier(catalog, identifier)) catalog.dropTable(identifier) } @@ -189,8 +189,8 @@ case class AtomicCreateV2ViewExec( return Seq.empty } val staged: StagedTable = if (replace) { + // Cyclic reference detection is done at analysis time in CheckViewReferences. 
if (catalog.tableExists(identifier)) { - ViewHelper.checkCyclicViewReference(query, Seq(legacyName), legacyName) CommandUtils.uncacheTableOrView(session, ResolvedIdentifier(catalog, identifier)) } catalog.stageCreateOrReplace(identifier, info) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala index 4afa9f7ce97f3..98b1b1b8b92ff 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala @@ -82,6 +82,31 @@ class DataSourceV2MetadataOnlyTableSuite extends QueryTest with SharedSparkSessi val tableName = "table_catalog.default.test_v2" checkAnswer(spark.table(tableName), 0.until(10).map(i => Row(i, -i))) } + + test("fully-qualified column reference uses the real catalog name") { + withTempPath { path => + val loc = path.getCanonicalPath + val tableName = s"table_catalog.`$loc`.test_json" + + spark.range(3).select($"id".cast("string").as("col")).write.json(loc) + + // 1-part and 2-part references resolve via last-part suffix matching. + checkAnswer( + sql(s"SELECT test_json.col FROM $tableName"), + Seq(Row("0"), Row("1"), Row("2"))) + checkAnswer( + sql(s"SELECT `$loc`.test_json.col FROM $tableName"), + Seq(Row("0"), Row("1"), Row("2"))) + + // 3-part reference must use `table_catalog`. The v1 `SessionCatalog.getRelation` that + // `RelationResolution.createRelation` delegates to hardcodes `spark_catalog` in the + // SubqueryAlias qualifier, so the attribute qualifier becomes + // `[spark_catalog, , test_json]` -- the reference below fails to resolve. 
+ checkAnswer( + sql(s"SELECT $tableName.col FROM $tableName"), + Seq(Row("0"), Row("1"), Row("2"))) + } + } } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala index 1d179685df222..59e0f2041eab2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala @@ -32,6 +32,12 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap * (`CreateV2ViewExec` / `AlterV2ViewExec` and their atomic staging variants). * Data-source-table read paths live in * [[org.apache.spark.sql.connector.DataSourceV2MetadataOnlyTableSuite]]. + * + * TODO: once the remaining v2 view DDL is implemented (SET/UNSET TBLPROPERTIES, SHOW CREATE + * VIEW, RENAME TO, SCHEMA BINDING, DESCRIBE / SHOW TBLPROPERTIES on v2 views), register a + * `MetadataOnlyTable`-backed `DelegatingCatalogExtension` as `spark.sql.catalog.spark_catalog` + * and run the shared [[org.apache.spark.sql.execution.PersistedViewTestSuite]] body against + * the v2 path for full parity with the v1 persisted-view coverage. */ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSession { import testImplicits._ @@ -374,6 +380,59 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio } } + test("cyclic detection distinguishes views across multi-level namespaces") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + + // Two views whose last namespace segment collides (`inner`) but whose full multi-part + // identifiers differ. 
Before the `fullIdent` change both collapsed to + // `TableIdentifier(v, Some("inner"), Some("view_catalog"))` and cyclic detection would + // false-positive on a legitimate cross-namespace REPLACE. + sql("CREATE VIEW view_catalog.ns1.inner.v AS SELECT x FROM spark_catalog.default.t") + sql("CREATE VIEW view_catalog.ns2.inner.v AS " + + "SELECT x FROM view_catalog.ns1.inner.v") + // Legitimate non-cyclic REPLACE -- new body references a different view that happens to + // share the last namespace segment. Must not false-positive. + sql("CREATE OR REPLACE VIEW view_catalog.ns1.inner.v AS " + + "SELECT x FROM spark_catalog.default.t WHERE x > 1") + checkAnswer(spark.table("view_catalog.ns1.inner.v"), Seq(Row(2), Row(3))) + + // Real cycle across the two namespaces must still be caught. + val ex = intercept[AnalysisException] { + sql("CREATE OR REPLACE VIEW view_catalog.ns1.inner.v AS " + + "SELECT x FROM view_catalog.ns2.inner.v") + } + assert(ex.getCondition == "RECURSIVE_VIEW") + } + } + + test("ALTER VIEW cyclic detection distinguishes views across multi-level namespaces") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + + sql("CREATE VIEW view_catalog.ns1.inner.v_alter AS " + + "SELECT x FROM spark_catalog.default.t") + sql("CREATE VIEW view_catalog.ns2.inner.v_alter AS " + + "SELECT x FROM view_catalog.ns1.inner.v_alter") + + // Legitimate non-cyclic ALTER -- new body does not reference the altered view. Before + // `fullIdent` this false-positived because the two views collapsed to the same + // TableIdentifier(v_alter, Some("inner"), Some("view_catalog")). + sql("ALTER VIEW view_catalog.ns1.inner.v_alter AS " + + "SELECT x FROM spark_catalog.default.t WHERE x > 1") + checkAnswer( + spark.table("view_catalog.ns1.inner.v_alter"), + Seq(Row(2), Row(3))) + + // Real cycle across the two namespaces must still be caught. 
+ val ex = intercept[AnalysisException] { + sql("ALTER VIEW view_catalog.ns1.inner.v_alter AS " + + "SELECT x FROM view_catalog.ns2.inner.v_alter") + } + assert(ex.getCondition == "RECURSIVE_VIEW") + } + } + test("ALTER VIEW detects cyclic view references") { withTable("spark_catalog.default.t") { Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") From db16d6950a3cf3f64302af6b0cc854935ec5d4a3 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 22 Apr 2026 18:39:50 +0000 Subject: [PATCH 20/59] reject CREATE VIEW over a non-view table; preserve PROP_OWNER on ALTER - CreateV2ViewExec / AtomicCreateV2ViewExec: replace the separate `tableExists` + implicit-assume-view flow with a single `loadTable` round-trip and a `MetadataOnlyTable` + PROP_TABLE_TYPE=VIEW check. REPLACE'ing a non-view table as a view is rejected with EXPECT_VIEW_NOT_TABLE.NO_ALTERNATIVE; plain CREATE surfaces TABLE_OR_VIEW_ALREADY_EXISTS; IF NOT EXISTS remains a no-op. Matches v1 CreateViewCommand semantics. - V2AlterViewPreparation: stop stripping TABLE_RESERVED_PROPERTIES from the existing view's properties. PROP_OWNER (and other non-transient reserved fields) now flow through unchanged, matching v1 AlterViewAsCommand.alterPermanentView's viewMeta.copy semantics. Keys the ALTER actually changes are overwritten downstream. - CheckViewReferences: collapse duplicated legacyNameFor/fullIdentFor extractors onto a shared `catalogAndIdent` helper. - Tests: add three new cases - CREATE/REPLACE-over-non-view-table rejection on both plain and staging catalogs, PROP_OWNER preservation across ALTER VIEW AS, and SCHEMA EVOLUTION mode preservation across ALTER VIEW AS. 
--- .../spark/sql/execution/command/views.scala | 48 +++---- .../datasources/v2/AlterV2ViewExec.scala | 16 ++- .../datasources/v2/CreateV2ViewExec.scala | 44 +++++- .../DataSourceV2MetadataOnlyViewSuite.scala | 126 +++++++++++++++++- 4 files changed, 197 insertions(+), 37 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index 5c106c1d3bb3a..d856e3fce8108 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -33,6 +33,7 @@ import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, SubqueryExpr import org.apache.spark.sql.catalyst.plans.logical.{AlterViewAs, AnalysisOnlyCommand, CreateTempView, CreateView, CTEInChildren, CTERelationDef, LogicalPlan, Project, View, WithCTE} import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.classic.ClassicConversions.castToImpl +import org.apache.spark.sql.connector.catalog.{CatalogPlugin, Identifier} import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.{IdentifierHelper, NamespaceHelper} import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation @@ -856,30 +857,29 @@ object ViewHelper extends SQLConfHelper with Logging with CapturesConfig { object CheckViewReferences extends (LogicalPlan => Unit) { import ViewHelper._ - private def legacyNameFor(resolved: LogicalPlan): TableIdentifier = resolved match { - case ri: ResolvedIdentifier => - TableIdentifier( - table = ri.identifier.name(), - database = ri.identifier.namespace().lastOption, - catalog = Some(ri.catalog.name())) - case rpv: ResolvedPersistentView => - TableIdentifier( - table = rpv.identifier.name(), - database = rpv.identifier.namespace().lastOption, - catalog = Some(rpv.catalog.name())) - case other => 
- throw SparkException.internalError( - s"Unexpected child of view command: ${other.getClass.getName}") - } - - private def fullIdentFor(resolved: LogicalPlan): Seq[String] = resolved match { - case ri: ResolvedIdentifier => - ri.catalog.name() +: ri.identifier.asMultipartIdentifier - case rpv: ResolvedPersistentView => - rpv.catalog.name() +: rpv.identifier.asMultipartIdentifier - case other => - throw SparkException.internalError( - s"Unexpected child of view command: ${other.getClass.getName}") + // Extract (catalog, identifier) for the two resolved shapes view commands reach us with: + // `ResolvedIdentifier` for CREATE VIEW on a new target, `ResolvedPersistentView` for ALTER + // VIEW or CREATE OR REPLACE VIEW on an existing view. All other shapes are an analyzer bug. + private def catalogAndIdent(resolved: LogicalPlan): (CatalogPlugin, Identifier) = + resolved match { + case ri: ResolvedIdentifier => (ri.catalog, ri.identifier) + case rpv: ResolvedPersistentView => (rpv.catalog, rpv.identifier) + case other => + throw SparkException.internalError( + s"Unexpected child of view command: ${other.getClass.getName}") + } + + private def legacyNameFor(resolved: LogicalPlan): TableIdentifier = { + val (catalog, ident) = catalogAndIdent(resolved) + TableIdentifier( + table = ident.name(), + database = ident.namespace().lastOption, + catalog = Some(catalog.name())) + } + + private def fullIdentFor(resolved: LogicalPlan): Seq[String] = { + val (catalog, ident) = catalogAndIdent(resolved) + catalog.name() +: ident.asMultipartIdentifier } override def apply(plan: LogicalPlan): Unit = plan.foreach { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala index 45dc7bb1524fa..e97080e65664a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, ResolvedIdentifier, TableAlreadyExistsException, ViewSchemaMode} import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Identifier, MetadataOnlyTable, StagedTable, StagingTableCatalog, TableCatalog, TableInfo, V1Table} +import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, StagedTable, StagingTableCatalog, TableCatalog, TableInfo, V1Table} import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.command.CommandUtils import org.apache.spark.sql.execution.metric.SQLMetric @@ -75,12 +75,16 @@ private[v2] trait V2AlterViewPreparation extends V2ViewPreparation { override def userSpecifiedColumns: Seq[(String, Option[String])] = Seq.empty override def comment: Option[String] = existingProp(TableCatalog.PROP_COMMENT) override def collation: Option[String] = existingProp(TableCatalog.PROP_COLLATION) - // Strip reserved keys; those become first-class `TableInfo` / `CatalogTable` fields or are - // re-emitted by `buildTableInfo` (view text, current-catalog-namespace, comment, collation). - // User TBLPROPERTIES and view.sqlConfig.* / view.query.out.* / view.referredTempNames / - // view.schemaMode pass through -- generateViewProperties handles their cleanup + re-emit. + // Carry the existing view's full property map forward. 
Keys the ALTER actually changes are + // overwritten downstream: view text + PROP_TABLE_TYPE via `withViewText`, comment / collation + // via `withComment` / `withCollation`, view.sqlConfig.* / view.query.out.* / + // view.referredTempNames re-emitted by `generateViewProperties`, and + // PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE re-emitted by the v2 encoder inside + // `buildTableInfo`. Everything else -- notably PROP_OWNER and view.schemaMode -- flows + // through unchanged, matching v1 `AlterViewAsCommand.alterPermanentView`'s `viewMeta.copy` + // semantics. override def userProperties: Map[String, String] = - existingInfo.properties.asScala.toMap -- CatalogV2Util.TABLE_RESERVED_PROPERTIES + existingInfo.properties.asScala.toMap override def viewSchemaMode: ViewSchemaMode = existingCatalogTable.viewSchemaMode } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala index 444895c85aa03..d7a65688c0ef2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala @@ -22,11 +22,11 @@ import scala.jdk.CollectionConverters._ import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.analysis.{ResolvedIdentifier, SchemaEvolution, TableAlreadyExistsException, ViewSchemaMode} +import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, ResolvedIdentifier, SchemaEvolution, TableAlreadyExistsException, ViewSchemaMode} import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, QuotingUtils} -import 
org.apache.spark.sql.connector.catalog.{Identifier, StagedTable, StagingTableCatalog, TableCatalog, TableInfo} +import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, StagedTable, StagingTableCatalog, Table, TableCatalog, TableInfo, TableSummary} import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.command.{CommandUtils, ViewHelper} import org.apache.spark.sql.execution.metric.SQLMetric @@ -116,6 +116,27 @@ private[v2] trait V2ViewPreparation extends LeafV2CommandExec { protected def viewAlreadyExists(): Throwable = QueryCompilationErrors.viewAlreadyExistsError(legacyName) + + // Loads the existing entry at `identifier` or returns None if it does not exist. Combines + // the existence check and type check into a single catalog round-trip (vs. the previous + // tableExists + implicit assume-view flow). + protected def tryLoadTable(): Option[Table] = { + try { + Some(catalog.loadTable(identifier)) + } catch { + case _: NoSuchTableException => None + } + } + + // A catalog with SUPPORTS_VIEW round-trips views as MetadataOnlyTable with PROP_TABLE_TYPE + // set to VIEW. Anything else at the same identifier is a non-view table -- REPLACE'ing it as + // a view would silently destroy the table's data, so we reject at the exec layer. 
+ protected def isViewTable(table: Table): Boolean = table match { + case mot: MetadataOnlyTable => + TableSummary.VIEW_TABLE_TYPE.equals( + mot.getTableInfo.properties.get(TableCatalog.PROP_TABLE_TYPE)) + case _ => false + } } /** @@ -138,16 +159,20 @@ case class CreateV2ViewExec( override protected def run(): Seq[InternalRow] = { val info = buildTableInfo() - if (catalog.tableExists(identifier)) { + tryLoadTable().foreach { existing => if (allowExisting) { return Seq.empty } + if (!isViewTable(existing)) { + throw QueryCompilationErrors.unsupportedCreateOrReplaceViewOnTableError( + legacyName, replace) + } if (!replace) throw viewAlreadyExists() // Cyclic reference detection is done at analysis time in CheckViewReferences. CommandUtils.uncacheTableOrView(session, ResolvedIdentifier(catalog, identifier)) catalog.dropTable(identifier) } - // TOCTOU: if another writer creates the table between tableExists and createTable, a bare + // TOCTOU: if another writer creates an entry between tryLoadTable and createTable, a bare // TableAlreadyExistsException is unhelpful; present the same viewAlreadyExists error the // atomic path uses. try { @@ -185,12 +210,19 @@ case class AtomicCreateV2ViewExec( // both execs: the malformed view body is rejected even when the allow-existing short- // circuit would otherwise skip creation. val info = buildTableInfo() - if (allowExisting && catalog.tableExists(identifier)) { + val existing = tryLoadTable() + if (allowExisting && existing.isDefined) { return Seq.empty } + existing.foreach { table => + if (!isViewTable(table)) { + throw QueryCompilationErrors.unsupportedCreateOrReplaceViewOnTableError( + legacyName, replace) + } + } val staged: StagedTable = if (replace) { // Cyclic reference detection is done at analysis time in CheckViewReferences. 
- if (catalog.tableExists(identifier)) { + if (existing.isDefined) { CommandUtils.uncacheTableOrView(session, ResolvedIdentifier(catalog, identifier)) } catalog.stageCreateOrReplace(identifier, info) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala index 59e0f2041eab2..4fb368c05ebf2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.connector import org.apache.spark.SparkConf import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, TableAlreadyExistsException} -import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, StagedTable, StagingTableCatalog, Table, TableCatalog, TableCatalogCapability, TableChange, TableInfo} +import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, StagedTable, StagingTableCatalog, Table, TableCatalog, TableCatalogCapability, TableChange, TableInfo, TableSummary} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.StructType @@ -254,6 +254,45 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio } } + test("CREATE VIEW over a non-view table entry is rejected (plain TableCatalog)") { + val catalog = spark.sessionState.catalogManager.catalog("view_catalog") + .asInstanceOf[TestingViewCatalog] + val tableIdent = Identifier.of(Array("default"), "v_existing_table") + val tableInfo = new TableInfo.Builder() + .withSchema(new StructType().add("col", "string")) + .withTableType(TableSummary.EXTERNAL_TABLE_TYPE) + .build() + catalog.createTable(tableIdent, tableInfo) + 
try { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + + // CREATE OR REPLACE VIEW must not silently destroy a non-view table -- v1 parity. + val replaceEx = intercept[AnalysisException] { + sql("CREATE OR REPLACE VIEW view_catalog.default.v_existing_table AS " + + "SELECT x FROM spark_catalog.default.t") + } + assert(replaceEx.getCondition == "EXPECT_VIEW_NOT_TABLE.NO_ALTERNATIVE") + + // Plain CREATE VIEW over a table surfaces TABLE_OR_VIEW_ALREADY_EXISTS, matching v1. + val createEx = intercept[AnalysisException] { + sql("CREATE VIEW view_catalog.default.v_existing_table AS " + + "SELECT x FROM spark_catalog.default.t") + } + assert(createEx.getCondition == "TABLE_OR_VIEW_ALREADY_EXISTS") + + // CREATE VIEW IF NOT EXISTS is a no-op -- the table entry is untouched. + sql("CREATE VIEW IF NOT EXISTS view_catalog.default.v_existing_table AS " + + "SELECT x FROM spark_catalog.default.t") + val stored = catalog.getStoredView(Array("default"), "v_existing_table") + assert(stored.properties().get(TableCatalog.PROP_TABLE_TYPE) == + TableSummary.EXTERNAL_TABLE_TYPE) + } + } finally { + catalog.dropTable(tableIdent) + } + } + // --- CREATE VIEW on a StagingTableCatalog ------------------------------- test("CREATE VIEW on a StagingTableCatalog uses the atomic exec") { @@ -294,6 +333,48 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio } } + test("CREATE VIEW over a non-view table entry is rejected (StagingTableCatalog)") { + withSQLConf( + "spark.sql.catalog.staging_catalog" -> classOf[TestingStagingCatalog].getName) { + val stagingCatalog = spark.sessionState.catalogManager.catalog("staging_catalog") + .asInstanceOf[TestingStagingCatalog] + val tableIdent = Identifier.of(Array("default"), "v_existing_table") + val tableInfo = new TableInfo.Builder() + .withSchema(new StructType().add("col", "string")) + .withTableType(TableSummary.EXTERNAL_TABLE_TYPE) + .build() + 
stagingCatalog.createTable(tableIdent, tableInfo) + try { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + + // CREATE OR REPLACE VIEW must not silently destroy a non-view table. On a staging + // catalog this specifically guards against `stageCreateOrReplace` committing over + // the table. + val replaceEx = intercept[AnalysisException] { + sql("CREATE OR REPLACE VIEW staging_catalog.default.v_existing_table AS " + + "SELECT x FROM spark_catalog.default.t") + } + assert(replaceEx.getCondition == "EXPECT_VIEW_NOT_TABLE.NO_ALTERNATIVE") + + val createEx = intercept[AnalysisException] { + sql("CREATE VIEW staging_catalog.default.v_existing_table AS " + + "SELECT x FROM spark_catalog.default.t") + } + assert(createEx.getCondition == "TABLE_OR_VIEW_ALREADY_EXISTS") + + sql("CREATE VIEW IF NOT EXISTS staging_catalog.default.v_existing_table AS " + + "SELECT x FROM spark_catalog.default.t") + val loaded = stagingCatalog.loadTable(tableIdent).asInstanceOf[MetadataOnlyTable] + assert(loaded.getTableInfo.properties.get(TableCatalog.PROP_TABLE_TYPE) == + TableSummary.EXTERNAL_TABLE_TYPE) + } + } finally { + stagingCatalog.dropTable(tableIdent) + } + } + } + // --- ALTER VIEW --------------------------------------------------------- test("ALTER VIEW ... AS updates the view body on a v2 catalog") { @@ -348,6 +429,49 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio } } + test("ALTER VIEW preserves PROP_OWNER (v1-parity)") { + val catalog = spark.sessionState.catalogManager.catalog("view_catalog") + .asInstanceOf[TestingViewCatalog] + val viewIdent = Identifier.of(Array("default"), "v_owner") + // Pre-seed a view whose stored TableInfo carries an explicit owner. 
+ val initialInfo = new TableInfo.Builder() + .withSchema(new StructType().add("x", "int")) + .withViewText("SELECT 1 AS x") + .withOwner("alice") + .withCurrentCatalogAndNamespace("spark_catalog", Array("default")) + .build() + catalog.createTable(viewIdent, initialInfo) + try { + withTable("spark_catalog.default.t") { + Seq(2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("ALTER VIEW view_catalog.default.v_owner AS " + + "SELECT x FROM spark_catalog.default.t") + // v1 ALTER VIEW AS carries `owner` forward via `viewMeta.copy(...)`. v2 must match: + // the stored TableInfo after the ALTER should still have the original owner. + val info = catalog.getStoredView(Array("default"), "v_owner") + assert(info.properties().get(TableCatalog.PROP_OWNER) == "alice") + } + } finally { + catalog.dropTable(viewIdent) + } + } + + test("ALTER VIEW preserves SCHEMA EVOLUTION binding mode") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW view_catalog.default.v_evo WITH SCHEMA EVOLUTION AS " + + "SELECT x FROM spark_catalog.default.t") + sql("ALTER VIEW view_catalog.default.v_evo AS " + + "SELECT x + 1 AS x FROM spark_catalog.default.t") + + val catalog = spark.sessionState.catalogManager.catalog("view_catalog") + .asInstanceOf[TestingViewCatalog] + val info = catalog.getStoredView(Array("default"), "v_evo") + // Use the same stored key v1 uses (CatalogTable.VIEW_SCHEMA_MODE = "view.schemaMode"). 
+ assert(info.properties().get("view.schemaMode") == "EVOLUTION") + } + } + test("ALTER VIEW on a StagingTableCatalog uses the atomic exec (stageReplace)") { withSQLConf( "spark.sql.catalog.staging_catalog" -> classOf[TestingStagingCatalog].getName) { From 206f2dd63f6783159a1348457721805587cc6c94 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 23 Apr 2026 01:27:10 +0000 Subject: [PATCH 21/59] address self-review findings: simplify Analyzer and strategy, tighten errors, new tests - drop unused `viewOnly` parameter on `Analyzer.lookupTableOrView` - reorder `CreateV2ViewExec`/`AtomicCreateV2ViewExec` to short-circuit IF NOT EXISTS before building the TableInfo, matching v1 `CreateViewCommand.run` - extract `CatalogTable.viewSchemaModeFromProperties` so `V2AlterViewPreparation` no longer round-trips through `V1Table.toCatalogTable` just to read the mode - cross-reference v1/v2 view-check locations in `CreateViewCommand` and `AlterViewAsCommand` Scaladoc - document `TableInfo.Builder.withProperties` / convenience-setter ordering on `withProperties` itself and add brief docs to the convenience setters - require a view-typed `MetadataOnlyTable` at ALTER VIEW exec time (tightens the race-between-analysis-and-exec surface) - rename `CatalogTable.fullIdentOpt` to `multipartIdentifier` - widen `viewDepthExceedsMaxResolutionDepthError` to take `Seq[String]` so v2 multi-level namespaces are reflected in the error message - move the `SUPPORTS_VIEW` gate from `DataSourceV2Strategy` into `CheckViewReferences`; strategy cases now cast directly since analysis verifies the capability first - add regression tests: ALTER VIEW re-captures current session SQL configs; CREATE OR REPLACE VIEW whose new body references a nonexistent table fails at analysis Co-authored-by: Isaac --- .../sql/connector/catalog/TableInfo.java | 12 ++++- .../sql/catalyst/analysis/Analyzer.scala | 13 ++--- .../catalyst/analysis/ViewResolution.scala | 2 +- .../analysis/resolver/ViewResolver.scala | 2 +- 
.../sql/catalyst/catalog/SessionCatalog.scala | 6 +-- .../sql/catalyst/catalog/interface.scala | 49 ++++++++++-------- .../spark/sql/connector/catalog/V1Table.scala | 12 +++-- .../sql/errors/QueryCompilationErrors.scala | 4 +- .../spark/sql/execution/command/views.scala | 48 +++++++++++++---- .../datasources/v2/AlterV2ViewExec.scala | 51 +++++++++---------- .../datasources/v2/CreateV2ViewExec.scala | 32 +++++++----- .../datasources/v2/DataSourceV2Strategy.scala | 27 +++------- .../DataSourceV2MetadataOnlyViewSuite.scala | 37 ++++++++++++++ 13 files changed, 183 insertions(+), 112 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableInfo.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableInfo.java index 25a4859a39a78..64a9dedabc611 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableInfo.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableInfo.java @@ -77,6 +77,11 @@ public Builder withSchema(StructType schema) { return this; } + /** + * Replaces the current properties map with a defensive copy of the given map. Any reserved + * keys set earlier via convenience setters (e.g. {@link #withProvider}, {@link #withViewText}) + * are discarded -- call those setters after this method, not before. + */ public Builder withProperties(Map properties) { this.properties = new HashMap<>(properties); return this; @@ -92,9 +97,12 @@ public Builder withConstraints(Constraint[] constraints) { return this; } - // Convenience setters that write reserved keys into `properties`. These mutate the current - // properties map, so call them after any `withProperties(...)` that replaces the map. + // Convenience setters below write reserved keys into the current `properties` map. 
Pair + // each with a preceding `withProperties(...)` call if you want to start from a user map; + // calling `withProperties` after a convenience setter discards the convenience setter's + // write. + /** Writes {@link TableCatalog#PROP_PROVIDER} into the current properties map. */ public Builder withProvider(String provider) { properties.put(TableCatalog.PROP_PROVIDER, provider); return this; diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index b2672971cb5fb..d218e33383645 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1078,7 +1078,7 @@ class Analyzer( }.getOrElse(u) case u @ UnresolvedView(identifier, cmd, allowTemp, suggestAlternative) => - lookupTableOrView(identifier, viewOnly = true).map { + lookupTableOrView(identifier).map { case _: ResolvedTempView if !allowTemp => throw QueryCompilationErrors.expectPermanentViewNotTempViewError( identifier, cmd, u) @@ -1100,20 +1100,15 @@ class Analyzer( /** * Resolves relations to `ResolvedTable` or `Resolved[Temp/Persistent]View`. This is - * for resolving DDL and misc commands. + * for resolving DDL and misc commands. UnresolvedView callers reject non-view results + * downstream via `expectViewNotTableError`. */ - private def lookupTableOrView( - identifier: Seq[String], - viewOnly: Boolean = false): Option[LogicalPlan] = { + private def lookupTableOrView(identifier: Seq[String]): Option[LogicalPlan] = { relationResolution.lookupTempView(identifier).map { tempView => ResolvedTempView(identifier.asIdentifier, tempView.tableMeta) }.orElse { relationResolution.expandIdentifier(identifier) match { case CatalogAndIdentifier(catalog, ident) => - // Previously view-only lookups rejected non-session catalogs outright. 
With - // `MetadataOnlyTable`, non-session catalogs can now expose views, so instead we - // let the lookup proceed and rely on the downstream match -- a non-view result is - // converted into the standard `expectViewNotTableError` by UnresolvedView's caller. CatalogV2Util.loadTable(catalog, ident).map { case v1Table: V1Table if CatalogV2Util.isSessionCatalog(catalog) && v1Table.v1Table.tableType == CatalogTableType.VIEW => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ViewResolution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ViewResolution.scala index faa3b9081cbfd..b0f0ef3b092c1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ViewResolution.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ViewResolution.scala @@ -38,7 +38,7 @@ object ViewResolution { val maxNestedViewDepth = AnalysisContext.get.maxNestedViewDepth if (nestedViewDepth > maxNestedViewDepth) { throw QueryCompilationErrors.viewDepthExceedsMaxResolutionDepthError( - view.desc.identifier, + view.desc.fullIdent, maxNestedViewDepth, view ) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/ViewResolver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/ViewResolver.scala index 992f065ef3aa2..a224e521b548b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/ViewResolver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/ViewResolver.scala @@ -193,7 +193,7 @@ case class ViewResolutionContext( def validate(unresolvedView: View): Unit = { if (nestedViewDepth > maxNestedViewDepth) { throw QueryCompilationErrors.viewDepthExceedsMaxResolutionDepthError( - unresolvedView.desc.identifier, + unresolvedView.desc.fullIdent, maxNestedViewDepth, unresolvedView ) diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 33a30e3cdcc11..554bded472bde 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -1054,11 +1054,11 @@ class SessionCatalog( def getRelation( metadata: CatalogTable, options: CaseInsensitiveStringMap = CaseInsensitiveStringMap.empty()): LogicalPlan = { - // Prefer `fullIdentOpt` (set by non-session v2 catalogs via `V1Table.toCatalogTable`) so - // the SubqueryAlias qualifier reflects the real catalog + multi-part namespace. + // Prefer `multipartIdentifier` (set by non-session v2 catalogs via `V1Table.toCatalogTable`) + // so the SubqueryAlias qualifier reflects the real catalog + multi-part namespace. // Fall back to `qualifyIdentifier` for v1 session-catalog tables: it defaults catalog to // `SESSION_CATALOG_NAME` and database to the current database when either is missing. - val multiParts = metadata.fullIdentOpt.getOrElse { + val multiParts = metadata.multipartIdentifier.getOrElse { val qualifiedIdent = qualifyIdentifier(metadata.identifier) qualifiedIdent.nameParts } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index 384da3716f192..d5f6114261f68 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -445,20 +445,20 @@ case class CatalogTable( schemaPreservesCase: Boolean = true, ignoredProperties: Map[String, String] = Map.empty, viewOriginalText: Option[String] = None, - // Optional full multi-part identifier [catalog, namespace..., name]. 
Set when the v1 - // `identifier: TableIdentifier` (single-string database) cannot losslessly carry the real - // multi-level namespace -- e.g. a CatalogTable synthesized from a v2 MetadataOnlyTable - // whose v2 identifier has more than one namespace part. `None` for v1-native tables. - fullIdentOpt: Option[Seq[String]] = None) + // Multi-part identifier [catalog, namespace..., name] for tables synthesized from a v2 + // `MetadataOnlyTable` whose namespace has more than one part -- the v1 `identifier: + // TableIdentifier` (single-string database) cannot carry that losslessly. `None` for + // v1-native tables; callers should use `fullIdent` which falls back to `identifier.nameParts`. + multipartIdentifier: Option[Seq[String]] = None) extends MetadataMapSupport { import CatalogTable._ /** - * The fully-qualified multi-part identifier. Prefers `fullIdentOpt` when set (v2-sourced + * The fully-qualified multi-part identifier. Prefers `multipartIdentifier` when set (v2-sourced * tables with multi-level namespaces); otherwise reconstructs from `identifier.nameParts`. */ - def fullIdent: Seq[String] = fullIdentOpt.getOrElse(identifier.nameParts) + def fullIdent: Seq[String] = multipartIdentifier.getOrElse(identifier.nameParts) /** * schema of this table's partition columns @@ -554,20 +554,7 @@ case class CatalogTable( * Return the schema binding mode. 
Defaults to SchemaBinding if not a view or an older * version, unless the viewSchemaBindingMode config is set to false */ - def viewSchemaMode: ViewSchemaMode = { - if (!SQLConf.get.viewSchemaBindingEnabled) { - SchemaUnsupported - } else { - val schemaMode = properties.getOrElse(VIEW_SCHEMA_MODE, SchemaBinding.toString) - schemaMode match { - case SchemaBinding.toString => SchemaBinding - case SchemaEvolution.toString => SchemaEvolution - case SchemaTypeEvolution.toString => SchemaTypeEvolution - case SchemaCompensation.toString => SchemaCompensation - case other => throw SparkException.internalError("Unexpected ViewSchemaMode") - } - } - } + def viewSchemaMode: ViewSchemaMode = CatalogTable.viewSchemaModeFromProperties(properties) /** * Return temporary view names the current view was referred. should be empty if the @@ -780,6 +767,26 @@ object CatalogTable { val PROP_CLUSTERING_COLUMNS: String = "clusteringColumns" + /** + * Decode the view schema binding mode from a properties map. Shared between + * [[CatalogTable.viewSchemaMode]] and the v2 ALTER VIEW path which reads the mode directly + * from the existing view's [[TableInfo]] properties without materializing a full CatalogTable. 
+ */ + def viewSchemaModeFromProperties(properties: Map[String, String]): ViewSchemaMode = { + if (!SQLConf.get.viewSchemaBindingEnabled) { + SchemaUnsupported + } else { + val schemaMode = properties.getOrElse(VIEW_SCHEMA_MODE, SchemaBinding.toString) + schemaMode match { + case SchemaBinding.toString => SchemaBinding + case SchemaEvolution.toString => SchemaEvolution + case SchemaTypeEvolution.toString => SchemaTypeEvolution + case SchemaCompensation.toString => SchemaCompensation + case _ => throw SparkException.internalError("Unexpected ViewSchemaMode") + } + } + } + def splitLargeTableProp( key: String, value: String, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala index 950f13b6d0ab5..81af259e08d53 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala @@ -117,6 +117,10 @@ private[sql] object V1Table { t: MetadataOnlyTable): CatalogTable = { val info = t.getTableInfo val props = info.properties.asScala.toMap + // PROP_TABLE_TYPE is advisory on the v2 side: it may be absent or carry a value that has no + // v1 mapping (e.g. TableSummary.FOREIGN_TABLE_TYPE). v1 only has EXTERNAL/MANAGED/VIEW, so + // anything other than the two explicit mappings below falls back to EXTERNAL for the v1 + // representation -- the same default v1 uses when the value is missing. val tableType = props.get(TableCatalog.PROP_TABLE_TYPE) match { case Some(TableSummary.VIEW_TABLE_TYPE) => CatalogTableType.VIEW case Some(TableSummary.MANAGED_TABLE_TYPE) => CatalogTableType.MANAGED @@ -147,9 +151,9 @@ private[sql] object V1Table { } CatalogTable( // CatalogTable.identifier uses a single-string database; for multi-part namespaces we - // preserve only the last part here and record the full multi-part form in `fullIdentOpt` - // below. 
Callers needing the real fully-qualified name (e.g. cyclic view detection) - // should read `CatalogTable.fullIdent`. + // preserve only the last part here and record the full multi-part form in + // `multipartIdentifier` below. Callers needing the real fully-qualified name (e.g. cyclic + // view detection) should read `CatalogTable.fullIdent`. identifier = TableIdentifier( table = ident.name(), database = ident.namespace().lastOption, @@ -175,7 +179,7 @@ private[sql] object V1Table { properties = tablePropsMap ++ clusterBySpec.map(ClusterBySpec.toPropertyWithoutValidation) ++ viewContextProps, - fullIdentOpt = Some(catalog.name() +: ident.asMultipartIdentifier) + multipartIdentifier = Some(catalog.name() +: ident.asMultipartIdentifier) ) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 21bd24abfced3..4c5e7e3d80f30 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -567,11 +567,11 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat } def viewDepthExceedsMaxResolutionDepthError( - identifier: TableIdentifier, maxNestedDepth: Int, t: TreeNode[_]): Throwable = { + viewNameParts: Seq[String], maxNestedDepth: Int, t: TreeNode[_]): Throwable = { new AnalysisException( errorClass = "VIEW_EXCEED_MAX_NESTED_DEPTH", messageParameters = Map( - "viewName" -> toSQLId(identifier.nameParts), + "viewName" -> toSQLId(viewNameParts), "maxNestedDepth" -> maxNestedDepth.toString), origin = t.origin) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index d856e3fce8108..73fdbf8927f59 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -33,7 +33,7 @@ import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, SubqueryExpr import org.apache.spark.sql.catalyst.plans.logical.{AlterViewAs, AnalysisOnlyCommand, CreateTempView, CreateView, CTEInChildren, CTERelationDef, LogicalPlan, Project, View, WithCTE} import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.classic.ClassicConversions.castToImpl -import org.apache.spark.sql.connector.catalog.{CatalogPlugin, Identifier} +import org.apache.spark.sql.connector.catalog.{CatalogPlugin, Identifier, TableCatalog, TableCatalogCapability} import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.{IdentifierHelper, NamespaceHelper} import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation @@ -47,6 +47,12 @@ import org.apache.spark.util.ArrayImplicits._ * properties(e.g. view default database, view query output column names) and store them as * properties in metastore, if we need to create a permanent view. * + * Note: this is the v1 (session catalog) path. Permanent-view checks (no temp-object refs, + * no auto-generated aliases, no cycles) run at exec time here because Dataset-built commands + * can be constructed with `isAnalyzed=true` and bypass the analyzer's recapture path. The v2 + * equivalent is [[org.apache.spark.sql.catalyst.plans.logical.CreateView]]; its checks run at + * analysis time via [[CheckViewReferences]]. Mirror any new validation in both places. + * * @param name the name of this view. * @param userSpecifiedColumns the output column names and optional comments specified by users, * can be Nil if not specified. 
@@ -210,6 +216,12 @@ case class CreateViewCommand( * this command will try to alter a temporary view first, if view not exist, try permanent view * next, if still not exist, throw an exception. * + * Note: this is the v1 (session catalog) path. Permanent-view checks (no temp-object refs, + * no auto-generated aliases, no cycles) run at exec time here because Dataset-built commands + * can be constructed with `isAnalyzed=true` and bypass the analyzer's recapture path. The v2 + * equivalent is [[org.apache.spark.sql.catalyst.plans.logical.AlterViewAs]]; its checks run at + * analysis time via [[CheckViewReferences]]. Mirror any new validation in both places. + * * @param name the name of this view. * @param originalText the original SQL text of this view. Note that we can only alter a view by * SQL API, which means we always have originalText. @@ -847,19 +859,22 @@ object ViewHelper extends SQLConfHelper with Logging with CapturesConfig { } /** - * Post-analysis check for v2 CREATE VIEW / ALTER VIEW: rejects permanent views that reference - * temporary objects and rejects view bodies with auto-generated aliases. `referredTempFunctions` - * is captured by the command's `markAsAnalyzed` before this rule runs. The v1 counterparts - * [[CreateViewCommand]] and [[AlterViewAsCommand]] keep their existing exec-time checks -- - * Dataset-built commands bypass the analyzer's re-capture path, so the exec-time safety net - * must stay for v1. + * Post-analysis check for v2 CREATE VIEW / ALTER VIEW. First rejects catalogs that do not + * declare [[TableCatalogCapability.SUPPORTS_VIEW]] with `MISSING_CATALOG_ABILITY.VIEWS` -- we + * do this before the temp-object and auto-alias checks so a catalog that cannot host views at + * all surfaces the correct root cause instead of a misleading "references temp" error. Then + * rejects permanent views that reference temporary objects and view bodies with auto-generated + * aliases. 
`referredTempFunctions` is captured by the command's `markAsAnalyzed` before this + * rule runs. The v1 counterparts [[CreateViewCommand]] and [[AlterViewAsCommand]] keep their + * existing exec-time checks -- Dataset-built commands bypass the analyzer's re-capture path, + * so the exec-time safety net must stay for v1. */ object CheckViewReferences extends (LogicalPlan => Unit) { import ViewHelper._ // Extract (catalog, identifier) for the two resolved shapes view commands reach us with: - // `ResolvedIdentifier` for CREATE VIEW on a new target, `ResolvedPersistentView` for ALTER - // VIEW or CREATE OR REPLACE VIEW on an existing view. All other shapes are an analyzer bug. + // `ResolvedIdentifier` for CREATE VIEW, `ResolvedPersistentView` for ALTER VIEW. Other shapes + // are an analyzer bug. private def catalogAndIdent(resolved: LogicalPlan): (CatalogPlugin, Identifier) = resolved match { case ri: ResolvedIdentifier => (ri.catalog, ri.identifier) @@ -882,8 +897,22 @@ object CheckViewReferences extends (LogicalPlan => Unit) { catalog.name() +: ident.asMultipartIdentifier } + // Fail fast if the catalog cannot host views. Gate non-TableCatalog plugins here too so + // callers get the VIEWS-specific error rather than a generic cast failure later. 
+ private def requireSupportsView(resolved: LogicalPlan): Unit = { + val (catalog, _) = catalogAndIdent(resolved) + val supportsView = catalog match { + case tc: TableCatalog => tc.capabilities().contains(TableCatalogCapability.SUPPORTS_VIEW) + case _ => false + } + if (!supportsView) { + throw QueryCompilationErrors.missingCatalogViewsAbilityError(catalog) + } + } + override def apply(plan: LogicalPlan): Unit = plan.foreach { case cv: CreateView if cv.isAnalyzed => + requireSupportsView(cv.child) val legacyName = legacyNameFor(cv.child) verifyTemporaryObjectsNotExists( isTemporary = false, legacyName, cv.query, cv.referredTempFunctions) @@ -897,6 +926,7 @@ object CheckViewReferences extends (LogicalPlan => Unit) { } case av: AlterViewAs if av.isAnalyzed => + requireSupportsView(av.child) val legacyName = legacyNameFor(av.child) verifyTemporaryObjectsNotExists( isTemporary = false, legacyName, av.query, av.referredTempFunctions) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala index e97080e65664a..c639a79309e7e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala @@ -24,7 +24,8 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, ResolvedIdentifier, TableAlreadyExistsException, ViewSchemaMode} import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, StagedTable, StagingTableCatalog, TableCatalog, TableInfo, V1Table} +import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, StagedTable, StagingTableCatalog, TableCatalog} +import 
org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.command.CommandUtils import org.apache.spark.sql.execution.metric.SQLMetric @@ -34,8 +35,11 @@ import org.apache.spark.util.Utils * Shared bits for the v2 ALTER VIEW ... AS execs. Loads the existing view once via * `existingTable` and uses its properties to preserve user-set properties, comment, collation, * and schema-binding mode when constructing the replacement `TableInfo`. A v2 identifier that - * does not resolve to a [[MetadataOnlyTable]] is rejected -- the connector contract for catalogs - * with `SUPPORTS_VIEW` is to round-trip `MetadataOnlyTable` from `loadTable`. + * does not resolve to a view-typed [[MetadataOnlyTable]] is a catalog contract violation -- + * the analyzer has already verified the target is a view, and `SUPPORTS_VIEW` catalogs must + * round-trip `MetadataOnlyTable` from `loadTable`. The only way to hit the internal-error + * branch is a racing DDL between analysis and exec, which is rare enough that we surface it + * as an internal error rather than a user-facing one. * * `generateViewProperties` (invoked from `buildTableInfo`) strips the transient view keys * (SQL configs, query column names, referred-temp names) from the inherited properties and @@ -50,31 +54,14 @@ private[v2] trait V2AlterViewPreparation extends V2ViewPreparation { throw QueryCompilationErrors.noSuchTableError(catalog.name(), identifier) } table match { - case mot: MetadataOnlyTable => mot + case mot: MetadataOnlyTable if isViewTable(mot) => mot case other => - // SUPPORTS_VIEW requires catalogs to round-trip MetadataOnlyTable; getting - // anything else back is a catalog contract violation. 
throw SparkException.internalError( - s"Expected MetadataOnlyTable from $catalog for $identifier, " + - s"got ${other.getClass.getName}") + s"Expected a view-typed MetadataOnlyTable from ${catalog.name()} for " + + s"${identifier.quoted}, got ${other.getClass.getName}") } } - protected lazy val existingInfo: TableInfo = existingTable.getTableInfo - - // Translate once through V1Table so we can delegate semantics like viewSchemaMode to the - // same logic the v1 read path uses (honors viewSchemaBindingEnabled, same default when the - // property is absent). - protected lazy val existingCatalogTable: CatalogTable = - V1Table.toCatalogTable(catalog, identifier, existingTable) - - private def existingProp(key: String): Option[String] = - Option(existingInfo.properties.get(key)) - - // ALTER VIEW ... AS does not accept a user column list. - override def userSpecifiedColumns: Seq[(String, Option[String])] = Seq.empty - override def comment: Option[String] = existingProp(TableCatalog.PROP_COMMENT) - override def collation: Option[String] = existingProp(TableCatalog.PROP_COLLATION) // Carry the existing view's full property map forward. Keys the ALTER actually changes are // overwritten downstream: view text + PROP_TABLE_TYPE via `withViewText`, comment / collation // via `withComment` / `withCollation`, view.sqlConfig.* / view.query.out.* / @@ -83,10 +70,22 @@ private[v2] trait V2AlterViewPreparation extends V2ViewPreparation { // `buildTableInfo`. Everything else -- notably PROP_OWNER and view.schemaMode -- flows // through unchanged, matching v1 `AlterViewAsCommand.alterPermanentView`'s `viewMeta.copy` // semantics. - override def userProperties: Map[String, String] = - existingInfo.properties.asScala.toMap + protected lazy val existingProps: Map[String, String] = + existingTable.getTableInfo.properties.asScala.toMap + + private def existingProp(key: String): Option[String] = existingProps.get(key) + + // ALTER VIEW ... AS does not accept a user column list. 
+ override def userSpecifiedColumns: Seq[(String, Option[String])] = Seq.empty + override def comment: Option[String] = existingProp(TableCatalog.PROP_COMMENT) + override def collation: Option[String] = existingProp(TableCatalog.PROP_COLLATION) + override def userProperties: Map[String, String] = existingProps - override def viewSchemaMode: ViewSchemaMode = existingCatalogTable.viewSchemaMode + // Read the schema binding mode directly from the properties map; shares decoding with + // the v1 path via `CatalogTable.viewSchemaModeFromProperties` (honors + // viewSchemaBindingEnabled and the same default when the property is absent). + override def viewSchemaMode: ViewSchemaMode = + CatalogTable.viewSchemaModeFromProperties(existingProps) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala index d7a65688c0ef2..678728b93386e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala @@ -157,18 +157,23 @@ case class CreateV2ViewExec( viewSchemaMode: ViewSchemaMode) extends V2ViewPreparation { override protected def run(): Seq[InternalRow] = { - val info = buildTableInfo() - - tryLoadTable().foreach { existing => - if (allowExisting) { - return Seq.empty - } - if (!isViewTable(existing)) { + // Probe the catalog before preparing the view body so `IF NOT EXISTS` short-circuits + // without running `aliasPlan` / `generateViewProperties`, matching v1 + // `CreateViewCommand.run`. Cyclic-reference detection is done at analysis time in + // `CheckViewReferences`. 
+ val existing = tryLoadTable() + if (allowExisting && existing.isDefined) { + return Seq.empty + } + existing.foreach { table => + if (!isViewTable(table)) { throw QueryCompilationErrors.unsupportedCreateOrReplaceViewOnTableError( legacyName, replace) } if (!replace) throw viewAlreadyExists() - // Cyclic reference detection is done at analysis time in CheckViewReferences. + } + val info = buildTableInfo() + if (existing.isDefined) { CommandUtils.uncacheTableOrView(session, ResolvedIdentifier(catalog, identifier)) catalog.dropTable(identifier) } @@ -205,11 +210,10 @@ case class AtomicCreateV2ViewExec( DataSourceV2Utils.commitMetrics(sparkContext, catalog) override protected def run(): Seq[InternalRow] = { - // Validate first (mirrors v1 CreateViewCommand.run and the non-atomic exec above) so a - // CREATE VIEW IF NOT EXISTS v AS with existing v fails the same way in - // both execs: the malformed view body is rejected even when the allow-existing short- - // circuit would otherwise skip creation. - val info = buildTableInfo() + // Probe the catalog before preparing the view body so `IF NOT EXISTS` short-circuits + // without running `aliasPlan` / `generateViewProperties`, matching v1 + // `CreateViewCommand.run`. Cyclic-reference detection is done at analysis time in + // `CheckViewReferences`. val existing = tryLoadTable() if (allowExisting && existing.isDefined) { return Seq.empty @@ -220,8 +224,8 @@ case class AtomicCreateV2ViewExec( legacyName, replace) } } + val info = buildTableInfo() val staged: StagedTable = if (replace) { - // Cyclic reference detection is done at analysis time in CheckViewReferences. 
if (existing.isDefined) { CommandUtils.uncacheTableOrView(session, ResolvedIdentifier(catalog, identifier)) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 55ec13eb394e4..0a96699ffeec5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.trees.TreePattern.SCALAR_SUBQUERY import org.apache.spark.sql.catalyst.util.{toPrettySQL, GeneratedColumn, IdentityColumn, ResolveDefaultColumns, ResolveTableConstraints, V2ExpressionBuilder} import org.apache.spark.sql.classic.SparkSession -import org.apache.spark.sql.connector.catalog.{Identifier, StagingTableCatalog, SupportsDeleteV2, SupportsNamespaces, SupportsPartitionManagement, SupportsWrite, TableCapability, TableCatalog, TableCatalogCapability, TruncatableTable, V1Table} +import org.apache.spark.sql.connector.catalog.{Identifier, StagingTableCatalog, SupportsDeleteV2, SupportsNamespaces, SupportsPartitionManagement, SupportsWrite, TableCapability, TableCatalog, TruncatableTable, V1Table} import org.apache.spark.sql.connector.catalog.TableChange import org.apache.spark.sql.connector.catalog.index.SupportsIndex import org.apache.spark.sql.connector.expressions.{FieldReference, LiteralValue} @@ -301,41 +301,28 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat qualifyLocInTableSpec(tableSpec), orCreate = orCreate, invalidateCache) :: Nil } + // The SUPPORTS_VIEW capability gate runs earlier in `CheckViewReferences`, so by the time + // these strategy cases fire the catalog is guaranteed to be a TableCatalog with the flag. 
case CreateView(ResolvedIdentifier(catalog, ident), userSpecifiedColumns, comment, collation, properties, originalText, child, allowExisting, replace, viewSchemaMode, _, _) => - // Gate on TableCatalog + SUPPORTS_VIEW together so non-TableCatalog plugins still - // surface the VIEWS-specific error (instead of the generic TABLES error that - // asTableCatalog would throw). - val tableCatalog = catalog match { - case tc: TableCatalog - if tc.capabilities().contains(TableCatalogCapability.SUPPORTS_VIEW) => tc - case _ => throw QueryCompilationErrors.missingCatalogViewsAbilityError(catalog) - } val sqlText = originalText.getOrElse { throw QueryCompilationErrors.createPersistedViewFromDatasetAPINotAllowedError() } - tableCatalog match { + catalog.asTableCatalog match { case staging: StagingTableCatalog => AtomicCreateV2ViewExec(staging, ident, userSpecifiedColumns, comment, collation, properties, sqlText, child, allowExisting, replace, viewSchemaMode) :: Nil - case _ => + case tableCatalog => CreateV2ViewExec(tableCatalog, ident, userSpecifiedColumns, comment, collation, properties, sqlText, child, allowExisting, replace, viewSchemaMode) :: Nil } case AlterViewAs(ResolvedPersistentView(catalog, ident, _), originalText, query, _, _) => - // Re-use the CREATE VIEW capability -- a catalog able to create views via createTable - // must also be able to replace them via dropTable+createTable or stageReplace. 
- val tableCatalog = catalog match { - case tc: TableCatalog - if tc.capabilities().contains(TableCatalogCapability.SUPPORTS_VIEW) => tc - case _ => throw QueryCompilationErrors.missingCatalogViewsAbilityError(catalog) - } - tableCatalog match { + catalog.asTableCatalog match { case staging: StagingTableCatalog => AtomicAlterV2ViewExec(staging, ident, originalText, query) :: Nil - case _ => + case tableCatalog => AlterV2ViewExec(tableCatalog, ident, originalText, query) :: Nil } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala index 4fb368c05ebf2..59f650c716fcd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala @@ -472,6 +472,43 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio } } + test("ALTER VIEW re-captures the current session's SQL configs") { + withTable("spark_catalog.default.t") { + Seq("a", "b").toDF("col").write.saveAsTable("spark_catalog.default.t") + withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { + sql("CREATE VIEW view_catalog.default.v_configs AS " + + "SELECT col FROM spark_catalog.default.t") + } + val catalog = spark.sessionState.catalogManager.catalog("view_catalog") + .asInstanceOf[TestingViewCatalog] + val ansiKey = TableCatalog.VIEW_CONF_PREFIX + SQLConf.ANSI_ENABLED.key + assert(catalog.getStoredView(Array("default"), "v_configs").properties().get(ansiKey) + == "true") + + // ALTER under a different ANSI setting should replace the stored config, not merge. 
+ withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + sql("ALTER VIEW view_catalog.default.v_configs AS " + + "SELECT col FROM spark_catalog.default.t WHERE col = 'b'") + } + assert(catalog.getStoredView(Array("default"), "v_configs").properties().get(ansiKey) + == "false") + } + } + + test("CREATE OR REPLACE VIEW whose new body references a nonexistent table fails at " + + "analysis") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW view_catalog.default.v_replace_missing AS " + + "SELECT x FROM spark_catalog.default.t") + val ex = intercept[AnalysisException] { + sql("CREATE OR REPLACE VIEW view_catalog.default.v_replace_missing AS " + + "SELECT * FROM spark_catalog.default.does_not_exist") + } + assert(ex.getCondition == "TABLE_OR_VIEW_NOT_FOUND") + } + } + test("ALTER VIEW on a StagingTableCatalog uses the atomic exec (stageReplace)") { withSQLConf( "spark.sql.catalog.staging_catalog" -> classOf[TestingStagingCatalog].getName) { From da417f5f46480226140a8dc20ef99405fbdcf3f8 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 23 Apr 2026 02:48:58 +0000 Subject: [PATCH 22/59] address self-review findings: v2 SHOW VIEWS, orphan-plan pinning, API doc reconciliation Read path: - V1Table.toCatalogTable: gate viewText read on tableType == VIEW so a non-view MetadataOnlyTable with a stray PROP_VIEW_TEXT doesn't synthesize a non-view CatalogTable with non-None viewText. ALTER VIEW execs: - Replace `val _ = existingTable` (obscure lazy-val side effect) with a named `requireExistingView()` helper in V2AlterViewPreparation. - Race between analysis and exec (target dropped or replaced as a non-view between lookup and run) now surfaces as EXPECT_VIEW_NOT_TABLE instead of SparkException.internalError. 
- AtomicCreateV2ViewExec: reject plain CREATE on an existing view up front with viewAlreadyExists(), matching the non-atomic exec (non-atomic path relied on catalog-side TableAlreadyExistsException, which StagingTableCatalog doesn't formally require). Orphan-plan pinning: - Add DataSourceV2Strategy cases for v2-catalog plans that ResolveSessionCatalog no longer rewrites: SetViewProperties, UnsetViewProperties, AlterViewSchemaBinding, RenameTable, ShowCreateTable, ShowTableProperties, ShowColumns, DescribeRelation, DescribeColumn on ResolvedPersistentView. Each throws UNSUPPORTED_FEATURE.TABLE_OPERATION naming the statement, pinning the current UX until the follow-up PRs land. SHOW VIEWS for v2: - New ShowViewsExec enumerates via TableCatalog.listTableSummaries(namespace) and filters to TableSummary.VIEW_TABLE_TYPE; wired in DataSourceV2Strategy. - ResolveSessionCatalog's ShowViews handler now skips (via guard) for SUPPORTS_VIEW catalogs so they reach the v2 strategy; non-session, non- SUPPORTS_VIEW catalogs still get the MISSING_CATALOG_ABILITY.VIEWS rejection. API contract reconciliation: - Javadocs on TableCatalog.loadTable / dropTable / tableExists / alterTable / renameTable / listTables / purgeTable and StagingTableCatalog.stageCreate / stageReplace / stageCreateOrReplace now spell out the SUPPORTS_VIEW split: loadTable returns views as MetadataOnlyTable, dropTable/tableExists/listTables include views (listTables also includes views for v1 parity with SHOW TABLES), while alterTable / renameTable / purgeTable / versioned+timestamped loadTable remain table-only. - Add IdentifierHelper.asLegacyTableIdentifier(catalogName) to share the lossy multi-part -> v1 TableIdentifier idiom; use in V1Table.toCatalogTable, V2ViewPreparation.legacyName, CheckViewReferences.legacyNameFor. 
Misc: - ResolveSessionCatalog: rename local var `child` -> `query` in CreateView pattern to match the case-class field name; update the stale ResolvedViewIdentifier comment to describe the new v2-strategy behavior. Tests: - New multi-part namespace round-trip unit test in DataSourceV2MetadataOnlyViewSuite (Builder -> V1Table.toCatalogTable -> viewCatalogAndNamespace preserves [cat, db1, db2]). - Orphan-plan pinning tests: UNSUPPORTED_FEATURE.TABLE_OPERATION for SET/UNSET TBLPROPERTIES, WITH SCHEMA, RENAME TO, SHOW CREATE TABLE, SHOW TBLPROPERTIES, SHOW COLUMNS, DESCRIBE TABLE; clean AnalysisException for DESCRIBE COLUMN (fails at column resolution before reaching the strategy). - SHOW TABLES on a v2 catalog includes views (v1 parity); SHOW VIEWS returns only views; SHOW VIEWS with LIKE filter; SHOW VIEWS on non-SUPPORTS_VIEW rejected with MISSING_CATALOG_ABILITY.VIEWS. - TestingTableOnlyCatalog now round-trips a view-typed MetadataOnlyTable so the ALTER VIEW capability-gate test actually reaches the gate (expected MISSING_CATALOG_ABILITY.VIEWS), closing a coverage hole. 
Co-authored-by: Isaac --- .../catalog/StagingTableCatalog.java | 13 ++ .../sql/connector/catalog/TableCatalog.java | 84 ++++--- .../catalyst/plans/logical/v2Commands.scala | 8 +- .../catalog/CatalogV2Implicits.scala | 11 + .../spark/sql/connector/catalog/V1Table.scala | 23 +- .../analysis/ResolveSessionCatalog.scala | 24 +- .../spark/sql/execution/command/views.scala | 5 +- .../datasources/v2/AlterV2ViewExec.scala | 38 ++-- .../datasources/v2/CreateV2ViewExec.scala | 13 +- .../datasources/v2/DataSourceV2Strategy.scala | 50 +++- .../datasources/v2/ShowViewsExec.scala | 56 +++++ .../DataSourceV2MetadataOnlyViewSuite.scala | 214 ++++++++++++++++-- 12 files changed, 454 insertions(+), 85 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowViewsExec.scala diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/StagingTableCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/StagingTableCatalog.java index 6811ea380b3ae..05edc9033279b 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/StagingTableCatalog.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/StagingTableCatalog.java @@ -95,6 +95,10 @@ default StagedTable stageCreate( * table exists when this method is called, the method should throw an exception accordingly. If * another process concurrently creates the table before this table's staged changes are * committed, an exception should be thrown by {@link StagedTable#commitStagedChanges()}. + *

+ * Catalogs that declare {@link TableCatalogCapability#SUPPORTS_VIEW} also route atomic v2 + * {@code CREATE VIEW} through this method when {@code tableInfo.properties()} includes + * {@link TableCatalog#PROP_VIEW_TEXT}. * * @param ident a table identifier * @param tableInfo information about the table @@ -159,6 +163,11 @@ default StagedTable stageReplace( * {@link #stageCreateOrReplace(Identifier, StructType, Transform[], Map)}, which should create * the table in the data source if the table does not exist at the time of committing the * operation. + *

+ * Catalogs that declare {@link TableCatalogCapability#SUPPORTS_VIEW} also route atomic v2 + * {@code ALTER VIEW ... AS} through this method when {@code tableInfo.properties()} includes + * {@link TableCatalog#PROP_VIEW_TEXT}; the existing entry at {@code ident} is expected to be + * a view. * * @param ident a table identifier * @param tableInfo information about the table @@ -222,6 +231,10 @@ default StagedTable stageCreateOrReplace( * backing data source. This differs from the expected semantics of * {@link #stageReplace(Identifier, StructType, Transform[], Map)}, which should fail when * the staged changes are committed but the table doesn't exist at commit time. + *

+ * Catalogs that declare {@link TableCatalogCapability#SUPPORTS_VIEW} also route atomic v2 + * {@code CREATE OR REPLACE VIEW} through this method when {@code tableInfo.properties()} + * includes {@link TableCatalog#PROP_VIEW_TEXT}. * * @param ident a table identifier * @param tableInfo information about the table diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java index e60bd5b496b0d..05e22cb11ae3f 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java @@ -123,10 +123,15 @@ public interface TableCatalog extends CatalogPlugin { /** * List the tables in a namespace from the catalog. *

- * If the catalog supports views, this must return identifiers for only tables and not views. + * Includes views. Like v1 {@code ShowTablesCommand}, the output of Spark's + * {@code SHOW TABLES} includes permanent views alongside tables; for catalogs that declare + * {@link TableCatalogCapability#SUPPORTS_VIEW} this method must therefore return identifiers + * for both tables and views, mirroring the v1 session-catalog behavior. Callers that need to + * tell tables and views apart should use {@link #listTableSummaries} and read + * {@link TableSummary#tableType()}. * * @param namespace a multi-part namespace - * @return an array of Identifiers for tables + * @return an array of Identifiers for tables and (for SUPPORTS_VIEW catalogs) views * @throws NoSuchNamespaceException If the namespace does not exist (optional). */ Identifier[] listTables(String[] namespace) throws NoSuchNamespaceException; @@ -134,11 +139,19 @@ public interface TableCatalog extends CatalogPlugin { /** * List the table summaries in a namespace from the catalog. *

- * This method should return all tables entities from a catalog regardless of type (i.e. views - * should be listed as well). + * This method should return all entities from the namespace regardless of type (tables AND + * views). Each returned {@link TableSummary} carries the entity's {@code tableType} + * (e.g. {@link TableSummary#VIEW_TABLE_TYPE VIEW_TABLE_TYPE}), which is what callers use to + * tell tables and views apart. + *

+ * The default implementation enumerates via {@link #listTables} + {@link #loadTable}, which + * works for SUPPORTS_VIEW catalogs because {@code listTables} also returns view identifiers + * and {@code loadTable} returns a view-typed {@link MetadataOnlyTable} for each. Catalogs + * that can fetch summaries in a single round-trip should override this method for + * efficiency. * * @param namespace a multi-part namespace - * @return an array of Identifiers for tables + * @return an array of summaries for tables and views in the namespace * @throws NoSuchNamespaceException If the namespace does not exist (optional). * @throws NoSuchTableException If certain table listed by listTables API does not exist. */ @@ -163,12 +176,15 @@ default TableSummary[] listTableSummaries(String[] namespace) /** * Load table metadata by {@link Identifier identifier} from the catalog. *

- * If the catalog supports views and contains a view for the identifier and not a table, this - * must throw {@link NoSuchTableException}. + * Catalogs that declare {@link TableCatalogCapability#SUPPORTS_VIEW} must return the view + * as a {@link MetadataOnlyTable} when {@code ident} resolves to a view, so Spark's view + * resolution path can expand the view text. Catalogs that do not declare + * {@code SUPPORTS_VIEW} must throw {@link NoSuchTableException} for a view identifier. * * @param ident a table identifier * @return the table's metadata - * @throws NoSuchTableException If the table doesn't exist or is a view + * @throws NoSuchTableException If the table doesn't exist, or is a view and the catalog + * does not declare {@link TableCatalogCapability#SUPPORTS_VIEW} */ Table loadTable(Identifier ident) throws NoSuchTableException; @@ -176,13 +192,13 @@ default TableSummary[] listTableSummaries(String[] namespace) * Load table metadata by {@link Identifier identifier} from the catalog. Spark will write data * into this table later. *

- * If the catalog supports views and contains a view for the identifier and not a table, this - * must throw {@link NoSuchTableException}. + * Contract for views matches {@link #loadTable(Identifier)}. * * @param ident a table identifier * @param writePrivileges * @return the table's metadata - * @throws NoSuchTableException If the table doesn't exist or is a view + * @throws NoSuchTableException If the table doesn't exist or is a view (see + * {@link #loadTable(Identifier)} for the view contract) * * @since 3.5.3 */ @@ -195,8 +211,9 @@ default Table loadTable( /** * Load table metadata of a specific version by {@link Identifier identifier} from the catalog. *

- * If the catalog supports views and contains a view for the identifier and not a table, this - * must throw {@link NoSuchTableException}. + * Time-travel targets a versioned table, not a view. This must throw + * {@link NoSuchTableException} for a view identifier regardless of whether the catalog + * declares {@link TableCatalogCapability#SUPPORTS_VIEW}. * * @param ident a table identifier * @param version version of the table @@ -210,8 +227,9 @@ default Table loadTable(Identifier ident, String version) throws NoSuchTableExce /** * Load table metadata at a specific time by {@link Identifier identifier} from the catalog. *

- * If the catalog supports views and contains a view for the identifier and not a table, this - * must throw {@link NoSuchTableException}. + * Time-travel targets a versioned table, not a view. This must throw + * {@link NoSuchTableException} for a view identifier regardless of whether the catalog + * declares {@link TableCatalogCapability#SUPPORTS_VIEW}. * * @param ident a table identifier * @param timestamp timestamp of the table, which is microseconds since 1970-01-01 00:00:00 UTC @@ -256,8 +274,10 @@ default void invalidateTable(Identifier ident) { /** * Test whether a table exists using an {@link Identifier identifier} from the catalog. *

- * If the catalog supports views and contains a view for the identifier and not a table, this - * must return false. + * Catalogs that declare {@link TableCatalogCapability#SUPPORTS_VIEW} manage views through the + * same identifier space as tables; for such catalogs this method must return {@code true} + * for a view identifier (mirroring {@link #loadTable(Identifier)}). Catalogs that do not + * declare {@code SUPPORTS_VIEW} must return {@code false} for a view identifier. * * @param ident a table identifier * @return true if the table exists, false otherwise @@ -367,8 +387,10 @@ default boolean useNullableQuerySchema() { *

* The requested changes must be applied in the order given. *

- * If the catalog supports views and contains a view for the identifier and not a table, this - * must throw {@link NoSuchTableException}. + * {@code alterTable} targets tables only. Even for catalogs that declare + * {@link TableCatalogCapability#SUPPORTS_VIEW}, this must throw {@link NoSuchTableException} + * when {@code ident} resolves to a view. View-specific DDL (CREATE / ALTER ... AS) goes + * through {@link #createTable(Identifier, TableInfo)} for SUPPORTS_VIEW catalogs. * * @param ident a table identifier * @param changes changes to apply to the table @@ -385,11 +407,16 @@ Table alterTable( /** * Drop a table in the catalog. *

- * If the catalog supports views and contains a view for the identifier and not a table, this - * must not drop the view and must return false. + * Catalogs that declare {@link TableCatalogCapability#SUPPORTS_VIEW} manage views through the + * same identifier space as tables; for such catalogs this method must also drop views at + * {@code ident} and return {@code true}. Spark's non-atomic v2 {@code ALTER VIEW ... AS} path + * relies on this ({@code dropTable} + {@code createTable}). Catalogs that do not declare + * {@code SUPPORTS_VIEW} must not drop a view and must return {@code false} for a view + * identifier. * * @param ident a table identifier - * @return true if a table was deleted, false if no table exists for the identifier + * @return true if a table (or, for SUPPORTS_VIEW catalogs, a view) was deleted, + * false if no such entry exists for the identifier */ boolean dropTable(Identifier ident); @@ -397,8 +424,9 @@ Table alterTable( * Drop a table in the catalog and completely remove its data by skipping a trash even if it is * supported. *

- * If the catalog supports views and contains a view for the identifier and not a table, this - * must not drop the view and must return false. + * {@code purgeTable} targets tables only. Even for catalogs that declare + * {@link TableCatalogCapability#SUPPORTS_VIEW}, this must not drop a view and must return + * {@code false} for a view identifier -- purge semantics (data removal) do not apply to views. *

* If the catalog supports to purge a table, this method should be overridden. * The default implementation throws {@link UnsupportedOperationException}. @@ -416,9 +444,11 @@ default boolean purgeTable(Identifier ident) throws UnsupportedOperationExceptio /** * Renames a table in the catalog. *

- * If the catalog supports views and contains a view for the old identifier and not a table, this - * throws {@link NoSuchTableException}. Additionally, if the new identifier is a table or a view, - * this throws {@link TableAlreadyExistsException}. + * {@code renameTable} targets tables only -- v2 {@code ALTER VIEW ... RENAME TO} is tracked + * as a separate follow-up and is not routed here today. Even for catalogs that declare + * {@link TableCatalogCapability#SUPPORTS_VIEW}, this must throw {@link NoSuchTableException} + * when {@code oldIdent} resolves to a view, and must throw + * {@link TableAlreadyExistsException} if {@code newIdent} resolves to a table or a view. *

* If the catalog does not support table renames between namespaces, it throws * {@link UnsupportedOperationException}. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index cafffdc7db823..5826da9fc8402 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -1351,8 +1351,12 @@ case class ShowTablePartition( /** * The logical plan of the SHOW VIEWS command. * - * Notes: v2 catalogs do not support views API yet, the command will fallback to - * v1 ShowViewsCommand during ResolveSessionCatalog. + * Session-catalog targets fall back to v1 [[org.apache.spark.sql.execution.command + * .ShowViewsCommand]] via `ResolveSessionCatalog`. v2 catalogs that declare + * [[org.apache.spark.sql.connector.catalog.TableCatalogCapability#SUPPORTS_VIEW]] are handled + * in `DataSourceV2Strategy` (enumerates via `listTableSummaries` filtered to + * `VIEW_TABLE_TYPE`). Non-SUPPORTS_VIEW v2 catalogs are rejected up front in + * `ResolveSessionCatalog` with `MISSING_CATALOG_ABILITY.VIEWS`. */ case class ShowViews( namespace: LogicalPlan, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala index cf6052009c927..b11f06bf58159 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala @@ -171,6 +171,17 @@ private[sql] object CatalogV2Implicits { throw QueryCompilationErrors.requiresSinglePartNamespaceError(asMultipartIdentifier) } + /** + * Build a v1 [[TableIdentifier]] for display / error-rendering purposes. 
Collapses a + * multi-part namespace to its last segment (v1 [[TableIdentifier]] has a single-string + * database field). Callers that need a lossless multi-part form should build a + * `Seq[String]` from [[toQualifiedNameParts]] instead. + */ + def asLegacyTableIdentifier(catalogName: String): TableIdentifier = TableIdentifier( + table = ident.name(), + database = ident.namespace().lastOption, + catalog = Some(catalogName)) + /** * Tries to convert catalog identifier to the table identifier. Table identifier does not * support multiple namespaces (nested namespaces), so if identifier contains nested namespace, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala index 81af259e08d53..e12e823a0c30a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala @@ -22,7 +22,6 @@ import java.util import scala.collection.mutable import scala.jdk.CollectionConverters._ -import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType, CatalogUtils, ClusterBySpec} import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ @@ -126,7 +125,14 @@ private[sql] object V1Table { case Some(TableSummary.MANAGED_TABLE_TYPE) => CatalogTableType.MANAGED case _ => CatalogTableType.EXTERNAL } - val viewText = props.get(TableCatalog.PROP_VIEW_TEXT) + // Only expose viewText when this table actually is a view; otherwise downstream callers that + // use `catalogTable.viewText.isDefined` as an "is-view" proxy would misclassify a + // misconfigured table entry. 
+ val viewText = if (tableType == CatalogTableType.VIEW) { + props.get(TableCatalog.PROP_VIEW_TEXT) + } else { + None + } // Reserved keys are promoted to first-class CatalogTable fields; strip them from the // user-visible properties map so they're not double-persisted or leaked into the serde bag. val userProps = props -- CatalogV2Util.TABLE_RESERVED_PROPERTIES @@ -150,14 +156,11 @@ private[sql] object V1Table { Map.empty[String, String] } CatalogTable( - // CatalogTable.identifier uses a single-string database; for multi-part namespaces we - // preserve only the last part here and record the full multi-part form in - // `multipartIdentifier` below. Callers needing the real fully-qualified name (e.g. cyclic - // view detection) should read `CatalogTable.fullIdent`. - identifier = TableIdentifier( - table = ident.name(), - database = ident.namespace().lastOption, - catalog = Some(catalog.name())), + // `asLegacyTableIdentifier` collapses multi-part namespaces to their last segment (v1 + // limitation). We record the full multi-part form in `multipartIdentifier` below; + // callers needing the real fully-qualified name (e.g. cyclic view detection) should + // read `CatalogTable.fullIdent`. 
+ identifier = ident.asLegacyTableIdentifier(catalog.name()), tableType = tableType, storage = CatalogStorageFormat.empty.copy( locationUri = props.get(TableCatalog.PROP_LOCATION).map(CatalogUtils.stringToURI), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 4707b7f30b3f7..df20d7e3187b6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.util.{quoteIfNeeded, toPrettySQL, CharVarcharUtils, ResolveDefaultColumns => DefaultCols} import org.apache.spark.sql.catalyst.util.ResolveDefaultColumns._ -import org.apache.spark.sql.connector.catalog.{CatalogExtension, CatalogManager, CatalogPlugin, CatalogV2Util, LookupCatalog, SupportsNamespaces, V1Table} +import org.apache.spark.sql.connector.catalog.{CatalogExtension, CatalogManager, CatalogPlugin, CatalogV2Util, LookupCatalog, SupportsNamespaces, TableCatalog, TableCatalogCapability, V1Table} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.execution.command._ @@ -524,7 +524,7 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) // CreateViewCommand is a separate AnalysisOnlyCommand and gets its own markAsAnalyzed pass // from HandleSpecialCommand after this rewrite. 
case CreateView(CreateViewInSessionCatalog(ident), userSpecifiedColumns, comment, - collation, properties, originalText, child, allowExisting, replace, viewSchemaMode, + collation, properties, originalText, query, allowExisting, replace, viewSchemaMode, _, _) => CreateViewCommand( name = ident, @@ -533,13 +533,17 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) collation = collation, properties = properties, originalText = originalText, - plan = child, + plan = query, allowExisting = allowExisting, replace = replace, viewType = PersistedView, viewSchemaMode = viewSchemaMode) - case ShowViews(ns: ResolvedNamespace, pattern, output) => + // SUPPORTS_VIEW catalogs are handled by the v2 strategy (enumerates via + // listTableSummaries); we skip the match here so the plan flows through unchanged. Only + // non-session, non-SUPPORTS_VIEW catalogs hit the MISSING_CATALOG_ABILITY.VIEWS rejection. + case ShowViews(ns: ResolvedNamespace, pattern, output) + if !isSupportsViewCatalog(ns.catalog) => ns match { case ResolvedDatabaseInSessionCatalog(db) => ShowViewsCommand(db, pattern, output) case _ => @@ -771,8 +775,11 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) object ResolvedViewIdentifier { // Only matches session-catalog persistent views. Non-session-catalog persistent views - // (produced for `MetadataOnlyTable`) fall through so they can be picked up by v2 strategies - // rather than silently collapsed to a v1 `TableIdentifier`. + // (produced for `MetadataOnlyTable`) fall through; `AlterViewAs` is picked up by the v2 + // strategy, and the remaining view DDL / inspection plans (SET/UNSET TBLPROPERTIES, + // ALTER VIEW ... WITH SCHEMA, RENAME TO, SHOW CREATE TABLE, SHOW TBLPROPERTIES, SHOW + // COLUMNS, DESCRIBE [COLUMN]) are rejected with `UNSUPPORTED_FEATURE.TABLE_OPERATION` by + // dedicated v2 strategy cases -- tracked for a follow-up PR (SPARK-52729). 
def unapply(resolved: LogicalPlan): Option[TableIdentifier] = resolved match { case ResolvedPersistentView(catalog, ident, _) if isSessionCatalog(catalog) => Some(ident.asTableIdentifier.copy(catalog = Some(catalog.name))) @@ -938,4 +945,9 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) SQLConf.get.getConf(SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION) == "builtin" || catalog.isInstanceOf[CatalogExtension]) } + + private def isSupportsViewCatalog(catalog: CatalogPlugin): Boolean = catalog match { + case tc: TableCatalog => tc.capabilities().contains(TableCatalogCapability.SUPPORTS_VIEW) + case _ => false + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index 73fdbf8927f59..172582e8f1def 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -886,10 +886,7 @@ object CheckViewReferences extends (LogicalPlan => Unit) { private def legacyNameFor(resolved: LogicalPlan): TableIdentifier = { val (catalog, ident) = catalogAndIdent(resolved) - TableIdentifier( - table = ident.name(), - database = ident.namespace().lastOption, - catalog = Some(catalog.name())) + ident.asLegacyTableIdentifier(catalog.name()) } private def fullIdentFor(resolved: LogicalPlan): Seq[String] = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala index c639a79309e7e..3754a5153507f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.execution.datasources.v2 import scala.jdk.CollectionConverters._ -import 
org.apache.spark.SparkException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, ResolvedIdentifier, TableAlreadyExistsException, ViewSchemaMode} import org.apache.spark.sql.catalyst.catalog.CatalogTable @@ -34,12 +33,10 @@ import org.apache.spark.util.Utils /** * Shared bits for the v2 ALTER VIEW ... AS execs. Loads the existing view once via * `existingTable` and uses its properties to preserve user-set properties, comment, collation, - * and schema-binding mode when constructing the replacement `TableInfo`. A v2 identifier that - * does not resolve to a view-typed [[MetadataOnlyTable]] is a catalog contract violation -- - * the analyzer has already verified the target is a view, and `SUPPORTS_VIEW` catalogs must - * round-trip `MetadataOnlyTable` from `loadTable`. The only way to hit the internal-error - * branch is a racing DDL between analysis and exec, which is rare enough that we surface it - * as an internal error rather than a user-facing one. + * and schema-binding mode when constructing the replacement `TableInfo`. A racing DDL between + * analysis and exec can change the target out from under us (dropped, or replaced with a + * non-view table); in that case we surface a regular no-such-table / not-a-view analysis + * error rather than propagating a stale analyzer decision. 
* * `generateViewProperties` (invoked from `buildTableInfo`) strips the transient view keys * (SQL configs, query column names, referred-temp names) from the inherited properties and @@ -55,10 +52,15 @@ private[v2] trait V2AlterViewPreparation extends V2ViewPreparation { } table match { case mot: MetadataOnlyTable if isViewTable(mot) => mot - case other => - throw SparkException.internalError( - s"Expected a view-typed MetadataOnlyTable from ${catalog.name()} for " + - s"${identifier.quoted}, got ${other.getClass.getName}") + case _ => + // Analyzer verified this was a view, but a racing DDL (drop + recreate as a + // non-view table, or a catalog that now returns a different Table subclass for this + // identifier) can invalidate that. Surface as a user-facing error. + throw QueryCompilationErrors.expectViewNotTableError( + (catalog.name() +: identifier.asMultipartIdentifier).toSeq, + cmd = "ALTER VIEW ... AS", + suggestAlternative = false, + t = this) } } @@ -86,6 +88,14 @@ private[v2] trait V2AlterViewPreparation extends V2ViewPreparation { // viewSchemaBindingEnabled and the same default when the property is absent). override def viewSchemaMode: ViewSchemaMode = CatalogTable.viewSchemaModeFromProperties(existingProps) + + /** + * Force-evaluate `existingTable` so `NoSuchTableException` / `expectViewNotTableError` + * surfaces before any other work (e.g. `buildTableInfo`, uncache, drop). The result is + * intentionally discarded; call this purely for its side effect of materializing the + * lazy val. + */ + protected def requireExistingView(): Unit = existingTable } /** @@ -100,9 +110,7 @@ case class AlterV2ViewExec( query: LogicalPlan) extends V2AlterViewPreparation { override protected def run(): Seq[InternalRow] = { - // Force evaluation of the existingTable lazy val so NoSuchTableException surfaces before - // we do any other work. 
- val _ = existingTable + requireExistingView() val info = buildTableInfo() // Cyclic reference detection is done at analysis time in CheckViewReferences. CommandUtils.uncacheTableOrView(session, ResolvedIdentifier(catalog, identifier)) @@ -132,7 +140,7 @@ case class AtomicAlterV2ViewExec( DataSourceV2Utils.commitMetrics(sparkContext, catalog) override protected def run(): Seq[InternalRow] = { - val _ = existingTable + requireExistingView() val info = buildTableInfo() // Cyclic reference detection is done at analysis time in CheckViewReferences. CommandUtils.uncacheTableOrView(session, ResolvedIdentifier(catalog, identifier)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala index 678728b93386e..6482f6f6f8f21 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala @@ -27,6 +27,7 @@ import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, QuotingUtils} import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, StagedTable, StagingTableCatalog, Table, TableCatalog, TableInfo, TableSummary} +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.command.{CommandUtils, ViewHelper} import org.apache.spark.sql.execution.metric.SQLMetric @@ -56,10 +57,8 @@ private[v2] trait V2ViewPreparation extends LeafV2CommandExec { // Build a synthetic v1 TableIdentifier for error messages and for ViewHelper methods that // accept it purely for rendering. 
This carries no semantic weight - the v2 Identifier is the // actual target. - protected lazy val legacyName: TableIdentifier = TableIdentifier( - table = identifier.name(), - database = identifier.namespace().lastOption, - catalog = Some(catalog.name())) + protected lazy val legacyName: TableIdentifier = + identifier.asLegacyTableIdentifier(catalog.name()) override def output: Seq[Attribute] = Seq.empty @@ -223,6 +222,9 @@ case class AtomicCreateV2ViewExec( throw QueryCompilationErrors.unsupportedCreateOrReplaceViewOnTableError( legacyName, replace) } + // Match the non-atomic exec: reject plain CREATE against an existing view up front + // rather than relying on `stageCreate` to throw. + if (!replace) throw viewAlreadyExists() } val info = buildTableInfo() val staged: StagedTable = if (replace) { @@ -231,6 +233,9 @@ case class AtomicCreateV2ViewExec( } catalog.stageCreateOrReplace(identifier, info) } else { + // TOCTOU: a concurrent writer can create an entry between `tryLoadTable` and + // `stageCreate`; translate the catalog's `TableAlreadyExistsException` into the same + // view-already-exists error the fast-path uses. 
try { catalog.stageCreate(identifier, info) } catch { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 0a96699ffeec5..4289b9f7eb332 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.trees.TreePattern.SCALAR_SUBQUERY import org.apache.spark.sql.catalyst.util.{toPrettySQL, GeneratedColumn, IdentityColumn, ResolveDefaultColumns, ResolveTableConstraints, V2ExpressionBuilder} import org.apache.spark.sql.classic.SparkSession -import org.apache.spark.sql.connector.catalog.{Identifier, StagingTableCatalog, SupportsDeleteV2, SupportsNamespaces, SupportsPartitionManagement, SupportsWrite, TableCapability, TableCatalog, TruncatableTable, V1Table} +import org.apache.spark.sql.connector.catalog.{Identifier, StagingTableCatalog, SupportsDeleteV2, SupportsNamespaces, SupportsPartitionManagement, SupportsWrite, TableCapability, TableCatalog, TableCatalogCapability, TruncatableTable, V1Table} import org.apache.spark.sql.connector.catalog.TableChange import org.apache.spark.sql.connector.catalog.index.SupportsIndex import org.apache.spark.sql.connector.expressions.{FieldReference, LiteralValue} @@ -326,6 +326,47 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat AlterV2ViewExec(tableCatalog, ident, originalText, query) :: Nil } + // View DDL / inspection on a non-session v2 catalog that the v1 rewrite in + // `ResolveSessionCatalog` can't handle. 
These are tracked as follow-up work in SPARK-52729; + // pin the current failure mode with a clean `UNSUPPORTED_FEATURE.TABLE_OPERATION` error + // so users get a meaningful message (and test coverage catches a future regression to a + // generic planner error). + case SetViewProperties(ResolvedPersistentView(catalog, ident, _), _) => + throw QueryCompilationErrors.unsupportedTableOperationError( + catalog, ident, "ALTER VIEW ... SET TBLPROPERTIES") + + case UnsetViewProperties(ResolvedPersistentView(catalog, ident, _), _, _) => + throw QueryCompilationErrors.unsupportedTableOperationError( + catalog, ident, "ALTER VIEW ... UNSET TBLPROPERTIES") + + case AlterViewSchemaBinding(ResolvedPersistentView(catalog, ident, _), _) => + throw QueryCompilationErrors.unsupportedTableOperationError( + catalog, ident, "ALTER VIEW ... WITH SCHEMA") + + case RenameTable(ResolvedPersistentView(catalog, ident, _), _, _) => + throw QueryCompilationErrors.unsupportedTableOperationError( + catalog, ident, "ALTER VIEW ... RENAME TO") + + case ShowCreateTable(ResolvedPersistentView(catalog, ident, _), _, _) => + throw QueryCompilationErrors.unsupportedTableOperationError( + catalog, ident, "SHOW CREATE TABLE") + + case ShowTableProperties(ResolvedPersistentView(catalog, ident, _), _, _) => + throw QueryCompilationErrors.unsupportedTableOperationError( + catalog, ident, "SHOW TBLPROPERTIES") + + case ShowColumns(ResolvedPersistentView(catalog, ident, _), _, _) => + throw QueryCompilationErrors.unsupportedTableOperationError( + catalog, ident, "SHOW COLUMNS") + + case DescribeRelation(ResolvedPersistentView(catalog, ident, _), _, _, _) => + throw QueryCompilationErrors.unsupportedTableOperationError( + catalog, ident, "DESCRIBE TABLE") + + case DescribeColumn(ResolvedPersistentView(catalog, ident, _), _, _, _) => + throw QueryCompilationErrors.unsupportedTableOperationError( + catalog, ident, "DESCRIBE TABLE ... 
COLUMN") + case ReplaceTableAsSelect(ResolvedIdentifier(catalog, ident), parts, query, tableSpec: TableSpec, options, orCreate, true) => catalog match { @@ -517,6 +558,13 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case ShowTables(ResolvedNamespace(catalog, ns, _), pattern, output) => ShowTablesExec(output, catalog.asTableCatalog, ns, pattern) :: Nil + // SHOW VIEWS on a non-session v2 catalog. Session-catalog targets are rewritten to v1 + // `ShowViewsCommand` by `ResolveSessionCatalog`; non-SUPPORTS_VIEW catalogs are rejected + // there too. This case only sees non-session SUPPORTS_VIEW catalogs. + case ShowViews(ResolvedNamespace(catalog: TableCatalog, ns, _), pattern, output) + if catalog.capabilities().contains(TableCatalogCapability.SUPPORTS_VIEW) => + ShowViewsExec(output, catalog, ns, pattern) :: Nil + case ShowTablesExtended( ResolvedNamespace(catalog, ns, _), pattern, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowViewsExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowViewsExec.scala new file mode 100644 index 0000000000000..26ca6a819f55b --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowViewsExec.scala @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.v2 + +import scala.collection.mutable.ArrayBuffer + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.catalyst.util.StringUtils +import org.apache.spark.sql.connector.catalog.{TableCatalog, TableSummary} +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.NamespaceHelper +import org.apache.spark.sql.execution.LeafExecNode + +/** + * Physical plan node for SHOW VIEWS on a v2 catalog that declares + * [[org.apache.spark.sql.connector.catalog.TableCatalogCapability#SUPPORTS_VIEW]]. + * + * Enumerates via [[TableCatalog#listTableSummaries]] and filters to + * [[TableSummary#VIEW_TABLE_TYPE]]. v2 catalogs have no temp views, so the `isTemporary` + * column is always false -- mirroring v1 `ShowViewsCommand`, which sets `isTemporary=true` + * only for local/global temp views that live in the session catalog. 
+ */ +case class ShowViewsExec( + output: Seq[Attribute], + catalog: TableCatalog, + namespace: Seq[String], + pattern: Option[String]) extends V2CommandExec with LeafExecNode { + override protected def run(): Seq[InternalRow] = { + val rows = new ArrayBuffer[InternalRow]() + val summaries = catalog.listTableSummaries(namespace.toArray) + summaries.foreach { summary => + val ident = summary.identifier + val nameMatches = + pattern.forall(p => StringUtils.filterPattern(Seq(ident.name), p).nonEmpty) + if (TableSummary.VIEW_TABLE_TYPE == summary.tableType && nameMatches) { + rows += toCatalystRow(ident.namespace().quoted, ident.name(), false) + } + } + rows.toSeq + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala index 59f650c716fcd..3e47b0cbdf19f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.connector import org.apache.spark.SparkConf import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, TableAlreadyExistsException} -import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, StagedTable, StagingTableCatalog, Table, TableCatalog, TableCatalogCapability, TableChange, TableInfo, TableSummary} +import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, StagedTable, StagingTableCatalog, Table, TableCatalog, TableCatalogCapability, TableChange, TableInfo, TableSummary, V1Table} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.StructType @@ -101,6 +101,36 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest 
with SharedSparkSessio TableCatalog.PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE)) } + test("multi-part captured namespace round-trips through V1Table.toCatalogTable") { + // End-to-end coverage of the v2 encoder -> parser round-trip for multi-level namespaces: + // (a) TableInfo.Builder serializes (cat, Array(db1, db2)) into a quoted multi-part + // identifier, (b) V1Table.toCatalogTable parses it back via parseMultipartIdentifier, and + // (c) the resulting CatalogTable exposes the full (cat, db1, db2) via + // viewCatalogAndNamespace -- which is what the v1 view-resolution path consumes to expand + // unqualified references in the view body. + val info = new TableInfo.Builder() + .withSchema(new StructType().add("col", "string")) + .withViewText("SELECT col FROM t") + .withCurrentCatalogAndNamespace("my_cat", Array("db1", "db2")) + .build() + val motTable = new MetadataOnlyTable(info) + // Any CatalogPlugin works here; toCatalogTable only reads `catalog.name()`. + val catalog = spark.sessionState.catalogManager.catalog("view_catalog") + val ct = V1Table.toCatalogTable( + catalog, Identifier.of(Array("ns"), "v"), motTable) + assert(ct.viewCatalogAndNamespace == Seq("my_cat", "db1", "db2")) + + // And for a namespace part that needs backtick-quoting. 
+ val infoWeird = new TableInfo.Builder() + .withSchema(new StructType().add("col", "string")) + .withViewText("SELECT col FROM t") + .withCurrentCatalogAndNamespace("my_cat", Array("weird.db", "normal")) + .build() + val ctWeird = V1Table.toCatalogTable( + catalog, Identifier.of(Array("ns"), "v"), new MetadataOnlyTable(infoWeird)) + assert(ctWeird.viewCatalogAndNamespace == Seq("my_cat", "weird.db", "normal")) + } + test("withCurrentCatalogAndNamespace clears the property when catalog is null or empty") { val infoNull = new TableInfo.Builder() .withSchema(new StructType().add("col", "string")) @@ -527,17 +557,18 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio } } - test("ALTER VIEW on a catalog without SUPPORTS_VIEW fails") { - // An identifier the TestingTableOnlyCatalog can't find -- we never get past the view - // lookup stage, so the error here is the no-such-table / not-a-view path. The capability - // gate in DataSourceV2Strategy is only reachable once the existing view is resolvable, - // which this catalog can't do; the capability rejection is already exercised by the - // CREATE VIEW test above. + test("ALTER VIEW on a catalog without SUPPORTS_VIEW fails with MISSING_CATALOG_ABILITY") { + // TestingTableOnlyCatalog does NOT declare SUPPORTS_VIEW but DOES round-trip + // `default.v` as a view-typed MetadataOnlyTable, so view resolution succeeds and we + // reach the capability gate in `CheckViewReferences`. Verifies the gate fires on the + // ALTER path (not only on CREATE), which would otherwise silently regress if + // `SUPPORTS_VIEW` got added to the default capability set. 
withSQLConf( "spark.sql.catalog.no_view_catalog" -> classOf[TestingTableOnlyCatalog].getName) { - intercept[AnalysisException] { - sql("ALTER VIEW no_view_catalog.default.v AS SELECT 1") + val ex = intercept[AnalysisException] { + sql("ALTER VIEW no_view_catalog.default.v AS SELECT 1 AS x") } + assert(ex.getCondition == "MISSING_CATALOG_ABILITY.VIEWS") } } @@ -594,6 +625,131 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio } } + // --- Follow-up-blocked view DDL / inspection on a non-session v2 catalog ------------ + // These plans don't have a dedicated v2 strategy yet (tracked for a follow-up PR). We pin + // the current failure mode -- UNSUPPORTED_FEATURE.TABLE_OPERATION with a statement-specific + // operation string -- so a future generic "no plan found" regression would surface here + // rather than silently degrading the UX. + + private def seedV2View(name: String): Unit = { + sql(s"CREATE VIEW view_catalog.default.$name AS SELECT 1 AS x") + } + + private def assertUnsupportedViewOp(statement: String): Unit = { + val ex = intercept[AnalysisException](sql(statement)) + assert(ex.getCondition == "UNSUPPORTED_FEATURE.TABLE_OPERATION", s"got ${ex.getCondition}") + } + + test("ALTER VIEW ... SET TBLPROPERTIES on a v2 view is rejected") { + seedV2View("v_set_props") + assertUnsupportedViewOp( + "ALTER VIEW view_catalog.default.v_set_props SET TBLPROPERTIES ('k' = 'v')") + } + + test("ALTER VIEW ... UNSET TBLPROPERTIES on a v2 view is rejected") { + seedV2View("v_unset_props") + assertUnsupportedViewOp( + "ALTER VIEW view_catalog.default.v_unset_props UNSET TBLPROPERTIES ('k')") + } + + test("ALTER VIEW ... WITH SCHEMA on a v2 view is rejected") { + seedV2View("v_schema_binding") + assertUnsupportedViewOp( + "ALTER VIEW view_catalog.default.v_schema_binding WITH SCHEMA EVOLUTION") + } + + test("ALTER VIEW ... 
RENAME TO on a v2 view is rejected") { + seedV2View("v_rename") + assertUnsupportedViewOp( + "ALTER VIEW view_catalog.default.v_rename RENAME TO view_catalog.default.v_renamed") + } + + test("SHOW CREATE TABLE on a v2 view is rejected") { + seedV2View("v_show_create") + assertUnsupportedViewOp("SHOW CREATE TABLE view_catalog.default.v_show_create") + } + + test("SHOW TBLPROPERTIES on a v2 view is rejected") { + seedV2View("v_show_props") + assertUnsupportedViewOp("SHOW TBLPROPERTIES view_catalog.default.v_show_props") + } + + test("SHOW COLUMNS on a v2 view is rejected") { + seedV2View("v_show_cols") + assertUnsupportedViewOp("SHOW COLUMNS IN view_catalog.default.v_show_cols") + } + + test("DESCRIBE TABLE on a v2 view is rejected") { + seedV2View("v_describe") + assertUnsupportedViewOp("DESCRIBE TABLE view_catalog.default.v_describe") + } + + test("DESCRIBE TABLE ... COLUMN on a v2 view is rejected") { + seedV2View("v_describe_col") + // Column resolution against a v2 view's output isn't wired up yet, so the analyzer fails + // with UNRESOLVED_COLUMN before reaching the planner. That's still a clean + // AnalysisException (not a generic "no plan found"), which is the pin we care about. + intercept[AnalysisException]( + sql("DESCRIBE TABLE view_catalog.default.v_describe_col x")) + } + + // --- SHOW TABLES / SHOW VIEWS on a v2 catalog -------------------------------- + + private def seedV2Table(name: String): Unit = { + val catalog = spark.sessionState.catalogManager.catalog("view_catalog") + .asInstanceOf[TestingViewCatalog] + catalog.createTable( + Identifier.of(Array("default"), name), + new TableInfo.Builder() + .withSchema(new StructType().add("x", "int")) + .withTableType(TableSummary.EXTERNAL_TABLE_TYPE) + .build()) + } + + test("SHOW TABLES on a v2 catalog includes views (v1 parity)") { + // v1 SHOW TABLES returns both tables and views; the `isTemporary` column distinguishes + // temp views from everything else. 
v2 catalogs have no temp views, so `isTemporary` is + // always false -- tables and permanent views are indistinguishable at the row level, but + // both must appear (callers that want only tables should use listTableSummaries and + // filter). + seedV2View("v_in_show_tables") + seedV2Table("t_in_show_tables") + val rows = sql("SHOW TABLES IN view_catalog.default").collect() + val names = rows.map(_.getString(1)).toSet + assert(names.contains("v_in_show_tables"), s"view missing from SHOW TABLES: $names") + assert(names.contains("t_in_show_tables"), s"table missing from SHOW TABLES: $names") + rows.foreach(r => assert(!r.getBoolean(2), s"isTemporary must be false: $r")) + } + + test("SHOW VIEWS on a v2 catalog returns only views") { + seedV2View("v_in_show_views") + seedV2Table("t_not_in_show_views") + val rows = sql("SHOW VIEWS IN view_catalog.default").collect() + val names = rows.map(_.getString(1)).toSet + assert(names.contains("v_in_show_views"), s"view missing: $names") + assert(!names.contains("t_not_in_show_views"), + s"non-view leaked into SHOW VIEWS: $names") + rows.foreach(r => assert(!r.getBoolean(2), s"isTemporary must be false for v2: $r")) + } + + test("SHOW VIEWS with LIKE pattern filters on the view name") { + seedV2View("v_foo") + seedV2View("v_bar") + val rows = sql("SHOW VIEWS IN view_catalog.default LIKE 'v_foo'").collect() + val names = rows.map(_.getString(1)).toSet + assert(names == Set("v_foo"), s"expected only v_foo, got $names") + } + + test("SHOW VIEWS on a catalog without SUPPORTS_VIEW is rejected") { + withSQLConf( + "spark.sql.catalog.no_view_catalog" -> classOf[TestingTableOnlyCatalog].getName) { + val ex = intercept[AnalysisException] { + sql("SHOW VIEWS IN no_view_catalog.default") + } + assert(ex.getCondition == "MISSING_CATALOG_ABILITY.VIEWS") + } + } + test("ALTER VIEW detects cyclic view references") { withTable("spark_catalog.default.t") { Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") @@ -612,12 +768,15 @@ 
class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio /** * A [[TableCatalog]] that supports SUPPORTS_VIEW: round-trips [[MetadataOnlyTable]] for created - * views (via `createTable` / `dropTable` / `tableExists`) and exposes two canned read-only - * fixtures (`test_view`, `test_unqualified_view`) used by the view-read tests. + * views and tables (via `createTable` / `dropTable` / `tableExists` / `listTables`) and exposes + * two canned read-only fixtures (`test_view`, `test_unqualified_view`) used by the view-read + * tests. Entries created via `createTable` can be either tables or views -- their + * [[TableCatalog#PROP_TABLE_TYPE]] property is what distinguishes them. */ class TestingViewCatalog extends TableCatalog { - // Holds views created via createTable within the session. Keyed by (namespace, name). + // Holds entries (views and tables) created via createTable within the session. Keyed by + // (namespace, name); PROP_TABLE_TYPE in the stored TableInfo distinguishes views from tables. private val createdViews = new java.util.concurrent.ConcurrentHashMap[(Seq[String], String), TableInfo]() @@ -683,7 +842,15 @@ class TestingViewCatalog extends TableCatalog { throw new RuntimeException("shouldn't be called") } override def listTables(namespace: Array[String]): Array[Identifier] = { - throw new RuntimeException("shouldn't be called") + // Per the TableCatalog contract (v1 parity), this returns identifiers for both tables and + // views; `listTableSummaries` (default impl: listTables + loadTable + read PROP_TABLE_TYPE) + // is what distinguishes them. + val targetNs = namespace.toSeq + val ids = new java.util.ArrayList[Identifier]() + createdViews.forEach { (key, _) => + if (key._1 == targetNs) ids.add(Identifier.of(key._1.toArray, key._2)) + } + ids.toArray(new Array[Identifier](0)) } private var catalogName = "" @@ -759,11 +926,26 @@ private class RecordingStagedTable( } /** - * A v2 catalog that does not declare SUPPORTS_VIEW. 
Used to exercise the capability - * gate in `DataSourceV2Strategy`. + * A v2 catalog that does not declare SUPPORTS_VIEW. Used to exercise the capability gate: + * it returns a view-typed [[MetadataOnlyTable]] from `loadTable`, so ALTER VIEW progresses + * past view resolution and actually hits the gate in [[CheckViewReferences]]. */ class TestingTableOnlyCatalog extends TableCatalog { - override def loadTable(ident: Identifier): Table = throw new NoSuchTableException(ident) + // Pre-seeded view at default.v, used by the ALTER VIEW capability-gate test. Stored here + // rather than in createTable so tests don't need to first create the view through Spark + // (which would itself be blocked by the capability gate they're verifying). + private val fixtureView: TableInfo = new TableInfo.Builder() + .withSchema(new StructType().add("x", "int")) + .withViewText("SELECT 1 AS x") + .build() + + override def loadTable(ident: Identifier): Table = + if (ident.namespace().toSeq == Seq("default") && ident.name() == "v") { + new MetadataOnlyTable(fixtureView) + } else { + throw new NoSuchTableException(ident) + } + override def alterTable(ident: Identifier, changes: TableChange*): Table = throw new RuntimeException("shouldn't be called") override def dropTable(ident: Identifier): Boolean = false From 5d38bd0fd7420958a02121434de764dda5f814a4 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 23 Apr 2026 03:24:19 +0000 Subject: [PATCH 23/59] address self-review findings: extract SUPPORTS_VIEW helper, fix test comment, add multi-part captured-namespace read test - CatalogV2Util.supportsView: shared predicate replacing duplicated TableCatalog+SUPPORTS_VIEW check in CheckViewReferences and ResolveSessionCatalog. 
- DataSourceV2MetadataOnlyViewSuite: correct the misleading "body is validated first" comment around CREATE VIEW IF NOT EXISTS on the atomic exec (tryLoadTable short-circuits before buildTableInfo), and add an end-to-end SQL test exercising multi-part captured catalog/namespace round-trip for an unqualified view-body reference. Co-authored-by: Isaac --- .../sql/connector/catalog/CatalogV2Util.scala | 9 +++++ .../analysis/ResolveSessionCatalog.scala | 8 ++--- .../spark/sql/execution/command/views.scala | 8 ++--- .../DataSourceV2MetadataOnlyViewSuite.scala | 34 +++++++++++++++++-- 4 files changed, 44 insertions(+), 15 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala index 6e41cd64332b6..b372f2b568c73 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala @@ -80,6 +80,15 @@ private[sql] object CatalogV2Util { SupportsNamespaces.PROP_LOCATION, SupportsNamespaces.PROP_OWNER) + /** + * Whether the given catalog is a [[TableCatalog]] that declares + * [[TableCatalogCapability.SUPPORTS_VIEW]]. Returns false for non-`TableCatalog` plugins. + */ + def supportsView(catalog: CatalogPlugin): Boolean = catalog match { + case tc: TableCatalog => tc.capabilities().contains(TableCatalogCapability.SUPPORTS_VIEW) + case _ => false + } + /** * Apply properties changes to a map and return the result. 
*/ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index df20d7e3187b6..7695d912b534d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.util.{quoteIfNeeded, toPrettySQL, CharVarcharUtils, ResolveDefaultColumns => DefaultCols} import org.apache.spark.sql.catalyst.util.ResolveDefaultColumns._ -import org.apache.spark.sql.connector.catalog.{CatalogExtension, CatalogManager, CatalogPlugin, CatalogV2Util, LookupCatalog, SupportsNamespaces, TableCatalog, TableCatalogCapability, V1Table} +import org.apache.spark.sql.connector.catalog.{CatalogExtension, CatalogManager, CatalogPlugin, CatalogV2Util, LookupCatalog, SupportsNamespaces, V1Table} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.execution.command._ @@ -543,7 +543,7 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) // listTableSummaries); we skip the match here so the plan flows through unchanged. Only // non-session, non-SUPPORTS_VIEW catalogs hit the MISSING_CATALOG_ABILITY.VIEWS rejection. 
case ShowViews(ns: ResolvedNamespace, pattern, output) - if !isSupportsViewCatalog(ns.catalog) => + if !CatalogV2Util.supportsView(ns.catalog) => ns match { case ResolvedDatabaseInSessionCatalog(db) => ShowViewsCommand(db, pattern, output) case _ => @@ -946,8 +946,4 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) catalog.isInstanceOf[CatalogExtension]) } - private def isSupportsViewCatalog(catalog: CatalogPlugin): Boolean = catalog match { - case tc: TableCatalog => tc.capabilities().contains(TableCatalogCapability.SUPPORTS_VIEW) - case _ => false - } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index 172582e8f1def..fb867b7ef3dd5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -33,7 +33,7 @@ import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, SubqueryExpr import org.apache.spark.sql.catalyst.plans.logical.{AlterViewAs, AnalysisOnlyCommand, CreateTempView, CreateView, CTEInChildren, CTERelationDef, LogicalPlan, Project, View, WithCTE} import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.classic.ClassicConversions.castToImpl -import org.apache.spark.sql.connector.catalog.{CatalogPlugin, Identifier, TableCatalog, TableCatalogCapability} +import org.apache.spark.sql.connector.catalog.{CatalogPlugin, CatalogV2Util, Identifier} import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.{IdentifierHelper, NamespaceHelper} import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation @@ -898,11 +898,7 @@ object CheckViewReferences extends (LogicalPlan => Unit) { // callers get the VIEWS-specific error rather than a generic cast failure later. 
private def requireSupportsView(resolved: LogicalPlan): Unit = { val (catalog, _) = catalogAndIdent(resolved) - val supportsView = catalog match { - case tc: TableCatalog => tc.capabilities().contains(TableCatalogCapability.SUPPORTS_VIEW) - case _ => false - } - if (!supportsView) { + if (!CatalogV2Util.supportsView(catalog)) { throw QueryCompilationErrors.missingCatalogViewsAbilityError(catalog) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala index 3e47b0cbdf19f..4ec977da40829 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala @@ -67,6 +67,17 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio } } + test("read view resolves unqualified refs via multi-part captured namespace") { + // End-to-end coverage of the v2 encoder -> parser round-trip: test_unqualified_multi is a + // view whose captured catalog+namespace is view_catalog.ns1.ns2 (two-part namespace) and + // whose body references `t` unqualified. At read time the unqualified `t` must expand to + // view_catalog.ns1.ns2.t via the captured context -- which TestingViewCatalog resolves to + // its own `t` fixture at that namespace. 
+ checkAnswer( + spark.table("view_catalog.outer_ns.test_unqualified_multi"), + Row("multi")) + } + // --- TableInfo.Builder unit tests for view-specific properties ---------- test("view current catalog/namespace are serialized into a single property") { @@ -352,9 +363,8 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio "SELECT x FROM spark_catalog.default.t WHERE x > 2") checkAnswer(spark.table("staging_catalog.default.v_atomic"), Row(3)) - // CREATE IF NOT EXISTS on an existing view -- no-op, but the body is still validated - // first (the atomic exec builds the TableInfo before the allow-existing short-circuit), - // so a malformed body is rejected even when creation is skipped. + // CREATE IF NOT EXISTS on an existing view -- no-op; the atomic exec short-circuits on + // tryLoadTable() before buildTableInfo, matching the non-atomic path. sql("CREATE VIEW IF NOT EXISTS staging_catalog.default.v_atomic AS " + "SELECT x + 100 AS x FROM spark_catalog.default.t") // Value unchanged -- IF NOT EXISTS was a no-op. @@ -806,6 +816,24 @@ class TestingViewCatalog extends TableCatalog { .withCurrentCatalogAndNamespace("spark_catalog", Array("default")) .build() new MetadataOnlyTable(info) + case "test_unqualified_multi" => + // View whose captured catalog+namespace is view_catalog.ns1.ns2 (two-part). The + // unqualified `t` in the body must resolve via that captured context to + // view_catalog.ns1.ns2.t, which this catalog also serves (see `t` case below). + val info = new TableInfo.Builder() + .withSchema(new StructType().add("col", "string")) + .withViewText("SELECT col FROM t") + .withCurrentCatalogAndNamespace("view_catalog", Array("ns1", "ns2")) + .build() + new MetadataOnlyTable(info) + case "t" if ident.namespace().toSeq == Seq("ns1", "ns2") => + // Target of test_unqualified_multi's unqualified reference. Self-contained view so + // the test doesn't need external data. 
+ val info = new TableInfo.Builder() + .withSchema(new StructType().add("col", "string")) + .withViewText("SELECT 'multi' AS col") + .build() + new MetadataOnlyTable(info) case _ => throw new NoSuchTableException(ident) } } From 6329d77067ce19d6695f607ac91a2f8bb5777e95 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 23 Apr 2026 04:34:41 +0000 Subject: [PATCH 24/59] address self-review findings: pin more orphan v2-view plans, route DROP VIEW Co-authored-by: Isaac --- .../sql/connector/catalog/TableCatalog.java | 4 +- .../catalog/TableCatalogCapability.java | 16 +++-- .../analysis/ResolveSessionCatalog.scala | 6 +- .../datasources/v2/DataSourceV2Strategy.scala | 34 ++++++++- .../DataSourceV2MetadataOnlyTableSuite.scala | 9 +-- .../DataSourceV2MetadataOnlyViewSuite.scala | 70 +++++++++++++++++++ 6 files changed, 125 insertions(+), 14 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java index 05e22cb11ae3f..6843e68ad50ff 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java @@ -184,7 +184,7 @@ default TableSummary[] listTableSummaries(String[] namespace) * @param ident a table identifier * @return the table's metadata * @throws NoSuchTableException If the table doesn't exist, or is a view and the catalog - * does not declare {@link TableCatalogCapability#SUPPORTS_VIEW} + * does not declare {@link TableCatalogCapability#SUPPORTS_VIEW}. 
*/ Table loadTable(Identifier ident) throws NoSuchTableException; @@ -198,7 +198,7 @@ default TableSummary[] listTableSummaries(String[] namespace) * @param writePrivileges * @return the table's metadata * @throws NoSuchTableException If the table doesn't exist or is a view (see - * {@link #loadTable(Identifier)} for the view contract) + * {@link #loadTable(Identifier)} for the view contract). * * @since 3.5.3 */ diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalogCapability.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalogCapability.java index 46afd1f1476f0..80fbb79711b0d 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalogCapability.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalogCapability.java @@ -103,12 +103,16 @@ public enum TableCatalogCapability { *

* Catalogs declaring this capability must round-trip those properties and return a * {@link MetadataOnlyTable} from {@link TableCatalog#loadTable} so Spark's view resolution - * path can expand the view text. {@code ALTER VIEW ... AS} is implemented as a - * {@code dropTable} + {@code createTable} on a plain {@code TableCatalog}, or as - * {@link StagingTableCatalog#stageReplace} when the catalog also implements - * {@link StagingTableCatalog}. Without this capability, Spark rejects {@code CREATE VIEW} - * and {@code ALTER VIEW} statements targeting the catalog up front rather than letting the - * catalog silently persist a table entry that cannot be read as a view. + * path can expand the view text. On a plain {@code TableCatalog}, {@code CREATE VIEW} uses + * {@code createTable} and {@code ALTER VIEW ... AS} is implemented as {@code dropTable} + + * {@code createTable}. On a {@link StagingTableCatalog}, Spark routes + * {@code CREATE VIEW} through {@link StagingTableCatalog#stageCreate}, + * {@code CREATE OR REPLACE VIEW} through {@link StagingTableCatalog#stageCreateOrReplace}, + * and {@code ALTER VIEW ... AS} through {@link StagingTableCatalog#stageReplace} so the + * metadata swap commits atomically. Without this capability, Spark rejects + * {@code CREATE VIEW} and {@code ALTER VIEW} statements targeting the catalog up front + * rather than letting the catalog silently persist a table entry that cannot be read as a + * view. 
*/ SUPPORTS_VIEW } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 7695d912b534d..6ce2df7067985 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -321,7 +321,11 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) case DropView(DropViewInSessionCatalog(ident), ifExists) => DropTableCommand(ident, ifExists, isView = true, purge = false) - case DropView(r @ ResolvedIdentifier(catalog, ident), ifExists) => + // SUPPORTS_VIEW catalogs fall through to `DataSourceV2Strategy`, which routes DROP VIEW to + // `TableCatalog.dropTable` (contractually required to drop views for such catalogs). Other + // non-session catalogs still get the `catalogOperationNotSupported` rejection. + case DropView(r @ ResolvedIdentifier(catalog, ident), ifExists) + if !CatalogV2Util.supportsView(catalog) => if (catalog == FakeSystemCatalog) { DropTempViewCommand(ident, ifExists) } else { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 4289b9f7eb332..9ed529925d502 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.trees.TreePattern.SCALAR_SUBQUERY import org.apache.spark.sql.catalyst.util.{toPrettySQL, GeneratedColumn, IdentityColumn, ResolveDefaultColumns, ResolveTableConstraints, V2ExpressionBuilder} import org.apache.spark.sql.classic.SparkSession 
-import org.apache.spark.sql.connector.catalog.{Identifier, StagingTableCatalog, SupportsDeleteV2, SupportsNamespaces, SupportsPartitionManagement, SupportsWrite, TableCapability, TableCatalog, TableCatalogCapability, TruncatableTable, V1Table} +import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Identifier, StagingTableCatalog, SupportsDeleteV2, SupportsNamespaces, SupportsPartitionManagement, SupportsWrite, TableCapability, TableCatalog, TableCatalogCapability, TruncatableTable, V1Table} import org.apache.spark.sql.connector.catalog.TableChange import org.apache.spark.sql.connector.catalog.index.SupportsIndex import org.apache.spark.sql.connector.expressions.{FieldReference, LiteralValue} @@ -367,6 +367,38 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat throw QueryCompilationErrors.unsupportedTableOperationError( catalog, ident, "DESCRIBE TABLE ... COLUMN") + // Plans that resolve through `UnresolvedTableOrView` reach here with a + // `ResolvedPersistentView` child for non-session v2 views (the v1 rewrite in + // `ResolveSessionCatalog` no longer matches them because `ResolvedViewIdentifier` is gated + // on `isSessionCatalog`). Pin each with `UNSUPPORTED_FEATURE.TABLE_OPERATION` so users get + // a clean `AnalysisException` instead of a generic "No plan for ..." assertion from the + // planner. Tracked for follow-up real handlers in SPARK-52729. + case RefreshTable(ResolvedPersistentView(catalog, ident, _)) => + throw QueryCompilationErrors.unsupportedTableOperationError( + catalog, ident, "REFRESH TABLE") + + case AnalyzeTable(ResolvedPersistentView(catalog, ident, _), _, _) => + throw QueryCompilationErrors.unsupportedTableOperationError( + catalog, ident, "ANALYZE TABLE") + + case AnalyzeColumn(ResolvedPersistentView(catalog, ident, _), _, _) => + throw QueryCompilationErrors.unsupportedTableOperationError( + catalog, ident, "ANALYZE TABLE ... 
FOR COLUMNS") + + // SHOW PARTITIONS on a view is already rejected during analysis: the parser uses + // `UnresolvedTable` (not `UnresolvedTableOrView`), so `CheckAnalysis` surfaces + // `EXPECT_TABLE_NOT_VIEW.NO_ALTERNATIVE` before planning. No strategy case needed. + + // DROP VIEW on a non-session SUPPORTS_VIEW catalog. The v1 rewrite in `ResolveSessionCatalog` + // skips SUPPORTS_VIEW catalogs (guard on line 324 case) so they fall through here. Reuses + // `DropTableExec` because `TableCatalog.dropTable` is contractually required to drop views + // at the same identifier for SUPPORTS_VIEW catalogs. + case DropView(r @ ResolvedIdentifier(catalog, ident), ifExists) + if CatalogV2Util.supportsView(catalog) => + val invalidateFunc = () => CommandUtils.uncacheTableOrView(session, r) + DropTableExec( + catalog.asTableCatalog, ident, ifExists, purge = false, invalidateFunc) :: Nil + case ReplaceTableAsSelect(ResolvedIdentifier(catalog, ident), parts, query, tableSpec: TableSpec, options, orCreate, true) => catalog match { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala index 98b1b1b8b92ff..0efda0d6a26a3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala @@ -98,10 +98,11 @@ class DataSourceV2MetadataOnlyTableSuite extends QueryTest with SharedSparkSessi sql(s"SELECT `$loc`.test_json.col FROM $tableName"), Seq(Row("0"), Row("1"), Row("2"))) - // 3-part reference must use `table_catalog`. The v1 `SessionCatalog.getRelation` that - // `RelationResolution.createRelation` delegates to hardcodes `spark_catalog` in the - // SubqueryAlias qualifier, so the attribute qualifier becomes - // `[spark_catalog, , test_json]` -- the reference below fails to resolve. 
+ // 3-part reference uses the real catalog name. `V1Table.toCatalogTable` sets + // `CatalogTable.multipartIdentifier` to `[table_catalog, , test_json]`; the + // SessionCatalog change in this PR makes `getRelation` prefer that over the hardcoded + // `spark_catalog` qualifier, so the SubqueryAlias carries the real catalog and this + // 3-part column ref resolves. checkAnswer( sql(s"SELECT $tableName.col FROM $tableName"), Seq(Row("0"), Row("1"), Row("2"))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala index 4ec977da40829..dabb4c698a384 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala @@ -703,6 +703,76 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio sql("DESCRIBE TABLE view_catalog.default.v_describe_col x")) } + // These plans reach `DataSourceV2Strategy` with a `ResolvedPersistentView` child on a + // non-session v2 view (because `ResolvedV1TableOrViewIdentifier` now skips non-session views). + // Without explicit pins they would hit `QueryPlanner`'s `assert(pruned.hasNext, "No plan for + // ...")` and surface a raw AssertionError. Pin each to UNSUPPORTED_FEATURE.TABLE_OPERATION. + + test("REFRESH TABLE on a v2 view is rejected") { + seedV2View("v_refresh") + assertUnsupportedViewOp("REFRESH TABLE view_catalog.default.v_refresh") + } + + test("ANALYZE TABLE on a v2 view is rejected") { + seedV2View("v_analyze") + assertUnsupportedViewOp( + "ANALYZE TABLE view_catalog.default.v_analyze COMPUTE STATISTICS") + } + + test("ANALYZE TABLE ... 
FOR COLUMNS on a v2 view is rejected") { + seedV2View("v_analyze_cols") + assertUnsupportedViewOp( + "ANALYZE TABLE view_catalog.default.v_analyze_cols COMPUTE STATISTICS FOR COLUMNS x") + } + + // --- DROP VIEW on a v2 catalog -------------------------------- + + test("DROP VIEW on a SUPPORTS_VIEW v2 catalog drops the view") { + val catalog = spark.sessionState.catalogManager.catalog("view_catalog") + .asInstanceOf[TestingViewCatalog] + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW view_catalog.default.v_drop AS " + + "SELECT x FROM spark_catalog.default.t") + assert(catalog.tableExists(Identifier.of(Array("default"), "v_drop"))) + sql("DROP VIEW view_catalog.default.v_drop") + assert(!catalog.tableExists(Identifier.of(Array("default"), "v_drop"))) + } + } + + test("DROP VIEW IF EXISTS on a v2 catalog is a no-op when the view is missing") { + // Exercises the `ifExists=true` path -- DropTableExec should not throw when the view + // doesn't exist on a SUPPORTS_VIEW catalog. 
+ sql("DROP VIEW IF EXISTS view_catalog.default.v_never_existed") + } + + test("DROP VIEW on a StagingTableCatalog drops the view") { + withSQLConf( + "spark.sql.catalog.staging_catalog" -> classOf[TestingStagingCatalog].getName) { + val catalog = spark.sessionState.catalogManager.catalog("staging_catalog") + .asInstanceOf[TestingStagingCatalog] + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW staging_catalog.default.v_drop_atomic AS " + + "SELECT x FROM spark_catalog.default.t") + assert(catalog.tableExists(Identifier.of(Array("default"), "v_drop_atomic"))) + sql("DROP VIEW staging_catalog.default.v_drop_atomic") + assert(!catalog.tableExists(Identifier.of(Array("default"), "v_drop_atomic"))) + } + } + } + + test("DROP VIEW on a catalog without SUPPORTS_VIEW is rejected") { + withSQLConf( + "spark.sql.catalog.no_view_catalog" -> classOf[TestingTableOnlyCatalog].getName) { + val ex = intercept[AnalysisException] { + sql("DROP VIEW no_view_catalog.default.v") + } + // Preserves the pre-PR error surface for non-SUPPORTS_VIEW catalogs. + assert(ex.getMessage.toLowerCase(java.util.Locale.ROOT).contains("views")) + } + } + // --- SHOW TABLES / SHOW VIEWS on a v2 catalog -------------------------------- private def seedV2Table(name: String): Unit = { From 6f1e4a7b0e21155a5548da7ff934d54e791066cd Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 23 Apr 2026 05:18:01 +0000 Subject: [PATCH 25/59] update 'View commands are not supported' test for the new error shape Pre-PR, Analyzer.lookupTableOrView had a viewOnly gate that rejected all UnresolvedView lookups on non-session catalogs up front with UNSUPPORTED_FEATURE.CATALOG_OPERATION. That gate was removed earlier in this PR. For non-SUPPORTS_VIEW catalogs the ALTER VIEW path now falls through to CheckAnalysis, which surfaces TABLE_OR_VIEW_NOT_FOUND when the view does not exist. 
Either error is acceptable; this aligns the test with the simpler no-gate behavior. Co-authored-by: Isaac --- .../sql/connector/DataSourceV2SQLSuite.scala | 29 +++++++++++-------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index d2cc342f48112..fa622f8f29940 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -2966,19 +2966,24 @@ class DataSourceV2SQLSuiteV1Filter } } - test("View commands are not supported in v2 catalogs") { - def validateViewCommand(sqlStatement: String): Unit = { - val e = analysisException(sqlStatement) - checkError( - e, - condition = "UNSUPPORTED_FEATURE.CATALOG_OPERATION", - parameters = Map("catalogName" -> "`testcat`", "operation" -> "views")) - } + test("View commands on a v2 catalog without SUPPORTS_VIEW are rejected") { + // DROP VIEW resolves through `UnresolvedIdentifier` and is rejected by + // `ResolveSessionCatalog` with `UNSUPPORTED_FEATURE.CATALOG_OPERATION`. + checkError( + analysisException("DROP VIEW testcat.v"), + condition = "UNSUPPORTED_FEATURE.CATALOG_OPERATION", + parameters = Map("catalogName" -> "`testcat`", "operation" -> "views")) - validateViewCommand("DROP VIEW testcat.v") - validateViewCommand("ALTER VIEW testcat.v SET TBLPROPERTIES ('key' = 'val')") - validateViewCommand("ALTER VIEW testcat.v UNSET TBLPROPERTIES ('key')") - validateViewCommand("ALTER VIEW testcat.v AS SELECT 1") + // ALTER VIEW variants resolve through `UnresolvedView`; when the view does not exist + // (and this catalog cannot host views), `CheckAnalysis` surfaces TABLE_OR_VIEW_NOT_FOUND. 
+ Seq( + "ALTER VIEW testcat.v SET TBLPROPERTIES ('key' = 'val')", + "ALTER VIEW testcat.v UNSET TBLPROPERTIES ('key')", + "ALTER VIEW testcat.v AS SELECT 1" + ).foreach { stmt => + val e = analysisException(stmt) + assert(e.getCondition == "TABLE_OR_VIEW_NOT_FOUND", s"got ${e.getCondition} for $stmt") + } } test("SPARK-33924: INSERT INTO .. PARTITION preserves the partition location") { From ce557d7abd2165b4856750004c3787a4850cb2f7 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 23 Apr 2026 05:29:03 +0000 Subject: [PATCH 26/59] restore viewOnly gate with SUPPORTS_VIEW carve-out; unify view-DDL rejection on MISSING_CATALOG_ABILITY.VIEWS Without the gate, ALTER VIEW variants on a non-SUPPORTS_VIEW v2 catalog fell through to TABLE_OR_VIEW_NOT_FOUND when the view did not exist -- misleading, since the catalog cannot host views at all. Bring back `lookupTableOrView`'s `viewOnly` flag and reject non-session non-SUPPORTS_VIEW catalogs upfront. Switch DROP VIEW's existing rejection path and the restored gate to use the same MISSING_CATALOG_ABILITY.VIEWS error class CheckViewReferences already uses for CREATE/ALTER VIEW AS, so users see one consistent error for the "catalog does not support views" condition across all view DDL. 
Co-authored-by: Isaac --- .../sql/catalyst/analysis/Analyzer.scala | 14 +++++++-- .../analysis/ResolveSessionCatalog.scala | 5 ++-- .../sql/connector/DataSourceV2SQLSuite.scala | 29 ++++++++----------- 3 files changed, 27 insertions(+), 21 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index d218e33383645..e1d40a3467d83 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1078,7 +1078,7 @@ class Analyzer( }.getOrElse(u) case u @ UnresolvedView(identifier, cmd, allowTemp, suggestAlternative) => - lookupTableOrView(identifier).map { + lookupTableOrView(identifier, viewOnly = true).map { case _: ResolvedTempView if !allowTemp => throw QueryCompilationErrors.expectPermanentViewNotTempViewError( identifier, cmd, u) @@ -1102,13 +1102,23 @@ class Analyzer( * Resolves relations to `ResolvedTable` or `Resolved[Temp/Persistent]View`. This is * for resolving DDL and misc commands. UnresolvedView callers reject non-view results * downstream via `expectViewNotTableError`. + * + * When `viewOnly=true`, non-session catalogs that do not declare SUPPORTS_VIEW are + * rejected up front with MISSING_CATALOG_ABILITY.VIEWS -- they cannot host views at + * all, so surfacing a downstream "view not found" would hide the real reason. 
*/ - private def lookupTableOrView(identifier: Seq[String]): Option[LogicalPlan] = { + private def lookupTableOrView( + identifier: Seq[String], + viewOnly: Boolean = false): Option[LogicalPlan] = { relationResolution.lookupTempView(identifier).map { tempView => ResolvedTempView(identifier.asIdentifier, tempView.tableMeta) }.orElse { relationResolution.expandIdentifier(identifier) match { case CatalogAndIdentifier(catalog, ident) => + if (viewOnly && !CatalogV2Util.isSessionCatalog(catalog) && + !CatalogV2Util.supportsView(catalog)) { + throw QueryCompilationErrors.missingCatalogViewsAbilityError(catalog) + } CatalogV2Util.loadTable(catalog, ident).map { case v1Table: V1Table if CatalogV2Util.isSessionCatalog(catalog) && v1Table.v1Table.tableType == CatalogTableType.VIEW => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 6ce2df7067985..8a374d3bbf345 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -323,13 +323,14 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) // SUPPORTS_VIEW catalogs fall through to `DataSourceV2Strategy`, which routes DROP VIEW to // `TableCatalog.dropTable` (contractually required to drop views for such catalogs). Other - // non-session catalogs still get the `catalogOperationNotSupported` rejection. + // non-session catalogs get `MISSING_CATALOG_ABILITY.VIEWS`, matching the error raised from + // `CheckViewReferences` for CREATE/ALTER VIEW and from the analyzer gate on UnresolvedView. 
case DropView(r @ ResolvedIdentifier(catalog, ident), ifExists) if !CatalogV2Util.supportsView(catalog) => if (catalog == FakeSystemCatalog) { DropTempViewCommand(ident, ifExists) } else { - throw QueryCompilationErrors.catalogOperationNotSupported(catalog, "views") + throw QueryCompilationErrors.missingCatalogViewsAbilityError(catalog) } case c @ CreateNamespace(DatabaseNameInSessionCatalog(name), _, _) if conf.useV1Command => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index fa622f8f29940..976c3485f1f77 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -2966,24 +2966,19 @@ class DataSourceV2SQLSuiteV1Filter } } - test("View commands on a v2 catalog without SUPPORTS_VIEW are rejected") { - // DROP VIEW resolves through `UnresolvedIdentifier` and is rejected by - // `ResolveSessionCatalog` with `UNSUPPORTED_FEATURE.CATALOG_OPERATION`. - checkError( - analysisException("DROP VIEW testcat.v"), - condition = "UNSUPPORTED_FEATURE.CATALOG_OPERATION", - parameters = Map("catalogName" -> "`testcat`", "operation" -> "views")) - - // ALTER VIEW variants resolve through `UnresolvedView`; when the view does not exist - // (and this catalog cannot host views), `CheckAnalysis` surfaces TABLE_OR_VIEW_NOT_FOUND. 
- Seq( - "ALTER VIEW testcat.v SET TBLPROPERTIES ('key' = 'val')", - "ALTER VIEW testcat.v UNSET TBLPROPERTIES ('key')", - "ALTER VIEW testcat.v AS SELECT 1" - ).foreach { stmt => - val e = analysisException(stmt) - assert(e.getCondition == "TABLE_OR_VIEW_NOT_FOUND", s"got ${e.getCondition} for $stmt") + test("View commands are not supported in v2 catalogs without SUPPORTS_VIEW") { + def validateViewCommand(sqlStatement: String): Unit = { + val e = analysisException(sqlStatement) + checkError( + e, + condition = "MISSING_CATALOG_ABILITY.VIEWS", + parameters = Map("plugin" -> "testcat")) } + + validateViewCommand("DROP VIEW testcat.v") + validateViewCommand("ALTER VIEW testcat.v SET TBLPROPERTIES ('key' = 'val')") + validateViewCommand("ALTER VIEW testcat.v UNSET TBLPROPERTIES ('key')") + validateViewCommand("ALTER VIEW testcat.v AS SELECT 1") } test("SPARK-33924: INSERT INTO .. PARTITION preserves the partition location") { From 1a532b9ef26db5b83ee407e35ecaa086c7c371bd Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 23 Apr 2026 06:51:41 +0000 Subject: [PATCH 27/59] address self-review findings: require explicit MetadataOnlyTable name, drop defensive AlterViewAs gate, misc cleanups - MetadataOnlyTable: drop the no-arg constructor (and the "data_source_table_or_view" placeholder it defaulted to); require callers to pass a name, typically ident.toString. Before this, DESCRIBE TABLE EXTENDED on a MetadataOnlyTable-backed table showed "Name: data_source_table_or_view" instead of the real identifier. Updated all (test-only) callsites and added a DESCRIBE pin. - V2AlterViewPreparation.existingTable: fold through the parent trait's tryLoadTable helper so the load/view-check lives in one place. - CheckViewReferences: remove the redundant requireSupportsView call on the AlterViewAs branch. The analyzer's lookupTableOrView(viewOnly=true) already rejects non-SUPPORTS_VIEW catalogs before we get here. 
- V1Table.toCatalogTable: default owner to "" (matches v1 CatalogTable default) instead of "unknown". - Tests: add ALTER VIEW rejections for temp views and temp variables to mirror the CREATE VIEW matrix; fix the stale ALTER-capability-gate test comment; add a DESCRIBE-extended pin for the MetadataOnlyTable name surface. - Doc: fix a hardcoded line-number reference in DataSourceV2Strategy and a split Scaladoc link in v2Commands.ShowViews. Co-authored-by: Isaac --- .../connector/catalog/MetadataOnlyTable.java | 14 +-- .../catalyst/plans/logical/v2Commands.scala | 4 +- .../spark/sql/connector/catalog/V1Table.scala | 2 +- .../spark/sql/execution/command/views.scala | 4 +- .../datasources/v2/AlterV2ViewExec.scala | 33 +++---- .../datasources/v2/DataSourceV2Strategy.scala | 6 +- .../DataSourceV2MetadataOnlyTableSuite.scala | 24 ++++- .../DataSourceV2MetadataOnlyViewSuite.scala | 93 +++++++++++++------ 8 files changed, 116 insertions(+), 64 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java index b75e0fbe4101d..e731484422ae8 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java @@ -39,15 +39,17 @@ */ @Evolving public class MetadataOnlyTable implements Table { - private static final String DEFAULT_NAME = "data_source_table_or_view"; - private final TableInfo info; private final String name; - public MetadataOnlyTable(TableInfo info) { - this(info, DEFAULT_NAME); - } - + /** + * @param info metadata for the table or view. + * @param name human-readable name for this table, used by places that read {@link #name()} + * (e.g. the {@code Name} row of {@code DESCRIBE TABLE EXTENDED}). 
Catalogs + * returning a {@code MetadataOnlyTable} from {@link TableCatalog#loadTable} + * should typically pass {@code ident.toString()}, matching the quoted + * multi-part form used elsewhere for v2 identifiers. + */ public MetadataOnlyTable(TableInfo info, String name) { this.info = info; this.name = name; diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 5826da9fc8402..60d02dc96447c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -1351,8 +1351,8 @@ case class ShowTablePartition( /** * The logical plan of the SHOW VIEWS command. * - * Session-catalog targets fall back to v1 [[org.apache.spark.sql.execution.command - * .ShowViewsCommand]] via `ResolveSessionCatalog`. v2 catalogs that declare + * Session-catalog targets fall back to v1 `ShowViewsCommand` via `ResolveSessionCatalog`. + * v2 catalogs that declare * [[org.apache.spark.sql.connector.catalog.TableCatalogCapability#SUPPORTS_VIEW]] are handled * in `DataSourceV2Strategy` (enumerates via `listTableSummaries` filtered to * `VIEW_TABLE_TYPE`). 
Non-SUPPORTS_VIEW v2 catalogs are rejected up front in diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala index e12e823a0c30a..d39d83de59c40 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala @@ -174,7 +174,7 @@ private[sql] object V1Table { provider = props.get(TableCatalog.PROP_PROVIDER), partitionColumnNames = partCols, bucketSpec = bucketSpec, - owner = props.getOrElse(TableCatalog.PROP_OWNER, "unknown"), + owner = props.getOrElse(TableCatalog.PROP_OWNER, ""), viewText = viewText, viewOriginalText = viewText, comment = props.get(TableCatalog.PROP_COMMENT), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index fb867b7ef3dd5..b8bdd7e3217d2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -919,7 +919,9 @@ object CheckViewReferences extends (LogicalPlan => Unit) { } case av: AlterViewAs if av.isAnalyzed => - requireSupportsView(av.child) + // No capability check here: `Analyzer.lookupTableOrView(identifier, viewOnly=true)` + // already rejects non-SUPPORTS_VIEW catalogs upstream for `UnresolvedView`, so by the + // time an AlterViewAs reaches this rule the catalog is guaranteed to support views. 
val legacyName = legacyNameFor(av.child) verifyTemporaryObjectsNotExists( isTemporary = false, legacyName, av.query, av.referredTempFunctions) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala index 3754a5153507f..9dd5363fecef0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala @@ -43,25 +43,20 @@ import org.apache.spark.util.Utils * re-emits them from the current session, matching v1 `AlterViewAsCommand.alterPermanentView`. */ private[v2] trait V2AlterViewPreparation extends V2ViewPreparation { - protected lazy val existingTable: MetadataOnlyTable = { - val table = try { - catalog.loadTable(identifier) - } catch { - case _: NoSuchTableException => - throw QueryCompilationErrors.noSuchTableError(catalog.name(), identifier) - } - table match { - case mot: MetadataOnlyTable if isViewTable(mot) => mot - case _ => - // Analyzer verified this was a view, but a racing DDL (drop + recreate as a - // non-view table, or a catalog that now returns a different Table subclass for this - // identifier) can invalidate that. Surface as a user-facing error. - throw QueryCompilationErrors.expectViewNotTableError( - (catalog.name() +: identifier.asMultipartIdentifier).toSeq, - cmd = "ALTER VIEW ... AS", - suggestAlternative = false, - t = this) - } + // Reuses `tryLoadTable` / `isViewTable` from the parent trait. A racing DDL between + // analysis and exec (drop, or replace with a non-view table) can invalidate the analyzer's + // ResolvedPersistentView decision -- we re-check here and surface user-facing errors + // rather than propagate the stale resolution. 
+ protected lazy val existingTable: MetadataOnlyTable = tryLoadTable() match { + case None => + throw QueryCompilationErrors.noSuchTableError(catalog.name(), identifier) + case Some(mot: MetadataOnlyTable) if isViewTable(mot) => mot + case _ => + throw QueryCompilationErrors.expectViewNotTableError( + (catalog.name() +: identifier.asMultipartIdentifier).toSeq, + cmd = "ALTER VIEW ... AS", + suggestAlternative = false, + t = this) } // Carry the existing view's full property map forward. Keys the ALTER actually changes are diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 9ed529925d502..b4dce11b45414 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -390,9 +390,9 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat // `EXPECT_TABLE_NOT_VIEW.NO_ALTERNATIVE` before planning. No strategy case needed. // DROP VIEW on a non-session SUPPORTS_VIEW catalog. The v1 rewrite in `ResolveSessionCatalog` - // skips SUPPORTS_VIEW catalogs (guard on line 324 case) so they fall through here. Reuses - // `DropTableExec` because `TableCatalog.dropTable` is contractually required to drop views - // at the same identifier for SUPPORTS_VIEW catalogs. + // skips SUPPORTS_VIEW catalogs (its DropView case has a `!supportsView(catalog)` guard), so + // they fall through here. Reuses `DropTableExec` because `TableCatalog.dropTable` is + // contractually required to drop views at the same identifier for SUPPORTS_VIEW catalogs. 
case DropView(r @ ResolvedIdentifier(catalog, ident), ifExists) if CatalogV2Util.supportsView(catalog) => val invalidateFunc = () => CommandUtils.uncacheTableOrView(session, r) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala index 0efda0d6a26a3..8d3ad19419dff 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala @@ -83,6 +83,24 @@ class DataSourceV2MetadataOnlyTableSuite extends QueryTest with SharedSparkSessi checkAnswer(spark.table(tableName), 0.until(10).map(i => Row(i, -i))) } + test("DESCRIBE TABLE EXTENDED on a non-view MetadataOnlyTable shows the real identifier") { + // MetadataOnlyTable.name() is read by DescribeTableExec's "Name" row. Pin that it + // reflects the catalog-supplied identifier (here TestingDataSourceTableCatalog passes + // `ident.toString`) rather than a generic placeholder, so the DESCRIBE output is + // meaningful for users. 
+ withTempPath { path => + val loc = path.getCanonicalPath + val tableName = s"table_catalog.`$loc`.test_json" + spark.range(1).select($"id".cast("string").as("col")).write.json(loc) + val nameRow = sql(s"DESCRIBE TABLE EXTENDED $tableName") + .collect() + .find(_.getString(0) == "Name") + .getOrElse(fail("DESCRIBE output missing the `Name` row")) + val rendered = nameRow.getString(1) + assert(rendered.contains("test_json"), s"expected the real identifier, got: $rendered") + } + } + test("fully-qualified column reference uses the real catalog name") { withTempPath { path => val loc = path.getCanonicalPath @@ -124,7 +142,7 @@ class TestingDataSourceTableCatalog extends TableCatalog { .withLocation(ident.namespace().head) .withTableType(TableSummary.EXTERNAL_TABLE_TYPE) .build() - new MetadataOnlyTable(info) + new MetadataOnlyTable(info, ident.toString) case "test_partitioned_json" => val partitioning = LogicalExpressions.identity(LogicalExpressions.reference(Seq("c2"))) val info = new TableInfo.Builder() @@ -134,13 +152,13 @@ class TestingDataSourceTableCatalog extends TableCatalog { .withTableType(TableSummary.EXTERNAL_TABLE_TYPE) .withPartitions(Array(partitioning)) .build() - new MetadataOnlyTable(info) + new MetadataOnlyTable(info, ident.toString) case "test_v2" => val info = new TableInfo.Builder() .withSchema(FakeV2Provider.schema) .withProvider(classOf[FakeV2Provider].getName) .build() - new MetadataOnlyTable(info) + new MetadataOnlyTable(info, ident.toString) case _ => throw new NoSuchTableException(ident) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala index dabb4c698a384..ced0329d7b24e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala @@ -86,7 
+86,7 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio .withViewText("SELECT * FROM t") .withCurrentCatalogAndNamespace("spark_catalog", Array("default")) .build() - val table = new MetadataOnlyTable(info) + val table = new MetadataOnlyTable(info, "v") assert(table.properties().get(TableCatalog.PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE) == "spark_catalog.default") } @@ -97,7 +97,7 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio .withViewText("SELECT * FROM t") .withCurrentCatalogAndNamespace("spark_catalog", Array("weird.db", "normal")) .build() - val table = new MetadataOnlyTable(info) + val table = new MetadataOnlyTable(info, "v") assert(table.properties().get(TableCatalog.PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE) == "spark_catalog.`weird.db`.normal") } @@ -107,7 +107,7 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio .withSchema(new StructType().add("col", "string")) .withViewText("SELECT * FROM spark_catalog.default.t") .build() - val table = new MetadataOnlyTable(info) + val table = new MetadataOnlyTable(info, "v") assert(!table.properties().containsKey( TableCatalog.PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE)) } @@ -124,7 +124,7 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio .withViewText("SELECT col FROM t") .withCurrentCatalogAndNamespace("my_cat", Array("db1", "db2")) .build() - val motTable = new MetadataOnlyTable(info) + val motTable = new MetadataOnlyTable(info, "v") // Any CatalogPlugin works here; toCatalogTable only reads `catalog.name()`. 
val catalog = spark.sessionState.catalogManager.catalog("view_catalog") val ct = V1Table.toCatalogTable( @@ -138,7 +138,7 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio .withCurrentCatalogAndNamespace("my_cat", Array("weird.db", "normal")) .build() val ctWeird = V1Table.toCatalogTable( - catalog, Identifier.of(Array("ns"), "v"), new MetadataOnlyTable(infoWeird)) + catalog, Identifier.of(Array("ns"), "v"), new MetadataOnlyTable(infoWeird, "v")) assert(ctWeird.viewCatalogAndNamespace == Seq("my_cat", "weird.db", "normal")) } @@ -453,6 +453,36 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio } } + test("ALTER VIEW rejects reference to a temporary view") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW view_catalog.default.v_alter_tempview AS " + + "SELECT x FROM spark_catalog.default.t") + withTempView("tv_alter") { + spark.range(3).createOrReplaceTempView("tv_alter") + val ex = intercept[AnalysisException] { + sql("ALTER VIEW view_catalog.default.v_alter_tempview AS SELECT id FROM tv_alter") + } + assert(ex.getMessage.toLowerCase(java.util.Locale.ROOT).contains("temporary")) + } + } + } + + test("ALTER VIEW rejects reference to a temporary variable") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW view_catalog.default.v_alter_tempvar AS " + + "SELECT x FROM spark_catalog.default.t") + withSessionVariable("temp_var_alter") { + sql("DECLARE VARIABLE temp_var_alter INT DEFAULT 1") + val ex = intercept[AnalysisException] { + sql("ALTER VIEW view_catalog.default.v_alter_tempvar AS SELECT temp_var_alter AS x") + } + assert(ex.getMessage.toLowerCase(java.util.Locale.ROOT).contains("temporary")) + } + } + } + test("ALTER VIEW preserves user-set TBLPROPERTIES") { withTable("spark_catalog.default.t") { Seq(1, 2, 
3).toDF("x").write.saveAsTable("spark_catalog.default.t") @@ -568,11 +598,13 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio } test("ALTER VIEW on a catalog without SUPPORTS_VIEW fails with MISSING_CATALOG_ABILITY") { - // TestingTableOnlyCatalog does NOT declare SUPPORTS_VIEW but DOES round-trip - // `default.v` as a view-typed MetadataOnlyTable, so view resolution succeeds and we - // reach the capability gate in `CheckViewReferences`. Verifies the gate fires on the - // ALTER path (not only on CREATE), which would otherwise silently regress if - // `SUPPORTS_VIEW` got added to the default capability set. + // ALTER VIEW's identifier is resolved via `UnresolvedView`, whose `viewOnly=true` path + // in `Analyzer.lookupTableOrView` rejects non-SUPPORTS_VIEW catalogs up front with the + // expected error class -- before `loadTable` is even called. `TestingTableOnlyCatalog` + // happens to round-trip `default.v` as a view-typed MetadataOnlyTable, but that fixture + // is not actually consulted on this path. CREATE VIEW's capability check lives in + // `CheckViewReferences`; ALTER VIEW's lives in the analyzer gate. Both yield + // `MISSING_CATALOG_ABILITY.VIEWS`. 
withSQLConf( "spark.sql.catalog.no_view_catalog" -> classOf[TestingTableOnlyCatalog].getName) { val ex = intercept[AnalysisException] { @@ -865,7 +897,7 @@ class TestingViewCatalog extends TableCatalog { override def loadTable(ident: Identifier): Table = { val key = (ident.namespace().toSeq, ident.name()) - Option(createdViews.get(key)).map(new MetadataOnlyTable(_)).getOrElse { + Option(createdViews.get(key)).map(new MetadataOnlyTable(_, ident.toString)).getOrElse { ident.name() match { case "test_view" => val viewProps = new java.util.HashMap[String, String]() @@ -878,14 +910,14 @@ class TestingViewCatalog extends TableCatalog { .withViewText( "SELECT col, col::int AS i FROM spark_catalog.default.t WHERE col = 'b'") .build() - new MetadataOnlyTable(info) + new MetadataOnlyTable(info, ident.toString) case "test_unqualified_view" => val info = new TableInfo.Builder() .withSchema(new StructType().add("col", "string")) .withViewText("SELECT col FROM t WHERE col = 'b'") .withCurrentCatalogAndNamespace("spark_catalog", Array("default")) .build() - new MetadataOnlyTable(info) + new MetadataOnlyTable(info, ident.toString) case "test_unqualified_multi" => // View whose captured catalog+namespace is view_catalog.ns1.ns2 (two-part). The // unqualified `t` in the body must resolve via that captured context to @@ -895,7 +927,7 @@ class TestingViewCatalog extends TableCatalog { .withViewText("SELECT col FROM t") .withCurrentCatalogAndNamespace("view_catalog", Array("ns1", "ns2")) .build() - new MetadataOnlyTable(info) + new MetadataOnlyTable(info, ident.toString) case "t" if ident.namespace().toSeq == Seq("ns1", "ns2") => // Target of test_unqualified_multi's unqualified reference. Self-contained view so // the test doesn't need external data. 
@@ -903,7 +935,7 @@ class TestingViewCatalog extends TableCatalog { .withSchema(new StructType().add("col", "string")) .withViewText("SELECT 'multi' AS col") .build() - new MetadataOnlyTable(info) + new MetadataOnlyTable(info, ident.toString) case _ => throw new NoSuchTableException(ident) } } @@ -919,7 +951,7 @@ class TestingViewCatalog extends TableCatalog { if (createdViews.putIfAbsent(key, info) != null) { throw new TableAlreadyExistsException(ident) } - new MetadataOnlyTable(info) + new MetadataOnlyTable(info, ident.toString) } /** Test-only accessor: returns the stored TableInfo for a created view. */ @@ -974,7 +1006,7 @@ class TestingStagingCatalog extends StagingTableCatalog { (ident.namespace().toSeq, ident.name()) override def loadTable(ident: Identifier): Table = { - Option(views.get(keyOf(ident))).map(new MetadataOnlyTable(_)) + Option(views.get(keyOf(ident))).map(new MetadataOnlyTable(_, ident.toString)) .getOrElse(throw new NoSuchTableException(ident)) } @@ -984,21 +1016,24 @@ class TestingStagingCatalog extends StagingTableCatalog { if (views.putIfAbsent(keyOf(ident), info) != null) { throw new TableAlreadyExistsException(ident) } - new MetadataOnlyTable(info) + new MetadataOnlyTable(info, ident.toString) } override def stageCreate(ident: Identifier, info: TableInfo): StagedTable = { if (views.containsKey(keyOf(ident))) throw new TableAlreadyExistsException(ident) - new RecordingStagedTable(info, () => views.put(keyOf(ident), info), () => ()) + new RecordingStagedTable( + info, ident.toString, () => views.put(keyOf(ident), info), () => ()) } override def stageReplace(ident: Identifier, info: TableInfo): StagedTable = { if (!views.containsKey(keyOf(ident))) throw new NoSuchTableException(ident) - new RecordingStagedTable(info, () => views.put(keyOf(ident), info), () => ()) + new RecordingStagedTable( + info, ident.toString, () => views.put(keyOf(ident), info), () => ()) } override def stageCreateOrReplace(ident: Identifier, info: TableInfo): 
StagedTable = { - new RecordingStagedTable(info, () => views.put(keyOf(ident), info), () => ()) + new RecordingStagedTable( + info, ident.toString, () => views.put(keyOf(ident), info), () => ()) } override def alterTable(ident: Identifier, changes: TableChange*): Table = @@ -1017,21 +1052,21 @@ class TestingStagingCatalog extends StagingTableCatalog { private class RecordingStagedTable( info: TableInfo, + name: String, onCommit: () => Unit, - onAbort: () => Unit) extends MetadataOnlyTable(info) with StagedTable { + onAbort: () => Unit) extends MetadataOnlyTable(info, name) with StagedTable { override def commitStagedChanges(): Unit = onCommit() override def abortStagedChanges(): Unit = onAbort() } /** - * A v2 catalog that does not declare SUPPORTS_VIEW. Used to exercise the capability gate: - * it returns a view-typed [[MetadataOnlyTable]] from `loadTable`, so ALTER VIEW progresses - * past view resolution and actually hits the gate in [[CheckViewReferences]]. + * A v2 catalog that does not declare SUPPORTS_VIEW. Used by capability-gate tests. The + * gate actually fires in `Analyzer.lookupTableOrView(viewOnly=true)` for ALTER VIEW and in + * [[CheckViewReferences]] for CREATE VIEW -- in both cases before `loadTable` is called -- + * so the pre-seeded view fixture is effectively unused on the happy-path-error flow. It's + * kept to make future tests that deliberately bypass the upstream gate easy to write. */ class TestingTableOnlyCatalog extends TableCatalog { - // Pre-seeded view at default.v, used by the ALTER VIEW capability-gate test. Stored here - // rather than in createTable so tests don't need to first create the view through Spark - // (which would itself be blocked by the capability gate they're verifying). 
private val fixtureView: TableInfo = new TableInfo.Builder() .withSchema(new StructType().add("x", "int")) .withViewText("SELECT 1 AS x") @@ -1039,7 +1074,7 @@ class TestingTableOnlyCatalog extends TableCatalog { override def loadTable(ident: Identifier): Table = if (ident.namespace().toSeq == Seq("default") && ident.name() == "v") { - new MetadataOnlyTable(fixtureView) + new MetadataOnlyTable(fixtureView, ident.toString) } else { throw new NoSuchTableException(ident) } From c2fccb06ea92d77fe0fdfa7dadc4fd2f2c6ecf4c Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 23 Apr 2026 07:28:03 +0000 Subject: [PATCH 28/59] address self-review findings: stamp PROP_OWNER on v2 CREATE VIEW, preserve on ALTER; minor dash nit Co-authored-by: Isaac --- .../datasources/v2/AlterV2ViewExec.scala | 4 ++++ .../datasources/v2/CreateV2ViewExec.scala | 15 ++++++++++++--- .../DataSourceV2MetadataOnlyViewSuite.scala | 18 ++++++++++++++++++ 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala index 9dd5363fecef0..2023cb730c3fb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala @@ -76,6 +76,10 @@ private[v2] trait V2AlterViewPreparation extends V2ViewPreparation { override def userSpecifiedColumns: Seq[(String, Option[String])] = Seq.empty override def comment: Option[String] = existingProp(TableCatalog.PROP_COMMENT) override def collation: Option[String] = existingProp(TableCatalog.PROP_COLLATION) + // Preserve the existing view's owner (v1-parity with AlterViewAsCommand's viewMeta.copy, + // which leaves `owner` untouched). If the existing view has no PROP_OWNER, pass it through + // as None so the replacement TableInfo also has no owner. 
+ override def owner: Option[String] = existingProp(TableCatalog.PROP_OWNER) override def userProperties: Map[String, String] = existingProps // Read the schema binding mode directly from the properties map; shares decoding with diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala index 6482f6f6f8f21..5dbe14a8e50bd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala @@ -20,8 +20,7 @@ package org.apache.spark.sql.execution.datasources.v2 import scala.jdk.CollectionConverters._ import org.apache.spark.SparkException -import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.{CurrentUserContext, InternalRow, TableIdentifier} import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, ResolvedIdentifier, SchemaEvolution, TableAlreadyExistsException, ViewSchemaMode} import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan @@ -49,13 +48,14 @@ private[v2] trait V2ViewPreparation extends LeafV2CommandExec { def userSpecifiedColumns: Seq[(String, Option[String])] def comment: Option[String] def collation: Option[String] + def owner: Option[String] def userProperties: Map[String, String] def originalText: String def query: LogicalPlan def viewSchemaMode: ViewSchemaMode // Build a synthetic v1 TableIdentifier for error messages and for ViewHelper methods that - // accept it purely for rendering. This carries no semantic weight - the v2 Identifier is the + // accept it purely for rendering. This carries no semantic weight -- the v2 Identifier is the // actual target. 
protected lazy val legacyName: TableIdentifier = identifier.asLegacyTableIdentifier(catalog.name()) @@ -104,10 +104,15 @@ private[v2] trait V2ViewPreparation extends LeafV2CommandExec { viewSchemaMode = viewSchemaMode, catalogAndNamespaceEncoder = v2Encoder) + // CREATE stamps the current user into PROP_OWNER (matching v2 CREATE TABLE via + // CatalogV2Util.withDefaultOwnership and v1 CREATE VIEW via CatalogTable.owner's default); + // ALTER preserves the existing view's owner (v1-parity with AlterViewAsCommand's + // viewMeta.copy). Both cases are expressed via the `owner` hook provided by the subclass. val builder = new TableInfo.Builder() .withSchema(aliasedSchema) .withProperties(viewProps.asJava) .withViewText(originalText) + owner.foreach(builder.withOwner) comment.foreach(builder.withComment) collation.foreach(builder.withCollation) builder.build() @@ -155,6 +160,8 @@ case class CreateV2ViewExec( replace: Boolean, viewSchemaMode: ViewSchemaMode) extends V2ViewPreparation { + override def owner: Option[String] = Some(CurrentUserContext.getCurrentUser) + override protected def run(): Seq[InternalRow] = { // Probe the catalog before preparing the view body so `IF NOT EXISTS` short-circuits // without running `aliasPlan` / `generateViewProperties`, matching v1 @@ -205,6 +212,8 @@ case class AtomicCreateV2ViewExec( replace: Boolean, viewSchemaMode: ViewSchemaMode) extends V2ViewPreparation { + override def owner: Option[String] = Some(CurrentUserContext.getCurrentUser) + override val metrics: Map[String, SQLMetric] = DataSourceV2Utils.commitMetrics(sparkContext, catalog) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala index ced0329d7b24e..3a8caa3aa8caa 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala @@ -499,6 +499,24 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio } } + test("CREATE VIEW stamps PROP_OWNER on the stored TableInfo") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW view_catalog.default.v_owner_create AS " + + "SELECT x FROM spark_catalog.default.t") + + val catalog = spark.sessionState.catalogManager.catalog("view_catalog") + .asInstanceOf[TestingViewCatalog] + val info = catalog.getStoredView(Array("default"), "v_owner_create") + // v2 CREATE VIEW stamps the current user into PROP_OWNER, matching v2 CREATE TABLE + // (via CatalogV2Util.withDefaultOwnership) and v1 CREATE VIEW (via CatalogTable.owner's + // default). Without this, the ALTER VIEW preservation test below would have nothing to + // carry forward on a v2-created view. + val owner = info.properties().get(TableCatalog.PROP_OWNER) + assert(owner != null && owner.nonEmpty, s"expected a non-empty owner, got: $owner") + } + } + test("ALTER VIEW preserves PROP_OWNER (v1-parity)") { val catalog = spark.sessionState.catalogManager.catalog("view_catalog") .asInstanceOf[TestingViewCatalog] From 21821760ecf4c313bed23b4c272d8dc95bf1b189 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 23 Apr 2026 10:38:00 +0000 Subject: [PATCH 29/59] address self-review findings: rework v2 view API to ViewInfo; fix multi-part error rendering - Introduce ViewInfo extends TableInfo carrying typed fields (queryText, currentCatalog, currentNamespace, sqlConfigs, schemaMode, queryColumnNames). SUPPORTS_VIEW catalogs branch on `instanceof ViewInfo` inside createTable and the StagingTableCatalog staging variants; loadTable returns MetadataOnlyTable wrapping a ViewInfo for views. 
ViewInfo's ctor auto-sets PROP_TABLE_TYPE=VIEW so generic viewers (listTableSummaries default impl, DESCRIBE) classify correctly. - Remove the property-bag encoding: PROP_VIEW_TEXT, PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE, VIEW_CONF_PREFIX gone from TableCatalog; the corresponding TABLE_RESERVED_PROPERTIES entries gone from CatalogV2Util; CatalogTable.VIEW_SQL_CONFIG_PREFIX reverted to its pre-PR form. - Delete the dormant ViewCatalog / View / ViewChange / old ViewInfo (@DeveloperApi but never wired into analyzer/planner) since TableCatalog + SUPPORTS_VIEW subsumes it. - Fix multi-level-namespace rendering in four view error constructors: viewAlreadyExistsError, unsupportedCreateOrReplaceViewOnTableError, and the two CREATE_VIEW_COLUMN_ARITY_MISMATCH errors now take Seq[String] instead of a lossy TableIdentifier (asLegacyTableIdentifier collapsed cat.ns1.ns2.v to cat.ns2.v). v2 callers pass catalog.name +: ident.asMultipartIdentifier; v1 callers pass name.nameParts. - MetadataOnlyTable.constraints() now delegates to info.constraints() instead of returning an empty array. 
--- .../connector/catalog/MetadataOnlyTable.java | 20 +- .../catalog/StagingTableCatalog.java | 18 +- .../sql/connector/catalog/TableCatalog.java | 31 +-- .../catalog/TableCatalogCapability.java | 41 ++-- .../sql/connector/catalog/TableInfo.java | 116 ++++----- .../spark/sql/connector/catalog/View.java | 74 ------ .../sql/connector/catalog/ViewCatalog.java | 199 --------------- .../sql/connector/catalog/ViewChange.java | 79 ------ .../spark/sql/connector/catalog/ViewInfo.java | 226 ++++++++---------- .../sql/catalyst/analysis/Analyzer.scala | 10 +- .../sql/catalyst/catalog/interface.scala | 4 +- .../sql/connector/catalog/CatalogV2Util.scala | 4 +- .../spark/sql/connector/catalog/V1Table.scala | 97 +++++--- .../sql/errors/QueryCompilationErrors.scala | 18 +- .../command/metricViewCommands.scala | 4 +- .../spark/sql/execution/command/views.scala | 15 +- .../datasources/v2/AlterV2ViewExec.scala | 66 ++--- .../datasources/v2/CreateV2ViewExec.scala | 98 ++++---- .../DataSourceV2MetadataOnlyViewSuite.scala | 207 ++++++++-------- 19 files changed, 474 insertions(+), 853 deletions(-) delete mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/View.java delete mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewCatalog.java delete mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewChange.java diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java index e731484422ae8..5044b9f451820 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java @@ -22,18 +22,19 @@ import java.util.Set; import org.apache.spark.annotation.Evolving; +import org.apache.spark.sql.connector.catalog.constraints.Constraint; import 
org.apache.spark.sql.connector.expressions.Transform; /** * A concrete {@code Table} implementation that contains only table metadata, deferring * read/write to Spark. It represents a general Spark data source table or a Spark view; - * Spark resolves the table provider into a data source or expands the view text at read time. + * Spark resolves the table provider into a data source (for tables) or expands the view text + * (for views) at read time. *

- * Catalogs build the metadata via {@link TableInfo.Builder} (which provides convenience - * setters for reserved properties such as {@link TableCatalog#PROP_PROVIDER}, - * {@link TableCatalog#PROP_LOCATION}, {@link TableCatalog#PROP_VIEW_TEXT}, etc.) and wrap - * the resulting {@link TableInfo} in a {@code MetadataOnlyTable} to return from - * {@link TableCatalog#loadTable(Identifier)}. + * Catalogs build the metadata via {@link TableInfo.Builder} (for data-source tables) or + * {@link ViewInfo.Builder} (for views) and wrap the result in a {@code MetadataOnlyTable} to + * return from {@link TableCatalog#loadTable(Identifier)}. Downstream consumers distinguish + * the two by checking {@code getTableInfo() instanceof ViewInfo}. * * @since 4.2.0 */ @@ -43,7 +44,7 @@ public class MetadataOnlyTable implements Table { private final String name; /** - * @param info metadata for the table or view. + * @param info metadata for the table or view. Pass a {@link ViewInfo} for a view. * @param name human-readable name for this table, used by places that read {@link #name()} * (e.g. the {@code Name} row of {@code DESCRIBE TABLE EXTENDED}). 
Catalogs * returning a {@code MetadataOnlyTable} from {@link TableCatalog#loadTable} @@ -74,6 +75,11 @@ public Transform[] partitioning() { return info.partitions(); } + @Override + public Constraint[] constraints() { + return info.constraints(); + } + @Override public String name() { return name; diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/StagingTableCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/StagingTableCatalog.java index 05edc9033279b..f37dc3dccc5c8 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/StagingTableCatalog.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/StagingTableCatalog.java @@ -97,11 +97,10 @@ default StagedTable stageCreate( * committed, an exception should be thrown by {@link StagedTable#commitStagedChanges()}. *

* Catalogs that declare {@link TableCatalogCapability#SUPPORTS_VIEW} also route atomic v2 - * {@code CREATE VIEW} through this method when {@code tableInfo.properties()} includes - * {@link TableCatalog#PROP_VIEW_TEXT}. + * {@code CREATE VIEW} through this method when {@code tableInfo} is a {@link ViewInfo}. * * @param ident a table identifier - * @param tableInfo information about the table + * @param tableInfo information about the table or view * @return metadata for the new table. This can be null if the catalog does not support atomic * creation for this table. Spark will call {@link #loadTable(Identifier)} later. * @throws TableAlreadyExistsException If a table or view already exists for the identifier @@ -165,12 +164,11 @@ default StagedTable stageReplace( * operation. *

* Catalogs that declare {@link TableCatalogCapability#SUPPORTS_VIEW} also route atomic v2 - * {@code ALTER VIEW ... AS} through this method when {@code tableInfo.properties()} includes - * {@link TableCatalog#PROP_VIEW_TEXT}; the existing entry at {@code ident} is expected to be - * a view. + * {@code ALTER VIEW ... AS} through this method when {@code tableInfo} is a {@link ViewInfo}; + * the existing entry at {@code ident} is expected to be a view. * * @param ident a table identifier - * @param tableInfo information about the table + * @param tableInfo information about the table or view * @return metadata for the new table. This can be null if the catalog does not support atomic * creation for this table. Spark will call {@link #loadTable(Identifier)} later. * @throws UnsupportedOperationException If a requested partition transform is not supported @@ -233,11 +231,11 @@ default StagedTable stageCreateOrReplace( * the staged changes are committed but the table doesn't exist at commit time. *

* Catalogs that declare {@link TableCatalogCapability#SUPPORTS_VIEW} also route atomic v2 - * {@code CREATE OR REPLACE VIEW} through this method when {@code tableInfo.properties()} - * includes {@link TableCatalog#PROP_VIEW_TEXT}. + * {@code CREATE OR REPLACE VIEW} through this method when {@code tableInfo} is a + * {@link ViewInfo}. * * @param ident a table identifier - * @param tableInfo information about the table + * @param tableInfo information about the table or view * @return metadata for the new table. This can be null if the catalog does not support atomic * creation for this table. Spark will call {@link #loadTable(Identifier)} later. * @throws UnsupportedOperationException If a requested partition transform is not supported diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java index 6843e68ad50ff..f55bd6f44452c 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java @@ -87,29 +87,6 @@ public interface TableCatalog extends CatalogPlugin { */ String PROP_OWNER = "owner"; - /** - * A reserved property that holds the SQL text of a view. Unqualified identifiers in the - * view text are resolved against {@link #PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE} at read - * time. - */ - String PROP_VIEW_TEXT = "view_text"; - - /** - * A reserved property that captures the current catalog and namespace at the time the view - * was created. The value is a Spark multi-part identifier string: parts are joined with - * {@code "."} and any part that isn't a simple identifier is backtick-quoted (see - * {@code QuotingUtils.quoted}). The first part is the catalog; the remaining parts are the - * namespace. For example, {@code my_catalog.db1.db2} or {@code my_catalog.`weird.db`.normal}. 
- * The value is parsed with {@code ParserInterface.parseMultipartIdentifier}. An absent or - * empty value means the view was created with no captured resolution context. - */ - String PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE = "view.currentCatalogAndNamespace"; - - /** - * A prefix used to specify the Spark SQL configurations for reading this view. - */ - String VIEW_CONF_PREFIX = "view.sqlConfig."; - /** * A prefix used to pass OPTIONS in table properties */ @@ -322,9 +299,15 @@ default Table createTable( /** * Create a table in the catalog. + *

+ * Catalogs that declare {@link TableCatalogCapability#SUPPORTS_VIEW} also receive view writes + * through this method: when {@code tableInfo} is a {@link ViewInfo}, the call is a + * {@code CREATE VIEW} / {@code CREATE OR REPLACE VIEW} (combined with {@code dropTable}) / + * {@code ALTER VIEW ... AS} (combined with {@code dropTable}) request and must be persisted + * as a view. Implementations should branch on {@code tableInfo instanceof ViewInfo}. * * @param ident a table identifier - * @param tableInfo information about the table. + * @param tableInfo information about the table or view * @return metadata for the new table. This can be null if getting the metadata for the new table * is expensive. Spark will call {@link #loadTable(Identifier)} if needed (e.g. CTAS). * diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalogCapability.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalogCapability.java index 80fbb79711b0d..3039d4211a23b 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalogCapability.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalogCapability.java @@ -95,24 +95,29 @@ public enum TableCatalogCapability { SUPPORTS_CREATE_TABLE_WITH_IDENTITY_COLUMNS, /** - * Signals that the TableCatalog supports views. Views are created and altered via - * {@link TableCatalog#createTable} by accepting a {@link TableInfo} whose properties include - * {@link TableCatalog#PROP_VIEW_TEXT} (and related view keys: - * {@link TableCatalog#PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE} and - * {@link TableCatalog#VIEW_CONF_PREFIX}-prefixed SQL configs). - *

- * Catalogs declaring this capability must round-trip those properties and return a - * {@link MetadataOnlyTable} from {@link TableCatalog#loadTable} so Spark's view resolution - * path can expand the view text. On a plain {@code TableCatalog}, {@code CREATE VIEW} uses - * {@code createTable} and {@code ALTER VIEW ... AS} is implemented as {@code dropTable} + - * {@code createTable}. On a {@link StagingTableCatalog}, Spark routes - * {@code CREATE VIEW} through {@link StagingTableCatalog#stageCreate}, - * {@code CREATE OR REPLACE VIEW} through {@link StagingTableCatalog#stageCreateOrReplace}, - * and {@code ALTER VIEW ... AS} through {@link StagingTableCatalog#stageReplace} so the - * metadata swap commits atomically. Without this capability, Spark rejects - * {@code CREATE VIEW} and {@code ALTER VIEW} statements targeting the catalog up front - * rather than letting the catalog silently persist a table entry that cannot be read as a - * view. + * Signals that the TableCatalog supports views. Views flow through the same write methods as + * tables, using {@link ViewInfo} (a {@link TableInfo} subtype carrying the view-specific + * fields -- query text, captured current catalog/namespace, captured SQL configs, schema + * binding mode, query output column names) as the DTO. Catalogs declaring this capability + * must: + *

+ * Spark routes the view DDL through the standard write APIs: {@code CREATE VIEW} uses + * {@code createTable} (or {@code stageCreate}); {@code CREATE OR REPLACE VIEW} uses + * {@code createTable} (after {@code dropTable}) or {@code stageCreateOrReplace}; + * {@code ALTER VIEW ... AS} uses {@code createTable} (after {@code dropTable}) or + * {@code stageReplace}. Without this capability, Spark rejects {@code CREATE VIEW} and + * {@code ALTER VIEW} statements targeting the catalog up front rather than letting the + * catalog silently persist a table entry that cannot be read as a view. */ SUPPORTS_VIEW } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableInfo.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableInfo.java index 64a9dedabc611..1ae04a3d3fab2 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableInfo.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableInfo.java @@ -20,7 +20,6 @@ import java.util.Map; import java.util.Objects; -import org.apache.spark.sql.catalyst.util.QuotingUtils; import org.apache.spark.sql.connector.catalog.constraints.Constraint; import org.apache.spark.sql.connector.expressions.Transform; import org.apache.spark.sql.types.StructType; @@ -34,9 +33,8 @@ public class TableInfo { /** * Constructor for TableInfo used by the builder. - * @param builder Builder. 
*/ - private TableInfo(Builder builder) { + protected TableInfo(BaseBuilder builder) { this.columns = builder.columns; this.properties = builder.properties; this.partitions = builder.partitions; @@ -61,40 +59,58 @@ public Transform[] partitions() { public Constraint[] constraints() { return constraints; } - public static class Builder { - private Column[] columns = new Column[0]; - private Map properties = new HashMap<>(); - private Transform[] partitions = new Transform[0]; - private Constraint[] constraints = new Constraint[0]; + public static class Builder extends BaseBuilder { + @Override + protected Builder self() { return this; } - public Builder withColumns(Column[] columns) { + @Override + public TableInfo build() { + Objects.requireNonNull(columns, "columns should not be null"); + return new TableInfo(this); + } + } + + /** + * Shared builder state for {@link TableInfo} and its subclasses. Setters return {@code B} so + * subclass builders (e.g. {@link ViewInfo.Builder}) chain through their own type without + * a covariant override on each inherited setter. + */ + protected abstract static class BaseBuilder> { + protected Column[] columns = new Column[0]; + protected Map properties = new HashMap<>(); + protected Transform[] partitions = new Transform[0]; + protected Constraint[] constraints = new Constraint[0]; + + protected abstract B self(); + + public B withColumns(Column[] columns) { this.columns = columns; - return this; + return self(); } - public Builder withSchema(StructType schema) { + public B withSchema(StructType schema) { this.columns = CatalogV2Util.structTypeToV2Columns(schema); - return this; + return self(); } /** * Replaces the current properties map with a defensive copy of the given map. Any reserved - * keys set earlier via convenience setters (e.g. {@link #withProvider}, {@link #withViewText}) - * are discarded -- call those setters after this method, not before. + * keys set earlier via convenience setters (e.g. 
{@link #withProvider}) are discarded -- + * call those setters after this method, not before. */ - public Builder withProperties(Map properties) { + public B withProperties(Map properties) { this.properties = new HashMap<>(properties); - return this; + return self(); } - public Builder withPartitions(Transform[] partitions) { + public B withPartitions(Transform[] partitions) { this.partitions = partitions; - return this; + return self(); } - public Builder withConstraints(Constraint[] constraints) { + public B withConstraints(Constraint[] constraints) { this.constraints = constraints; - return this; + return self(); } // Convenience setters below write reserved keys into the current `properties` map. Pair @@ -103,70 +119,36 @@ public Builder withConstraints(Constraint[] constraints) { // write. /** Writes {@link TableCatalog#PROP_PROVIDER} into the current properties map. */ - public Builder withProvider(String provider) { + public B withProvider(String provider) { properties.put(TableCatalog.PROP_PROVIDER, provider); - return this; + return self(); } - public Builder withLocation(String location) { + public B withLocation(String location) { properties.put(TableCatalog.PROP_LOCATION, location); - return this; + return self(); } - public Builder withComment(String comment) { + public B withComment(String comment) { properties.put(TableCatalog.PROP_COMMENT, comment); - return this; + return self(); } - public Builder withCollation(String collation) { + public B withCollation(String collation) { properties.put(TableCatalog.PROP_COLLATION, collation); - return this; + return self(); } - public Builder withOwner(String owner) { + public B withOwner(String owner) { properties.put(TableCatalog.PROP_OWNER, owner); - return this; + return self(); } - public Builder withTableType(String tableType) { + public B withTableType(String tableType) { properties.put(TableCatalog.PROP_TABLE_TYPE, tableType); - return this; - } - - /** - * Sets the view SQL text and marks this 
TableInfo as a view by setting - * {@link TableCatalog#PROP_TABLE_TYPE} to {@link TableSummary#VIEW_TABLE_TYPE}. - */ - public Builder withViewText(String viewText) { - properties.put(TableCatalog.PROP_VIEW_TEXT, viewText); - properties.put(TableCatalog.PROP_TABLE_TYPE, TableSummary.VIEW_TABLE_TYPE); - return this; + return self(); } - /** - * Sets the current catalog and namespace at view creation time, encoded as a single quoted - * multi-part identifier string (see - * {@link TableCatalog#PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE}). The first part is the - * catalog; remaining parts are the namespace. Passing a null or empty catalog clears the - * property. - */ - public Builder withCurrentCatalogAndNamespace(String catalog, String[] namespace) { - if (catalog == null || catalog.isEmpty()) { - properties.remove(TableCatalog.PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE); - return this; - } - String[] ns = namespace == null ? new String[0] : namespace; - String[] parts = new String[ns.length + 1]; - parts[0] = catalog; - System.arraycopy(ns, 0, parts, 1, ns.length); - properties.put(TableCatalog.PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE, - QuotingUtils.quoted(parts)); - return this; - } - - public TableInfo build() { - Objects.requireNonNull(columns, "columns should not be null"); - return new TableInfo(this); - } + public abstract TableInfo build(); } } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/View.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/View.java deleted file mode 100644 index a4dc5f2f2d20f..0000000000000 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/View.java +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.connector.catalog; - -import java.util.Map; - -import org.apache.spark.annotation.DeveloperApi; -import org.apache.spark.sql.types.StructType; - -/** - * An interface representing a persisted view. - */ -@DeveloperApi -public interface View { - /** - * A name to identify this view. - */ - String name(); - - /** - * The view query SQL text. - */ - String query(); - - /** - * The current catalog when the view is created. - */ - String currentCatalog(); - - /** - * The current namespace when the view is created. - */ - String[] currentNamespace(); - - /** - * The schema for the view when the view is created after applying column aliases. - */ - StructType schema(); - - /** - * The output column names of the query that creates this view. - */ - String[] queryColumnNames(); - - /** - * The view column aliases. - */ - String[] columnAliases(); - - /** - * The view column comments. - */ - String[] columnComments(); - - /** - * The view properties. 
- */ - Map properties(); -} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewCatalog.java deleted file mode 100644 index abe5fb3148d08..0000000000000 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewCatalog.java +++ /dev/null @@ -1,199 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.connector.catalog; - -import java.util.Arrays; -import java.util.List; - -import org.apache.spark.annotation.DeveloperApi; -import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException; -import org.apache.spark.sql.catalyst.analysis.NoSuchViewException; -import org.apache.spark.sql.catalyst.analysis.ViewAlreadyExistsException; - -/** - * Catalog methods for working with views. - */ -@DeveloperApi -public interface ViewCatalog extends CatalogPlugin { - - /** - * A reserved property to specify the description of the view. - */ - String PROP_COMMENT = "comment"; - - /** - * A reserved property to specify the owner of the view. 
- */ - String PROP_OWNER = "owner"; - - /** - * A reserved property to specify the software version used to create the view. - */ - String PROP_CREATE_ENGINE_VERSION = "create_engine_version"; - - /** - * A reserved property to specify the software version used to change the view. - */ - String PROP_ENGINE_VERSION = "engine_version"; - - /** - * All reserved properties of the view. - */ - List RESERVED_PROPERTIES = Arrays.asList( - PROP_COMMENT, - PROP_OWNER, - PROP_CREATE_ENGINE_VERSION, - PROP_ENGINE_VERSION); - - /** - * List the views in a namespace from the catalog. - *

- * If the catalog supports tables, this must return identifiers for only views and not tables. - * - * @param namespace a multi-part namespace - * @return an array of Identifiers for views - * @throws NoSuchNamespaceException If the namespace does not exist (optional). - */ - Identifier[] listViews(String... namespace) throws NoSuchNamespaceException; - - /** - * Load view metadata by {@link Identifier ident} from the catalog. - *

- * If the catalog supports tables and contains a table for the identifier and not a view, - * this must throw {@link NoSuchViewException}. - * - * @param ident a view identifier - * @return the view description - * @throws NoSuchViewException If the view doesn't exist or is a table - */ - View loadView(Identifier ident) throws NoSuchViewException; - - /** - * Invalidate cached view metadata for an {@link Identifier identifier}. - *

- * If the view is already loaded or cached, drop cached data. If the view does not exist or is - * not cached, do nothing. Calling this method should not query remote services. - * - * @param ident a view identifier - */ - default void invalidateView(Identifier ident) { - } - - /** - * Test whether a view exists using an {@link Identifier identifier} from the catalog. - *

- * If the catalog supports views and contains a view for the identifier and not a table, - * this must return false. - * - * @param ident a view identifier - * @return true if the view exists, false otherwise - */ - default boolean viewExists(Identifier ident) { - try { - return loadView(ident) != null; - } catch (NoSuchViewException e) { - return false; - } - } - - /** - * Create a view in the catalog. - * - * @param viewInfo the info class holding all view information - * @return the created view. This can be null if getting the metadata for the view is expensive - * @throws ViewAlreadyExistsException If a view or table already exists for the identifier - * @throws NoSuchNamespaceException If the identifier namespace does not exist (optional) - */ - View createView(ViewInfo viewInfo) throws ViewAlreadyExistsException, NoSuchNamespaceException; - - /** - * Replace a view in the catalog. - *

- * The default implementation has a race condition. - * Catalogs are encouraged to implement this operation atomically. - * - * @param viewInfo the info class holding all view information - * @param orCreate create the view if it doesn't exist - * @return the created/replaced view. This can be null if getting the metadata - * for the view is expensive - * @throws NoSuchViewException If the view doesn't exist or is a table - * @throws NoSuchNamespaceException If the identifier namespace does not exist (optional) - */ - default View replaceView( - ViewInfo viewInfo, - boolean orCreate) - throws NoSuchViewException, NoSuchNamespaceException { - if (viewExists(viewInfo.ident())) { - dropView(viewInfo.ident()); - } else if (!orCreate) { - throw new NoSuchViewException(viewInfo.ident()); - } - - try { - return createView(viewInfo); - } catch (ViewAlreadyExistsException e) { - throw new RuntimeException("Race condition when creating/replacing view", e); - } - } - - /** - * Apply {@link ViewChange changes} to a view in the catalog. - *

- * Implementations may reject the requested changes. If any change is rejected, none of the - * changes should be applied to the view. - * - * @param ident a view identifier - * @param changes an array of changes to apply to the view - * @return the view altered - * @throws NoSuchViewException If the view doesn't exist or is a table. - * @throws IllegalArgumentException If any change is rejected by the implementation. - */ - View alterView(Identifier ident, ViewChange... changes) - throws NoSuchViewException, IllegalArgumentException; - - /** - * Drop a view in the catalog. - *

- * If the catalog supports tables and contains a table for the identifier and not a view, this - * must not drop the table and must return false. - * - * @param ident a view identifier - * @return true if a view was deleted, false if no view exists for the identifier - */ - boolean dropView(Identifier ident); - - /** - * Rename a view in the catalog. - *

- * If the catalog supports tables and contains a table with the old identifier, this throws - * {@link NoSuchViewException}. Additionally, if it contains a table with the new identifier, - * this throws {@link ViewAlreadyExistsException}. - *

- * If the catalog does not support view renames between namespaces, it throws - * {@link UnsupportedOperationException}. - * - * @param oldIdent the view identifier of the existing view to rename - * @param newIdent the new view identifier of the view - * @throws NoSuchViewException If the view to rename doesn't exist or is a table - * @throws ViewAlreadyExistsException If the new view name already exists or is a table - * @throws UnsupportedOperationException If the namespaces of old and new identifiers do not - * match (optional) - */ - void renameView(Identifier oldIdent, Identifier newIdent) - throws NoSuchViewException, ViewAlreadyExistsException; -} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewChange.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewChange.java deleted file mode 100644 index c94933beed7f6..0000000000000 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewChange.java +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.connector.catalog; - -import org.apache.spark.annotation.DeveloperApi; - -/** - * ViewChange subclasses represent requested changes to a view. - * These are passed to {@link ViewCatalog#alterView}. - */ -@DeveloperApi -public interface ViewChange { - - /** - * Create a ViewChange for setting a table property. - * - * @param property the property name - * @param value the new property value - * @return a ViewChange - */ - static ViewChange setProperty(String property, String value) { - return new SetProperty(property, value); - } - - /** - * Create a ViewChange for removing a table property. - * - * @param property the property name - * @return a ViewChange - */ - static ViewChange removeProperty(String property) { - return new RemoveProperty(property); - } - - final class SetProperty implements ViewChange { - private final String property; - private final String value; - - private SetProperty(String property, String value) { - this.property = property; - this.value = value; - } - - public String property() { - return property; - } - - public String value() { - return value; - } - } - - final class RemoveProperty implements ViewChange { - private final String property; - - private RemoveProperty(String property) { - this.property = property; - } - - public String property() { - return property; - } - } -} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewInfo.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewInfo.java index b01e133365661..b3b3c37f5569a 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewInfo.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewInfo.java @@ -14,168 +14,138 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.spark.sql.connector.catalog; -import org.apache.spark.annotation.DeveloperApi; -import org.apache.spark.sql.types.StructType; - -import javax.annotation.Nonnull; - -import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; import java.util.Map; import java.util.Objects; -import java.util.StringJoiner; + +import org.apache.spark.annotation.Evolving; /** - * A class that holds view information. + * A {@link TableInfo} specialization for views. Views are a kind of table: they share the + * {@link Table} abstraction and flow through the same {@link TableCatalog#createTable} write + * path and the same {@link TableCatalog#loadTable} read path. {@code ViewInfo} carries the + * view-specific fields that cannot be represented as string table properties -- the query + * text, captured creation-time resolution context, captured SQL configs, schema-binding mode, + * and the query output column names. + *

+ * Catalogs that declare {@link TableCatalogCapability#SUPPORTS_VIEW} recognize a + * {@code ViewInfo} argument to {@code createTable} (and the {@link StagingTableCatalog} + * staging variants) as a view write, and return a {@link MetadataOnlyTable} wrapping a + * {@code ViewInfo} from {@code loadTable} for a view identifier. + * + * @since 4.2.0 */ -@DeveloperApi -public class ViewInfo { - private final Identifier ident; - private final String sql; +@Evolving +public class ViewInfo extends TableInfo { + + private final String queryText; private final String currentCatalog; private final String[] currentNamespace; - private final StructType schema; + private final Map sqlConfigs; + private final String schemaMode; private final String[] queryColumnNames; - private final String[] columnAliases; - private final String[] columnComments; - private final Map properties; - - public ViewInfo( - Identifier ident, - String sql, - String currentCatalog, - String[] currentNamespace, - StructType schema, - String[] queryColumnNames, - String[] columnAliases, - String[] columnComments, - Map properties) { - this.ident = ident; - this.sql = sql; - this.currentCatalog = currentCatalog; - this.currentNamespace = currentNamespace; - this.schema = schema; - this.queryColumnNames = queryColumnNames; - this.columnAliases = columnAliases; - this.columnComments = columnComments; - this.properties = properties; - } - /** - * @return The view identifier - */ - @Nonnull - public Identifier ident() { - return ident; + private ViewInfo(Builder builder) { + super(builder); + this.queryText = Objects.requireNonNull(builder.queryText, "queryText should not be null"); + this.currentCatalog = builder.currentCatalog; + this.currentNamespace = builder.currentNamespace; + this.sqlConfigs = Collections.unmodifiableMap(builder.sqlConfigs); + this.schemaMode = builder.schemaMode; + this.queryColumnNames = builder.queryColumnNames; + // Force PROP_TABLE_TYPE = VIEW so that `properties()` reflects the 
typed ViewInfo + // classification. Catalogs and generic viewers reading PROP_TABLE_TYPE from the properties + // bag (e.g. TableCatalog.listTableSummaries default impl, DESCRIBE) see "VIEW" without + // requiring authors to remember withTableType(VIEW). + properties().put(TableCatalog.PROP_TABLE_TYPE, TableSummary.VIEW_TABLE_TYPE); } - /** - * @return The SQL text that defines the view - */ - @Nonnull - public String sql() { - return sql; - } + /** The SQL text of the view. */ + public String queryText() { return queryText; } /** - * @return The current catalog + * The current catalog at the time the view was created, used to resolve unqualified + * identifiers in {@link #queryText()} at read time. May be {@code null} if the view was + * created with no captured resolution context. */ - @Nonnull - public String currentCatalog() { - return currentCatalog; - } + public String currentCatalog() { return currentCatalog; } /** - * @return The current namespace + * The current namespace at the time the view was created, used alongside + * {@link #currentCatalog()} to resolve unqualified identifiers in {@link #queryText()} at + * read time. Never {@code null}; empty when no namespace was captured. */ - @Nonnull - public String[] currentNamespace() { - return currentNamespace; - } + public String[] currentNamespace() { return currentNamespace; } /** - * @return The view query output schema + * The SQL configs captured at view creation time, applied when parsing and analyzing the + * view body. Keys are unprefixed SQL config names (e.g. {@code spark.sql.ansi.enabled}). */ - @Nonnull - public StructType schema() { - return schema; - } + public Map sqlConfigs() { return sqlConfigs; } /** - * @return The query column names + * The view's schema binding mode. Allowed values match the {@code toString} form of + * {@code org.apache.spark.sql.catalyst.analysis.ViewSchemaMode}: + * {@code BINDING}, {@code COMPENSATION}, {@code TYPE EVOLUTION}, {@code EVOLUTION}. 
+ * May be {@code null} when schema binding is not configured. */ - @Nonnull - public String[] queryColumnNames() { - return queryColumnNames; - } + public String schemaMode() { return schemaMode; } /** - * @return The column aliases + * Output column names of the query that created the view, used to map the query output to + * the view's declared columns during view resolution. Empty for views in {@code EVOLUTION} + * mode, which always use the view's current schema. */ - @Nonnull - public String[] columnAliases() { - return columnAliases; - } + public String[] queryColumnNames() { return queryColumnNames; } + + public static class Builder extends BaseBuilder { + private String queryText; + private String currentCatalog; + private String[] currentNamespace = new String[0]; + private Map sqlConfigs = new HashMap<>(); + private String schemaMode; + private String[] queryColumnNames = new String[0]; + + @Override + protected Builder self() { return this; } + + public Builder withQueryText(String queryText) { + this.queryText = queryText; + return this; + } - /** - * @return The column comments - */ - @Nonnull - public String[] columnComments() { - return columnComments; - } + public Builder withCurrentCatalog(String currentCatalog) { + this.currentCatalog = currentCatalog; + return this; + } - /** - * @return The view properties - */ - @Nonnull - public Map properties() { - return properties; - } + public Builder withCurrentNamespace(String[] currentNamespace) { + this.currentNamespace = currentNamespace == null ? 
new String[0] : currentNamespace; + return this; + } - @Override - public boolean equals(Object o) { - if (this == o) { - return true; + public Builder withSqlConfigs(Map sqlConfigs) { + this.sqlConfigs = new HashMap<>(sqlConfigs); + return this; } - if (o == null || getClass() != o.getClass()) { - return false; + + public Builder withSchemaMode(String schemaMode) { + this.schemaMode = schemaMode; + return this; } - ViewInfo viewInfo = (ViewInfo) o; - return ident.equals(viewInfo.ident) && sql.equals(viewInfo.sql) && - currentCatalog.equals(viewInfo.currentCatalog) && - Arrays.equals(currentNamespace, viewInfo.currentNamespace) && - schema.equals(viewInfo.schema) && - Arrays.equals(queryColumnNames, viewInfo.queryColumnNames) && - Arrays.equals(columnAliases, viewInfo.columnAliases) && - Arrays.equals(columnComments, viewInfo.columnComments) && - properties.equals(viewInfo.properties); - } - @Override - public int hashCode() { - int result = Objects.hash(ident, sql, currentCatalog, schema, properties); - result = 31 * result + Arrays.hashCode(currentNamespace); - result = 31 * result + Arrays.hashCode(queryColumnNames); - result = 31 * result + Arrays.hashCode(columnAliases); - result = 31 * result + Arrays.hashCode(columnComments); - return result; - } + public Builder withQueryColumnNames(String[] queryColumnNames) { + this.queryColumnNames = queryColumnNames == null ? 
new String[0] : queryColumnNames; + return this; + } - @Override - public String toString() { - return new StringJoiner(", ", ViewInfo.class.getSimpleName() + "[", "]") - .add("ident=" + ident) - .add("sql='" + sql + "'") - .add("currentCatalog='" + currentCatalog + "'") - .add("currentNamespace=" + Arrays.toString(currentNamespace)) - .add("schema=" + schema) - .add("queryColumnNames=" + Arrays.toString(queryColumnNames)) - .add("columnAliases=" + Arrays.toString(columnAliases)) - .add("columnComments=" + Arrays.toString(columnComments)) - .add("properties=" + properties) - .toString(); + @Override + public ViewInfo build() { + Objects.requireNonNull(columns, "columns should not be null"); + return new ViewInfo(this); + } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index e1d40a3467d83..3e7af3acec87b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -50,7 +50,7 @@ import org.apache.spark.sql.catalyst.trees.TreePattern._ import org.apache.spark.sql.catalyst.types.DataTypeUtils import org.apache.spark.sql.catalyst.util.{toPrettySQL, trimTempResolvedColumn, CharVarcharUtils} import org.apache.spark.sql.catalyst.util.ResolveDefaultColumns._ -import org.apache.spark.sql.connector.catalog.{View => _, _} +import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.connector.catalog.TableChange.{After, ColumnPosition} import org.apache.spark.sql.connector.catalog.functions.UnboundFunction @@ -1126,13 +1126,9 @@ class Analyzer( val v2Ident = Identifier.of(v1Ident.database.toArray, v1Ident.identifier) ResolvedPersistentView( catalog, v2Ident, v1Table.catalogTable) - case t: MetadataOnlyTable => + case t: MetadataOnlyTable 
if t.getTableInfo.isInstanceOf[ViewInfo] => val catalogTable = V1Table.toCatalogTable(catalog, ident, t) - if (catalogTable.tableType == CatalogTableType.VIEW) { - ResolvedPersistentView(catalog, ident, catalogTable) - } else { - ResolvedTable.create(catalog.asTableCatalog, ident, t) - } + ResolvedPersistentView(catalog, ident, catalogTable) case table => ResolvedTable.create(catalog.asTableCatalog, ident, table) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index d5f6114261f68..12963b0da959e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -43,7 +43,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.plans.logical.statsEstimation.EstimationUtils import org.apache.spark.sql.catalyst.types.DataTypeUtils import org.apache.spark.sql.catalyst.util._ -import org.apache.spark.sql.connector.catalog.{CatalogManager, TableCatalog} +import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.connector.expressions.{ClusterByTransform, FieldReference, NamedReference, Transform} import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.internal.SQLConf @@ -751,7 +751,7 @@ object CatalogTable { props.toMap } - val VIEW_SQL_CONFIG_PREFIX = TableCatalog.VIEW_CONF_PREFIX + val VIEW_SQL_CONFIG_PREFIX = VIEW_PREFIX + "sqlConfig." val VIEW_QUERY_OUTPUT_PREFIX = VIEW_PREFIX + "query.out." 
val VIEW_QUERY_OUTPUT_NUM_COLUMNS = VIEW_QUERY_OUTPUT_PREFIX + "numCols" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala index b372f2b568c73..fc484d4675f36 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala @@ -61,9 +61,7 @@ private[sql] object CatalogV2Util { TableCatalog.PROP_OWNER, TableCatalog.PROP_EXTERNAL, TableCatalog.PROP_IS_MANAGED_LOCATION, - TableCatalog.PROP_TABLE_TYPE, - TableCatalog.PROP_VIEW_TEXT, - TableCatalog.PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE) + TableCatalog.PROP_TABLE_TYPE) /** * The list of reserved namespace properties, which can not be removed or changed directly by diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala index d39d83de59c40..d2d8a3fe4bdde 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala @@ -23,7 +23,6 @@ import scala.collection.mutable import scala.jdk.CollectionConverters._ import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType, CatalogUtils, ClusterBySpec} -import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.connector.catalog.V1Table.addV2TableProperties import org.apache.spark.sql.connector.expressions.{LogicalExpressions, Transform} @@ -113,26 +112,25 @@ private[sql] object V1Table { def toCatalogTable( catalog: CatalogPlugin, ident: Identifier, - t: MetadataOnlyTable): CatalogTable = { - val info = t.getTableInfo + t: MetadataOnlyTable): 
CatalogTable = t.getTableInfo match { + case viewInfo: ViewInfo => toCatalogTable(catalog, ident, viewInfo) + case tableInfo => toCatalogTable(catalog, ident, tableInfo) + } + + private def toCatalogTable( + catalog: CatalogPlugin, + ident: Identifier, + info: TableInfo): CatalogTable = { val props = info.properties.asScala.toMap // PROP_TABLE_TYPE is advisory on the v2 side: it may be absent or carry a value that has no - // v1 mapping (e.g. TableSummary.FOREIGN_TABLE_TYPE). v1 only has EXTERNAL/MANAGED/VIEW, so - // anything other than the two explicit mappings below falls back to EXTERNAL for the v1 - // representation -- the same default v1 uses when the value is missing. + // v1 mapping (e.g. TableSummary.FOREIGN_TABLE_TYPE). v1 only has EXTERNAL/MANAGED, so + // anything other than the explicit MANAGED mapping falls back to EXTERNAL for the v1 + // representation -- the same default v1 uses when the value is missing. VIEW is reached + // only through the ViewInfo branch above. val tableType = props.get(TableCatalog.PROP_TABLE_TYPE) match { - case Some(TableSummary.VIEW_TABLE_TYPE) => CatalogTableType.VIEW case Some(TableSummary.MANAGED_TABLE_TYPE) => CatalogTableType.MANAGED case _ => CatalogTableType.EXTERNAL } - // Only expose viewText when this table actually is a view; otherwise downstream callers that - // use `catalogTable.viewText.isDefined` as an "is-view" proxy would misclassify a - // misconfigured table entry. - val viewText = if (tableType == CatalogTableType.VIEW) { - props.get(TableCatalog.PROP_VIEW_TEXT) - } else { - None - } // Reserved keys are promoted to first-class CatalogTable fields; strip them from the // user-visible properties map so they're not double-persisted or leaked into the serde bag. 
val userProps = props -- CatalogV2Util.TABLE_RESERVED_PROPERTIES @@ -140,26 +138,10 @@ private[sql] object V1Table { .partition(_._1.startsWith(TableCatalog.OPTION_PREFIX)) val tablePropsMap = tableProps.toMap val (partCols, bucketSpec, clusterBySpec) = info.partitions.toSeq.convertTransforms - // For views, translate the V2 view context (PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE, a - // single quoted multi-part identifier whose first part is the catalog) into V1's numbered - // viewCatalogAndNamespace properties so the V1 view resolution path can expand unqualified - // identifiers in the view text. - val viewContextProps = if (tableType == CatalogTableType.VIEW) { - props.get(TableCatalog.PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE) match { - case Some(s) if s.nonEmpty => - val parts = CatalystSqlParser.parseMultipartIdentifier(s) - CatalogTable.catalogAndNamespaceToProps(parts.head, parts.tail) - case _ => - Map.empty[String, String] - } - } else { - Map.empty[String, String] - } CatalogTable( // `asLegacyTableIdentifier` collapses multi-part namespaces to their last segment (v1 // limitation). We record the full multi-part form in `multipartIdentifier` below; - // callers needing the real fully-qualified name (e.g. cyclic view detection) should - // read `CatalogTable.fullIdent`. + // callers needing the real fully-qualified name should read `CatalogTable.fullIdent`. 
identifier = ident.asLegacyTableIdentifier(catalog.name()), tableType = tableType, storage = CatalogStorageFormat.empty.copy( @@ -175,13 +157,56 @@ private[sql] object V1Table { partitionColumnNames = partCols, bucketSpec = bucketSpec, owner = props.getOrElse(TableCatalog.PROP_OWNER, ""), - viewText = viewText, - viewOriginalText = viewText, comment = props.get(TableCatalog.PROP_COMMENT), collation = props.get(TableCatalog.PROP_COLLATION), properties = tablePropsMap ++ - clusterBySpec.map(ClusterBySpec.toPropertyWithoutValidation) ++ - viewContextProps, + clusterBySpec.map(ClusterBySpec.toPropertyWithoutValidation), + multipartIdentifier = Some(catalog.name() +: ident.asMultipartIdentifier) + ) + } + + private def toCatalogTable( + catalog: CatalogPlugin, + ident: Identifier, + info: ViewInfo): CatalogTable = { + val props = info.properties.asScala.toMap + val userProps = props -- CatalogV2Util.TABLE_RESERVED_PROPERTIES + // Serde/OPTION properties only apply to data-source tables; views' user properties are a + // plain TBLPROPERTIES bag. 
+ val tablePropsMap = userProps + val viewContextProps = if (info.currentCatalog != null && info.currentCatalog.nonEmpty) { + CatalogTable.catalogAndNamespaceToProps( + info.currentCatalog, info.currentNamespace.toSeq) + } else { + Map.empty[String, String] + } + val sqlConfigProps = info.sqlConfigs.asScala.map { + case (k, v) => s"${CatalogTable.VIEW_SQL_CONFIG_PREFIX}$k" -> v + }.toMap + val queryOutputProps = if (info.queryColumnNames.isEmpty) { + Map.empty[String, String] + } else { + val numCols = info.queryColumnNames.length + val perColProps = info.queryColumnNames.zipWithIndex.map { case (name, idx) => + s"${CatalogTable.VIEW_QUERY_OUTPUT_COLUMN_NAME_PREFIX}$idx" -> name + }.toMap + perColProps + (CatalogTable.VIEW_QUERY_OUTPUT_NUM_COLUMNS -> numCols.toString) + } + val schemaModeProps = Option(info.schemaMode) + .map(m => Map(CatalogTable.VIEW_SCHEMA_MODE -> m)) + .getOrElse(Map.empty) + CatalogTable( + identifier = ident.asLegacyTableIdentifier(catalog.name()), + tableType = CatalogTableType.VIEW, + storage = CatalogStorageFormat.empty, + schema = CatalogV2Util.v2ColumnsToStructType(info.columns), + owner = props.getOrElse(TableCatalog.PROP_OWNER, ""), + viewText = Some(info.queryText), + viewOriginalText = Some(info.queryText), + comment = props.get(TableCatalog.PROP_COMMENT), + collation = props.get(TableCatalog.PROP_COLLATION), + properties = tablePropsMap ++ viewContextProps ++ sqlConfigProps ++ + queryOutputProps ++ schemaModeProps, multipartIdentifier = Some(catalog.name() +: ident.asMultipartIdentifier) ) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 4c5e7e3d80f30..1c5077b214b1f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -3344,25 +3344,25 @@ private[sql] 
object QueryCompilationErrors extends QueryErrorsBase with Compilat } def cannotCreateViewTooManyColumnsError( - viewIdent: TableIdentifier, + viewNameParts: Seq[String], expected: Seq[String], query: LogicalPlan): Throwable = { new AnalysisException( errorClass = "CREATE_VIEW_COLUMN_ARITY_MISMATCH.TOO_MANY_DATA_COLUMNS", messageParameters = Map( - "viewName" -> toSQLId(viewIdent.nameParts), + "viewName" -> toSQLId(viewNameParts), "viewColumns" -> expected.map(c => toSQLId(c)).mkString(", "), "dataColumns" -> query.output.map(c => toSQLId(c.name)).mkString(", "))) } def cannotCreateViewNotEnoughColumnsError( - viewIdent: TableIdentifier, + viewNameParts: Seq[String], expected: Seq[String], query: LogicalPlan): Throwable = { new AnalysisException( errorClass = "CREATE_VIEW_COLUMN_ARITY_MISMATCH.NOT_ENOUGH_DATA_COLUMNS", messageParameters = Map( - "viewName" -> toSQLId(viewIdent.nameParts), + "viewName" -> toSQLId(viewNameParts), "viewColumns" -> expected.map(c => toSQLId(c)).mkString(", "), "dataColumns" -> query.output.map(c => toSQLId(c.name)).mkString(", "))) } @@ -3374,12 +3374,12 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat } def unsupportedCreateOrReplaceViewOnTableError( - name: TableIdentifier, replace: Boolean): Throwable = { + nameParts: Seq[String], replace: Boolean): Throwable = { if (replace) { new AnalysisException( errorClass = "EXPECT_VIEW_NOT_TABLE.NO_ALTERNATIVE", messageParameters = Map( - "tableName" -> toSQLId(name.nameParts), + "tableName" -> toSQLId(nameParts), "operation" -> "CREATE OR REPLACE VIEW" ) ) @@ -3387,16 +3387,16 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat new AnalysisException( errorClass = "TABLE_OR_VIEW_ALREADY_EXISTS", messageParameters = Map( - "relationName" -> toSQLId(name.nameParts) + "relationName" -> toSQLId(nameParts) ) ) } } - def viewAlreadyExistsError(name: TableIdentifier): Throwable = { + def viewAlreadyExistsError(nameParts: 
Seq[String]): Throwable = { new AnalysisException( errorClass = "TABLE_OR_VIEW_ALREADY_EXISTS", - messageParameters = Map("relationName" -> name.toString)) + messageParameters = Map("relationName" -> toSQLId(nameParts))) } def createPersistedViewFromDatasetAPINotAllowedError(): Throwable = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/metricViewCommands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/metricViewCommands.scala index 8c21a908ddf32..ec8acbd02139a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/metricViewCommands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/metricViewCommands.scala @@ -51,10 +51,10 @@ case class CreateMetricViewCommand( if (userSpecifiedColumns.nonEmpty) { if (userSpecifiedColumns.length > analyzed.output.length) { throw QueryCompilationErrors.cannotCreateViewNotEnoughColumnsError( - name, userSpecifiedColumns.map(_._1), analyzed) + name.nameParts, userSpecifiedColumns.map(_._1), analyzed) } else if (userSpecifiedColumns.length < analyzed.output.length) { throw QueryCompilationErrors.cannotCreateViewTooManyColumnsError( - name, userSpecifiedColumns.map(_._1), analyzed) + name.nameParts, userSpecifiedColumns.map(_._1), analyzed) } } catalog.createTable( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index b8bdd7e3217d2..8a7b395d74a77 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -119,10 +119,10 @@ case class CreateViewCommand( if (userSpecifiedColumns.nonEmpty) { if (userSpecifiedColumns.length > analyzedPlan.output.length) { throw QueryCompilationErrors.cannotCreateViewNotEnoughColumnsError( - name, userSpecifiedColumns.map(_._1), analyzedPlan) + name.nameParts, 
userSpecifiedColumns.map(_._1), analyzedPlan) } else if (userSpecifiedColumns.length < analyzedPlan.output.length) { throw QueryCompilationErrors.cannotCreateViewTooManyColumnsError( - name, userSpecifiedColumns.map(_._1), analyzedPlan) + name.nameParts, userSpecifiedColumns.map(_._1), analyzedPlan) } if (viewSchemaMode == SchemaEvolution) { throw SparkException.internalError( @@ -172,7 +172,8 @@ case class CreateViewCommand( // Handles `CREATE VIEW IF NOT EXISTS v0 AS SELECT ...`. Does nothing when the target view // already exists. } else if (tableMetadata.tableType != CatalogTableType.VIEW) { - throw QueryCompilationErrors.unsupportedCreateOrReplaceViewOnTableError(name, replace) + throw QueryCompilationErrors.unsupportedCreateOrReplaceViewOnTableError( + name.nameParts, replace) } else if (replace) { // Detect cyclic view reference on CREATE OR REPLACE VIEW. val viewIdent = tableMetadata.identifier @@ -193,7 +194,7 @@ case class CreateViewCommand( } else { // Handles `CREATE VIEW v0 AS SELECT ...`. Throws exception when the target view already // exists. - throw QueryCompilationErrors.viewAlreadyExistsError(name) + throw QueryCompilationErrors.viewAlreadyExistsError(name.nameParts) } } else { // Create the view if it doesn't exist. @@ -498,9 +499,7 @@ object ViewHelper extends SQLConfHelper with Logging with CapturesConfig { viewSchemaMode: ViewSchemaMode, tempViewNames: Seq[Seq[String]] = Seq.empty, tempFunctionNames: Seq[String] = Seq.empty, - tempVariableNames: Seq[Seq[String]] = Seq.empty, - catalogAndNamespaceEncoder: (String, Seq[String]) => Map[String, String] = - catalogAndNamespaceToProps): Map[String, String] = { + tempVariableNames: Seq[Seq[String]] = Seq.empty): Map[String, String] = { val conf = session.sessionState.conf @@ -519,7 +518,7 @@ object ViewHelper extends SQLConfHelper with Logging with CapturesConfig { // Generate the view default catalog and namespace, as well as captured SQL configs. 
val manager = session.sessionState.catalogManager removeReferredTempNames(removeSQLConfigs(removeQueryColumnNames(properties))) ++ - catalogAndNamespaceEncoder( + catalogAndNamespaceToProps( manager.currentCatalog.name, manager.currentNamespace.toImmutableArraySeq) ++ sqlConfigsToProps(conf, VIEW_SQL_CONFIG_PREFIX) ++ queryColumnNameProps ++ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala index 2023cb730c3fb..c8a210d31ec19 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, ResolvedIdentifier, TableAlreadyExistsException, ViewSchemaMode} import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, StagedTable, StagingTableCatalog, TableCatalog} +import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, StagedTable, StagingTableCatalog, TableCatalog, ViewInfo} import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.command.CommandUtils @@ -32,25 +32,35 @@ import org.apache.spark.util.Utils /** * Shared bits for the v2 ALTER VIEW ... AS execs. Loads the existing view once via - * `existingTable` and uses its properties to preserve user-set properties, comment, collation, - * and schema-binding mode when constructing the replacement `TableInfo`. 
A racing DDL between - * analysis and exec can change the target out from under us (dropped, or replaced with a - * non-view table); in that case we surface a regular no-such-table / not-a-view analysis - * error rather than propagating a stale analyzer decision. + * `existingView` and uses it to preserve user-set properties, comment, collation, schema + * binding mode, and owner when constructing the replacement [[ViewInfo]]. A racing DDL + * between analysis and exec can change the target out from under us (dropped, or replaced + * with a non-view table); in that case we surface a regular no-such-table / not-a-view + * analysis error rather than propagating a stale analyzer decision. * - * `generateViewProperties` (invoked from `buildTableInfo`) strips the transient view keys - * (SQL configs, query column names, referred-temp names) from the inherited properties and - * re-emits them from the current session, matching v1 `AlterViewAsCommand.alterPermanentView`. + * Transient fields (SQL configs, query column names, schema mode) are re-captured from the + * current session by [[V2ViewPreparation.buildViewInfo]], matching v1 + * `AlterViewAsCommand.alterPermanentView`. PROP_OWNER and user TBLPROPERTIES flow through + * unchanged. */ private[v2] trait V2AlterViewPreparation extends V2ViewPreparation { // Reuses `tryLoadTable` / `isViewTable` from the parent trait. A racing DDL between // analysis and exec (drop, or replace with a non-view table) can invalidate the analyzer's // ResolvedPersistentView decision -- we re-check here and surface user-facing errors // rather than propagate the stale resolution. 
- protected lazy val existingTable: MetadataOnlyTable = tryLoadTable() match { + protected lazy val existingView: ViewInfo = tryLoadTable() match { case None => throw QueryCompilationErrors.noSuchTableError(catalog.name(), identifier) - case Some(mot: MetadataOnlyTable) if isViewTable(mot) => mot + case Some(mot: MetadataOnlyTable) => + mot.getTableInfo match { + case v: ViewInfo => v + case _ => + throw QueryCompilationErrors.expectViewNotTableError( + (catalog.name() +: identifier.asMultipartIdentifier).toSeq, + cmd = "ALTER VIEW ... AS", + suggestAlternative = false, + t = this) + } case _ => throw QueryCompilationErrors.expectViewNotTableError( (catalog.name() +: identifier.asMultipartIdentifier).toSeq, @@ -59,16 +69,8 @@ private[v2] trait V2AlterViewPreparation extends V2ViewPreparation { t = this) } - // Carry the existing view's full property map forward. Keys the ALTER actually changes are - // overwritten downstream: view text + PROP_TABLE_TYPE via `withViewText`, comment / collation - // via `withComment` / `withCollation`, view.sqlConfig.* / view.query.out.* / - // view.referredTempNames re-emitted by `generateViewProperties`, and - // PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE re-emitted by the v2 encoder inside - // `buildTableInfo`. Everything else -- notably PROP_OWNER and view.schemaMode -- flows - // through unchanged, matching v1 `AlterViewAsCommand.alterPermanentView`'s `viewMeta.copy` - // semantics. protected lazy val existingProps: Map[String, String] = - existingTable.getTableInfo.properties.asScala.toMap + existingView.properties.asScala.toMap private def existingProp(key: String): Option[String] = existingProps.get(key) @@ -78,23 +80,27 @@ private[v2] trait V2AlterViewPreparation extends V2ViewPreparation { override def collation: Option[String] = existingProp(TableCatalog.PROP_COLLATION) // Preserve the existing view's owner (v1-parity with AlterViewAsCommand's viewMeta.copy, // which leaves `owner` untouched). 
If the existing view has no PROP_OWNER, pass it through - // as None so the replacement TableInfo also has no owner. + // as None so the replacement ViewInfo also has no owner. override def owner: Option[String] = existingProp(TableCatalog.PROP_OWNER) override def userProperties: Map[String, String] = existingProps - // Read the schema binding mode directly from the properties map; shares decoding with - // the v1 path via `CatalogTable.viewSchemaModeFromProperties` (honors - // viewSchemaBindingEnabled and the same default when the property is absent). + // Preserve the existing view's schema binding mode. Reuse `viewSchemaModeFromProperties` + // for a v1-identical decode -- it honors `viewSchemaBindingEnabled` and defaults missing + // values to SchemaBinding. We feed the typed `ViewInfo.schemaMode` String in via a + // single-key map so the decode logic stays in one place. override def viewSchemaMode: ViewSchemaMode = - CatalogTable.viewSchemaModeFromProperties(existingProps) + CatalogTable.viewSchemaModeFromProperties( + Option(existingView.schemaMode) + .map(CatalogTable.VIEW_SCHEMA_MODE -> _) + .toMap) /** - * Force-evaluate `existingTable` so `NoSuchTableException` / `expectViewNotTableError` - * surfaces before any other work (e.g. `buildTableInfo`, uncache, drop). The result is + * Force-evaluate `existingView` so `NoSuchTableException` / `expectViewNotTableError` + * surfaces before any other work (e.g. `buildViewInfo`, uncache, drop). The result is * intentionally discarded; call this purely for its side effect of materializing the * lazy val. */ - protected def requireExistingView(): Unit = existingTable + protected def requireExistingView(): Unit = existingView } /** @@ -110,7 +116,7 @@ case class AlterV2ViewExec( override protected def run(): Seq[InternalRow] = { requireExistingView() - val info = buildTableInfo() + val info = buildViewInfo() // Cyclic reference detection is done at analysis time in CheckViewReferences. 
CommandUtils.uncacheTableOrView(session, ResolvedIdentifier(catalog, identifier)) catalog.dropTable(identifier) @@ -140,7 +146,7 @@ case class AtomicAlterV2ViewExec( override protected def run(): Seq[InternalRow] = { requireExistingView() - val info = buildTableInfo() + val info = buildViewInfo() // Cyclic reference detection is done at analysis time in CheckViewReferences. CommandUtils.uncacheTableOrView(session, ResolvedIdentifier(catalog, identifier)) val staged: StagedTable = try { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala index 5dbe14a8e50bd..9d0aa8d09aa1d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala @@ -20,27 +20,26 @@ package org.apache.spark.sql.execution.datasources.v2 import scala.jdk.CollectionConverters._ import org.apache.spark.SparkException -import org.apache.spark.sql.catalyst.{CurrentUserContext, InternalRow, TableIdentifier} +import org.apache.spark.sql.catalyst.{CurrentUserContext, InternalRow} import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, ResolvedIdentifier, SchemaEvolution, TableAlreadyExistsException, ViewSchemaMode} import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, QuotingUtils} -import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, StagedTable, StagingTableCatalog, Table, TableCatalog, TableInfo, TableSummary} +import org.apache.spark.sql.catalyst.util.CharVarcharUtils +import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, StagedTable, StagingTableCatalog, Table, TableCatalog, ViewInfo} import 
org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.command.{CommandUtils, ViewHelper} import org.apache.spark.sql.execution.metric.SQLMetric import org.apache.spark.sql.util.SchemaUtils +import org.apache.spark.util.ArrayImplicits._ import org.apache.spark.util.Utils /** - * Shared validation + TableInfo construction for v2 CREATE VIEW execs. + * Shared validation + ViewInfo construction for v2 CREATE VIEW execs. * * Mirrors the persistent-view portion of v1 [[ViewHelper.prepareTable]] + the execution-time - * checks in [[CreateViewCommand.run]]. Any future addition on the v1 side -- new view-specific - * reserved property, new validation, new schema-mode handling -- must be mirrored here. - * Post-analysis checks for temp-object references and auto-generated aliases run once for both - * v1 and v2 in [[CheckViewReferences]]. + * checks in [[CreateViewCommand.run]]. Post-analysis checks for temp-object references and + * auto-generated aliases run once for both v1 and v2 in [[CheckViewReferences]]. */ private[v2] trait V2ViewPreparation extends LeafV2CommandExec { def catalog: TableCatalog @@ -54,25 +53,23 @@ private[v2] trait V2ViewPreparation extends LeafV2CommandExec { def query: LogicalPlan def viewSchemaMode: ViewSchemaMode - // Build a synthetic v1 TableIdentifier for error messages and for ViewHelper methods that - // accept it purely for rendering. This carries no semantic weight -- the v2 Identifier is the - // actual target. - protected lazy val legacyName: TableIdentifier = - identifier.asLegacyTableIdentifier(catalog.name()) + // Full multi-part identifier used for error rendering. Built once so we can avoid routing + // through the lossy v1 `TableIdentifier` for multi-level-namespace v2 catalogs. 
+ protected lazy val fullNameParts: Seq[String] = + (catalog.name() +: identifier.asMultipartIdentifier).toSeq override def output: Seq[Attribute] = Seq.empty - protected def buildTableInfo(): TableInfo = { + protected def buildViewInfo(): ViewInfo = { import ViewHelper._ - import TableCatalog._ if (userSpecifiedColumns.nonEmpty) { if (userSpecifiedColumns.length > query.output.length) { throw QueryCompilationErrors.cannotCreateViewNotEnoughColumnsError( - legacyName, userSpecifiedColumns.map(_._1), query) + fullNameParts, userSpecifiedColumns.map(_._1), query) } else if (userSpecifiedColumns.length < query.output.length) { throw QueryCompilationErrors.cannotCreateViewTooManyColumnsError( - legacyName, userSpecifiedColumns.map(_._1), query) + fullNameParts, userSpecifiedColumns.map(_._1), query) } if (viewSchemaMode == SchemaEvolution) { throw SparkException.internalError( @@ -84,34 +81,29 @@ private[v2] trait V2ViewPreparation extends LeafV2CommandExec { val aliasedSchema = CharVarcharUtils.getRawSchema( aliasPlan(session, query, userSpecifiedColumns).schema, session.sessionState.conf) + SchemaUtils.checkColumnNameDuplication( + aliasedSchema.fieldNames.toImmutableArraySeq, session.sessionState.conf.resolver) - // Emit PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE (single quoted multi-part identifier, - // catalog as first part) instead of v1's numbered view.catalogAndNamespace.* keys. 
- val v2Encoder: (String, Seq[String]) => Map[String, String] = { (cat, ns) => - val parts = (cat +: ns).toArray - Map(PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE -> QuotingUtils.quoted(parts)) + val manager = session.sessionState.catalogManager + val queryColumnNames = if (viewSchemaMode == SchemaEvolution) { + Array.empty[String] + } else { + query.output.map(_.name).toArray } - // Temp-object collection arguments are omitted: persistent-view semantics are enforced by - // CheckViewReferences before this runs, so any referenced temp view/function/variable has - // already caused analysis to fail. This matches v1 ViewHelper.prepareTable, which also - // calls generateViewProperties without them on the persistent-view path. - val viewProps = generateViewProperties( - properties = userProperties, - session = session, - queryOutput = query.output.map(_.name).toArray, - fieldNames = aliasedSchema.fieldNames, - viewSchemaMode = viewSchemaMode, - catalogAndNamespaceEncoder = v2Encoder) - + val builder = new ViewInfo.Builder() + .withSchema(aliasedSchema) + .withProperties(userProperties.asJava) + .withQueryText(originalText) + .withCurrentCatalog(manager.currentCatalog.name) + .withCurrentNamespace(manager.currentNamespace) + .withSqlConfigs(sqlConfigsToProps(session.sessionState.conf, "").asJava) + .withSchemaMode(viewSchemaMode.toString) + .withQueryColumnNames(queryColumnNames) // CREATE stamps the current user into PROP_OWNER (matching v2 CREATE TABLE via // CatalogV2Util.withDefaultOwnership and v1 CREATE VIEW via CatalogTable.owner's default); // ALTER preserves the existing view's owner (v1-parity with AlterViewAsCommand's // viewMeta.copy). Both cases are expressed via the `owner` hook provided by the subclass. 
- val builder = new TableInfo.Builder() - .withSchema(aliasedSchema) - .withProperties(viewProps.asJava) - .withViewText(originalText) owner.foreach(builder.withOwner) comment.foreach(builder.withComment) collation.foreach(builder.withCollation) @@ -119,7 +111,7 @@ private[v2] trait V2ViewPreparation extends LeafV2CommandExec { } protected def viewAlreadyExists(): Throwable = - QueryCompilationErrors.viewAlreadyExistsError(legacyName) + QueryCompilationErrors.viewAlreadyExistsError(fullNameParts) // Loads the existing entry at `identifier` or returns None if it does not exist. Combines // the existence check and type check into a single catalog round-trip (vs. the previous @@ -132,13 +124,11 @@ private[v2] trait V2ViewPreparation extends LeafV2CommandExec { } } - // A catalog with SUPPORTS_VIEW round-trips views as MetadataOnlyTable with PROP_TABLE_TYPE - // set to VIEW. Anything else at the same identifier is a non-view table -- REPLACE'ing it as - // a view would silently destroy the table's data, so we reject at the exec layer. + // A SUPPORTS_VIEW catalog round-trips views as MetadataOnlyTable wrapping a ViewInfo. + // Anything else at the same identifier is a non-view table -- REPLACE'ing it as a view would + // silently destroy the table's data, so we reject at the exec layer. protected def isViewTable(table: Table): Boolean = table match { - case mot: MetadataOnlyTable => - TableSummary.VIEW_TABLE_TYPE.equals( - mot.getTableInfo.properties.get(TableCatalog.PROP_TABLE_TYPE)) + case mot: MetadataOnlyTable => mot.getTableInfo.isInstanceOf[ViewInfo] case _ => false } } @@ -164,9 +154,8 @@ case class CreateV2ViewExec( override protected def run(): Seq[InternalRow] = { // Probe the catalog before preparing the view body so `IF NOT EXISTS` short-circuits - // without running `aliasPlan` / `generateViewProperties`, matching v1 - // `CreateViewCommand.run`. Cyclic-reference detection is done at analysis time in - // `CheckViewReferences`. 
+ // without running `aliasPlan` / config capture, matching v1 `CreateViewCommand.run`. + // Cyclic-reference detection is done at analysis time in `CheckViewReferences`. val existing = tryLoadTable() if (allowExisting && existing.isDefined) { return Seq.empty @@ -174,11 +163,11 @@ case class CreateV2ViewExec( existing.foreach { table => if (!isViewTable(table)) { throw QueryCompilationErrors.unsupportedCreateOrReplaceViewOnTableError( - legacyName, replace) + fullNameParts, replace) } if (!replace) throw viewAlreadyExists() } - val info = buildTableInfo() + val info = buildViewInfo() if (existing.isDefined) { CommandUtils.uncacheTableOrView(session, ResolvedIdentifier(catalog, identifier)) catalog.dropTable(identifier) @@ -219,9 +208,8 @@ case class AtomicCreateV2ViewExec( override protected def run(): Seq[InternalRow] = { // Probe the catalog before preparing the view body so `IF NOT EXISTS` short-circuits - // without running `aliasPlan` / `generateViewProperties`, matching v1 - // `CreateViewCommand.run`. Cyclic-reference detection is done at analysis time in - // `CheckViewReferences`. + // without running `aliasPlan` / config capture, matching v1 `CreateViewCommand.run`. + // Cyclic-reference detection is done at analysis time in `CheckViewReferences`. val existing = tryLoadTable() if (allowExisting && existing.isDefined) { return Seq.empty @@ -229,13 +217,13 @@ case class AtomicCreateV2ViewExec( existing.foreach { table => if (!isViewTable(table)) { throw QueryCompilationErrors.unsupportedCreateOrReplaceViewOnTableError( - legacyName, replace) + fullNameParts, replace) } // Match the non-atomic exec: reject plain CREATE against an existing view up front // rather than relying on `stageCreate` to throw. 
if (!replace) throw viewAlreadyExists() } - val info = buildTableInfo() + val info = buildViewInfo() val staged: StagedTable = if (replace) { if (existing.isDefined) { CommandUtils.uncacheTableOrView(session, ResolvedIdentifier(catalog, identifier)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala index 3a8caa3aa8caa..74999a43fd2ad 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.connector import org.apache.spark.SparkConf import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, TableAlreadyExistsException} -import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, StagedTable, StagingTableCatalog, Table, TableCatalog, TableCatalogCapability, TableChange, TableInfo, TableSummary, V1Table} +import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, StagedTable, StagingTableCatalog, Table, TableCatalog, TableCatalogCapability, TableChange, TableInfo, TableSummary, V1Table, ViewInfo} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.StructType @@ -78,51 +78,19 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio Row("multi")) } - // --- TableInfo.Builder unit tests for view-specific properties ---------- - - test("view current catalog/namespace are serialized into a single property") { - val info = new TableInfo.Builder() - .withSchema(new StructType().add("col", "string")) - .withViewText("SELECT * FROM t") - .withCurrentCatalogAndNamespace("spark_catalog", Array("default")) - .build() - 
val table = new MetadataOnlyTable(info, "v") - assert(table.properties().get(TableCatalog.PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE) == - "spark_catalog.default") - } - - test("view current catalog/namespace quotes multi-part names with dots") { - val info = new TableInfo.Builder() - .withSchema(new StructType().add("col", "string")) - .withViewText("SELECT * FROM t") - .withCurrentCatalogAndNamespace("spark_catalog", Array("weird.db", "normal")) - .build() - val table = new MetadataOnlyTable(info, "v") - assert(table.properties().get(TableCatalog.PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE) == - "spark_catalog.`weird.db`.normal") - } - - test("view with no current catalog/namespace omits the property") { - val info = new TableInfo.Builder() - .withSchema(new StructType().add("col", "string")) - .withViewText("SELECT * FROM spark_catalog.default.t") - .build() - val table = new MetadataOnlyTable(info, "v") - assert(!table.properties().containsKey( - TableCatalog.PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE)) - } + // --- ViewInfo unit tests ----------------------------------------------- test("multi-part captured namespace round-trips through V1Table.toCatalogTable") { - // End-to-end coverage of the v2 encoder -> parser round-trip for multi-level namespaces: - // (a) TableInfo.Builder serializes (cat, Array(db1, db2)) into a quoted multi-part - // identifier, (b) V1Table.toCatalogTable parses it back via parseMultipartIdentifier, and - // (c) the resulting CatalogTable exposes the full (cat, db1, db2) via - // viewCatalogAndNamespace -- which is what the v1 view-resolution path consumes to expand - // unqualified references in the view body. - val info = new TableInfo.Builder() + // (a) ViewInfo.Builder stores (cat, Array(db1, db2)) as typed fields. 
+ // (b) V1Table.toCatalogTable reads them directly and emits v1's numbered + // view.catalogAndNamespace.* keys so (c) the resulting CatalogTable's + // `viewCatalogAndNamespace` exposes the full (cat, db1, db2), which is what the v1 + // view-resolution path consumes to expand unqualified references in the view body. + val info = new ViewInfo.Builder() .withSchema(new StructType().add("col", "string")) - .withViewText("SELECT col FROM t") - .withCurrentCatalogAndNamespace("my_cat", Array("db1", "db2")) + .withQueryText("SELECT col FROM t") + .withCurrentCatalog("my_cat") + .withCurrentNamespace(Array("db1", "db2")) .build() val motTable = new MetadataOnlyTable(info, "v") // Any CatalogPlugin works here; toCatalogTable only reads `catalog.name()`. @@ -131,35 +99,27 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio catalog, Identifier.of(Array("ns"), "v"), motTable) assert(ct.viewCatalogAndNamespace == Seq("my_cat", "db1", "db2")) - // And for a namespace part that needs backtick-quoting. - val infoWeird = new TableInfo.Builder() + // Namespace parts containing dots flow through structurally (no string encoding). 
+ val infoWeird = new ViewInfo.Builder() .withSchema(new StructType().add("col", "string")) - .withViewText("SELECT col FROM t") - .withCurrentCatalogAndNamespace("my_cat", Array("weird.db", "normal")) + .withQueryText("SELECT col FROM t") + .withCurrentCatalog("my_cat") + .withCurrentNamespace(Array("weird.db", "normal")) .build() val ctWeird = V1Table.toCatalogTable( catalog, Identifier.of(Array("ns"), "v"), new MetadataOnlyTable(infoWeird, "v")) assert(ctWeird.viewCatalogAndNamespace == Seq("my_cat", "weird.db", "normal")) } - test("withCurrentCatalogAndNamespace clears the property when catalog is null or empty") { - val infoNull = new TableInfo.Builder() - .withSchema(new StructType().add("col", "string")) - .withViewText("SELECT 1 AS col") - .withCurrentCatalogAndNamespace("spark_catalog", Array("default")) - .withCurrentCatalogAndNamespace(null, Array("ignored")) - .build() - assert(!infoNull.properties().containsKey( - TableCatalog.PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE)) - - val infoEmpty = new TableInfo.Builder() + test("view with no captured catalog omits viewCatalogAndNamespace") { + val info = new ViewInfo.Builder() .withSchema(new StructType().add("col", "string")) - .withViewText("SELECT 1 AS col") - .withCurrentCatalogAndNamespace("spark_catalog", Array("default")) - .withCurrentCatalogAndNamespace("", Array("ignored")) + .withQueryText("SELECT * FROM spark_catalog.default.t") .build() - assert(!infoEmpty.properties().containsKey( - TableCatalog.PROP_VIEW_CURRENT_CATALOG_AND_NAMESPACE)) + val motTable = new MetadataOnlyTable(info, "v") + val catalog = spark.sessionState.catalogManager.catalog("view_catalog") + val ct = V1Table.toCatalogTable(catalog, Identifier.of(Array("ns"), "v"), motTable) + assert(ct.viewCatalogAndNamespace.isEmpty) } // --- CREATE VIEW on a plain TableCatalog -------------------------------- @@ -325,7 +285,8 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio // CREATE VIEW IF NOT EXISTS is a 
no-op -- the table entry is untouched. sql("CREATE VIEW IF NOT EXISTS view_catalog.default.v_existing_table AS " + "SELECT x FROM spark_catalog.default.t") - val stored = catalog.getStoredView(Array("default"), "v_existing_table") + val stored = catalog.getStoredInfo(Array("default"), "v_existing_table") + assert(!stored.isInstanceOf[ViewInfo]) assert(stored.properties().get(TableCatalog.PROP_TABLE_TYPE) == TableSummary.EXTERNAL_TABLE_TYPE) } @@ -521,12 +482,13 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio val catalog = spark.sessionState.catalogManager.catalog("view_catalog") .asInstanceOf[TestingViewCatalog] val viewIdent = Identifier.of(Array("default"), "v_owner") - // Pre-seed a view whose stored TableInfo carries an explicit owner. - val initialInfo = new TableInfo.Builder() + // Pre-seed a view whose stored ViewInfo carries an explicit owner. + val initialInfo = new ViewInfo.Builder() .withSchema(new StructType().add("x", "int")) - .withViewText("SELECT 1 AS x") + .withQueryText("SELECT 1 AS x") .withOwner("alice") - .withCurrentCatalogAndNamespace("spark_catalog", Array("default")) + .withCurrentCatalog("spark_catalog") + .withCurrentNamespace(Array("default")) .build() catalog.createTable(viewIdent, initialInfo) try { @@ -554,9 +516,7 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio val catalog = spark.sessionState.catalogManager.catalog("view_catalog") .asInstanceOf[TestingViewCatalog] - val info = catalog.getStoredView(Array("default"), "v_evo") - // Use the same stored key v1 uses (CatalogTable.VIEW_SCHEMA_MODE = "view.schemaMode"). 
- assert(info.properties().get("view.schemaMode") == "EVOLUTION") + assert(catalog.getStoredView(Array("default"), "v_evo").schemaMode() == "EVOLUTION") } } @@ -569,17 +529,16 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio } val catalog = spark.sessionState.catalogManager.catalog("view_catalog") .asInstanceOf[TestingViewCatalog] - val ansiKey = TableCatalog.VIEW_CONF_PREFIX + SQLConf.ANSI_ENABLED.key - assert(catalog.getStoredView(Array("default"), "v_configs").properties().get(ansiKey) - == "true") + assert(catalog.getStoredView(Array("default"), "v_configs") + .sqlConfigs().get(SQLConf.ANSI_ENABLED.key) == "true") // ALTER under a different ANSI setting should replace the stored config, not merge. withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { sql("ALTER VIEW view_catalog.default.v_configs AS " + "SELECT col FROM spark_catalog.default.t WHERE col = 'b'") } - assert(catalog.getStoredView(Array("default"), "v_configs").properties().get(ansiKey) - == "false") + assert(catalog.getStoredView(Array("default"), "v_configs") + .sqlConfigs().get(SQLConf.ANSI_ENABLED.key) == "false") } } @@ -658,6 +617,57 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio } } + test("view error messages render the full multi-level namespace") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW view_catalog.ns1.inner.v_err AS " + + "SELECT x FROM spark_catalog.default.t") + // Second CREATE surfaces `viewAlreadyExistsError` (via TableAlreadyExistsException from + // the catalog). Before the error signatures took `Seq[String]`, `legacyName` collapsed + // ns1.inner into just `inner` and the error said `view_catalog.inner.v_err` -- missing + // the outer `ns1` segment. 
+ val dup = intercept[AnalysisException] { + sql("CREATE VIEW view_catalog.ns1.inner.v_err AS " + + "SELECT x FROM spark_catalog.default.t") + } + assert(dup.getCondition == "TABLE_OR_VIEW_ALREADY_EXISTS") + assert(dup.getMessage.contains("`view_catalog`.`ns1`.`inner`.`v_err`"), + s"expected full multi-part name in error, got: ${dup.getMessage}") + + // CREATE OR REPLACE VIEW over a non-view table entry surfaces + // `unsupportedCreateOrReplaceViewOnTableError`. Pre-seed a non-view entry at a + // multi-level-namespace identifier to exercise the rendering. + val catalog = spark.sessionState.catalogManager.catalog("view_catalog") + .asInstanceOf[TestingViewCatalog] + val tblIdent = Identifier.of(Array("ns1", "inner"), "t_err") + catalog.createTable( + tblIdent, + new TableInfo.Builder() + .withSchema(new StructType().add("col", "string")) + .withTableType(TableSummary.EXTERNAL_TABLE_TYPE) + .build()) + try { + val notView = intercept[AnalysisException] { + sql("CREATE OR REPLACE VIEW view_catalog.ns1.inner.t_err AS " + + "SELECT x FROM spark_catalog.default.t") + } + assert(notView.getCondition == "EXPECT_VIEW_NOT_TABLE.NO_ALTERNATIVE") + assert(notView.getMessage.contains("`view_catalog`.`ns1`.`inner`.`t_err`"), + s"expected full multi-part name in error, got: ${notView.getMessage}") + } finally { + catalog.dropTable(tblIdent) + } + + // Column-arity mismatch error. 
+ val arity = intercept[AnalysisException] { + sql("CREATE VIEW view_catalog.ns1.inner.v_arity (a, b) AS " + + "SELECT x FROM spark_catalog.default.t") + } + assert(arity.getMessage.contains("`view_catalog`.`ns1`.`inner`.`v_arity`"), + s"expected full multi-part name in error, got: ${arity.getMessage}") + } + } + test("ALTER VIEW cyclic detection distinguishes views across multi-level namespaces") { withTable("spark_catalog.default.t") { Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") @@ -918,40 +928,39 @@ class TestingViewCatalog extends TableCatalog { Option(createdViews.get(key)).map(new MetadataOnlyTable(_, ident.toString)).getOrElse { ident.name() match { case "test_view" => - val viewProps = new java.util.HashMap[String, String]() - viewProps.put( - TableCatalog.VIEW_CONF_PREFIX + SQLConf.ANSI_ENABLED.key, - (ident.namespace().head == "ansi").toString) - val info = new TableInfo.Builder() + val info = new ViewInfo.Builder() .withSchema(new StructType().add("col", "string").add("i", "int")) - .withProperties(viewProps) - .withViewText( + .withQueryText( "SELECT col, col::int AS i FROM spark_catalog.default.t WHERE col = 'b'") + .withSqlConfigs(java.util.Collections.singletonMap( + SQLConf.ANSI_ENABLED.key, (ident.namespace().head == "ansi").toString)) .build() new MetadataOnlyTable(info, ident.toString) case "test_unqualified_view" => - val info = new TableInfo.Builder() + val info = new ViewInfo.Builder() .withSchema(new StructType().add("col", "string")) - .withViewText("SELECT col FROM t WHERE col = 'b'") - .withCurrentCatalogAndNamespace("spark_catalog", Array("default")) + .withQueryText("SELECT col FROM t WHERE col = 'b'") + .withCurrentCatalog("spark_catalog") + .withCurrentNamespace(Array("default")) .build() new MetadataOnlyTable(info, ident.toString) case "test_unqualified_multi" => // View whose captured catalog+namespace is view_catalog.ns1.ns2 (two-part). 
The // unqualified `t` in the body must resolve via that captured context to // view_catalog.ns1.ns2.t, which this catalog also serves (see `t` case below). - val info = new TableInfo.Builder() + val info = new ViewInfo.Builder() .withSchema(new StructType().add("col", "string")) - .withViewText("SELECT col FROM t") - .withCurrentCatalogAndNamespace("view_catalog", Array("ns1", "ns2")) + .withQueryText("SELECT col FROM t") + .withCurrentCatalog("view_catalog") + .withCurrentNamespace(Array("ns1", "ns2")) .build() new MetadataOnlyTable(info, ident.toString) case "t" if ident.namespace().toSeq == Seq("ns1", "ns2") => // Target of test_unqualified_multi's unqualified reference. Self-contained view so // the test doesn't need external data. - val info = new TableInfo.Builder() + val info = new ViewInfo.Builder() .withSchema(new StructType().add("col", "string")) - .withViewText("SELECT 'multi' AS col") + .withQueryText("SELECT 'multi' AS col") .build() new MetadataOnlyTable(info, ident.toString) case _ => throw new NoSuchTableException(ident) @@ -972,13 +981,21 @@ class TestingViewCatalog extends TableCatalog { new MetadataOnlyTable(info, ident.toString) } - /** Test-only accessor: returns the stored TableInfo for a created view. */ - def getStoredView(namespace: Array[String], name: String): TableInfo = { + /** Test-only accessor: returns the stored TableInfo (table or view) for the identifier. */ + def getStoredInfo(namespace: Array[String], name: String): TableInfo = { Option(createdViews.get((namespace.toSeq, name))).getOrElse { throw new NoSuchTableException(Identifier.of(namespace, name)) } } + /** Test-only accessor: returns the stored ViewInfo; fails if the entry is not a view. 
*/ + def getStoredView(namespace: Array[String], name: String): ViewInfo = getStoredInfo( + namespace, name) match { + case v: ViewInfo => v + case _ => throw new IllegalStateException( + s"stored entry at ${namespace.mkString(".")}.$name is not a view") + } + override def alterTable(ident: Identifier, changes: TableChange*): Table = { throw new RuntimeException("shouldn't be called") } @@ -1085,9 +1102,9 @@ private class RecordingStagedTable( * kept to make future tests that deliberately bypass the upstream gate easy to write. */ class TestingTableOnlyCatalog extends TableCatalog { - private val fixtureView: TableInfo = new TableInfo.Builder() + private val fixtureView: ViewInfo = new ViewInfo.Builder() .withSchema(new StructType().add("x", "int")) - .withViewText("SELECT 1 AS x") + .withQueryText("SELECT 1 AS x") .build() override def loadTable(ident: Identifier): Table = From 4cca4e0d97e86b82c6d599efa1815dcad91d6565 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 23 Apr 2026 11:14:22 +0000 Subject: [PATCH 30/59] address self-review findings: DropViewExec type check; multi-part names in temp-object errors; restore 3-part v1 session-catalog SubqueryAlias Co-authored-by: Isaac --- .../sql/catalyst/catalog/SessionCatalog.scala | 7 +- .../sql/errors/QueryCompilationErrors.scala | 20 +++--- .../command/metricViewCommands.scala | 3 +- .../spark/sql/execution/command/views.scala | 41 +++++------- .../datasources/v2/DataSourceV2Strategy.scala | 7 +- .../datasources/v2/DropViewExec.scala | 65 +++++++++++++++++++ .../DataSourceV2MetadataOnlyViewSuite.scala | 49 +++++++++++++- 7 files changed, 149 insertions(+), 43 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropViewExec.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 554bded472bde..af398eb8527e9 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -1056,11 +1056,12 @@ class SessionCatalog( options: CaseInsensitiveStringMap = CaseInsensitiveStringMap.empty()): LogicalPlan = { // Prefer `multipartIdentifier` (set by non-session v2 catalogs via `V1Table.toCatalogTable`) // so the SubqueryAlias qualifier reflects the real catalog + multi-part namespace. - // Fall back to `qualifyIdentifier` for v1 session-catalog tables: it defaults catalog to - // `SESSION_CATALOG_NAME` and database to the current database when either is missing. + // Fall back to the historical 3-part form for v1 session-catalog tables -- we intentionally + // always include `SESSION_CATALOG_NAME` here and ignore + // `LEGACY_NON_IDENTIFIER_OUTPUT_CATALOG_NAME` to preserve pre-v2-MetadataOnlyTable behavior. val multiParts = metadata.multipartIdentifier.getOrElse { val qualifiedIdent = qualifyIdentifier(metadata.identifier) - qualifiedIdent.nameParts + Seq(CatalogManager.SESSION_CATALOG_NAME, qualifiedIdent.database.get, qualifiedIdent.table) } if (CatalogTable.isMetricView(metadata)) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 1c5077b214b1f..9d4605ab73d66 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -3416,47 +3416,47 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat } def notAllowedToCreatePermanentViewWithoutAssigningAliasForExpressionError( - name: TableIdentifier, + viewNameParts: Seq[String], attr: Attribute): Throwable = { new AnalysisException( errorClass = "CREATE_PERMANENT_VIEW_WITHOUT_ALIAS", messageParameters = Map( - "name" -> 
toSQLId(name.nameParts), + "name" -> toSQLId(viewNameParts), "attr" -> toSQLExpr(attr))) } def notAllowedToCreatePermanentViewByReferencingTempViewError( - name: TableIdentifier, - nameParts: String): Throwable = { + viewNameParts: Seq[String], + tempViewNameParts: String): Throwable = { new AnalysisException( errorClass = "INVALID_TEMP_OBJ_REFERENCE", messageParameters = Map( "obj" -> "VIEW", - "objName" -> toSQLId(name.nameParts), + "objName" -> toSQLId(viewNameParts), "tempObj" -> "VIEW", - "tempObjName" -> toSQLId(nameParts))) + "tempObjName" -> toSQLId(tempViewNameParts))) } def notAllowedToCreatePermanentViewByReferencingTempFuncError( - name: TableIdentifier, + viewNameParts: Seq[String], funcName: String): Throwable = { new AnalysisException( errorClass = "INVALID_TEMP_OBJ_REFERENCE", messageParameters = Map( "obj" -> "VIEW", - "objName" -> toSQLId(name.nameParts), + "objName" -> toSQLId(viewNameParts), "tempObj" -> "FUNCTION", "tempObjName" -> toSQLId(funcName))) } def notAllowedToCreatePermanentViewByReferencingTempVarError( - nameParts: Seq[String], + viewNameParts: Seq[String], varName: Seq[String]): Throwable = { new AnalysisException( errorClass = "INVALID_TEMP_OBJ_REFERENCE", messageParameters = Map( "obj" -> "VIEW", - "objName" -> toSQLId(nameParts), + "objName" -> toSQLId(viewNameParts), "tempObj" -> "VARIABLE", "tempObjName" -> toSQLId(varName))) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/metricViewCommands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/metricViewCommands.scala index ec8acbd02139a..623685f6c20a7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/metricViewCommands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/metricViewCommands.scala @@ -90,7 +90,8 @@ object MetricViewHelper { val metricViewNode = MetricViewPlanner.planWrite( tableMeta, viewText, session.sessionState.sqlParser) val analyzed = 
analyzer.executeAndCheck(metricViewNode, new QueryPlanningTracker) - ViewHelper.verifyTemporaryObjectsNotExists(isTemporary = false, name, analyzed, Seq.empty) + ViewHelper.verifyTemporaryObjectsNotExists( + isTemporary = false, name.nameParts, analyzed, Seq.empty) analyzed } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index 8a7b395d74a77..78275f8dcf0fb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -134,8 +134,9 @@ case class CreateViewCommand( // When creating a permanent view, not allowed to reference temporary objects. // This should be called after `qe.assertAnalyzed()` (i.e., `child` can be resolved) - verifyTemporaryObjectsNotExists(isTemporary, name, analyzedPlan, referredTempFunctions) - verifyAutoGeneratedAliasesNotExists(analyzedPlan, isTemporary, name) + verifyTemporaryObjectsNotExists( + isTemporary, name.nameParts, analyzedPlan, referredTempFunctions) + verifyAutoGeneratedAliasesNotExists(analyzedPlan, isTemporary, name.nameParts) SchemaUtils.checkIndeterminateCollationInSchema(plan.schema) @@ -256,8 +257,8 @@ case class AlterViewAsCommand( override def run(session: SparkSession): Seq[Row] = { val isTemporary = session.sessionState.catalog.isTempView(name) - verifyTemporaryObjectsNotExists(isTemporary, name, query, referredTempFunctions) - verifyAutoGeneratedAliasesNotExists(query, isTemporary, name) + verifyTemporaryObjectsNotExists(isTemporary, name.nameParts, query, referredTempFunctions) + verifyAutoGeneratedAliasesNotExists(query, isTemporary, name.nameParts) SchemaUtils.checkIndeterminateCollationInSchema(query.schema) if (isTemporary) { alterTemporaryView(session, query) @@ -575,12 +576,13 @@ object ViewHelper extends SQLConfHelper with Logging with CapturesConfig { } def 
verifyAutoGeneratedAliasesNotExists( - child: LogicalPlan, isTemporary: Boolean, name: TableIdentifier): Unit = { + child: LogicalPlan, isTemporary: Boolean, viewNameParts: Seq[String]): Unit = { if (!isTemporary && !conf.allowAutoGeneratedAliasForView) { child.output.foreach { attr => if (attr.metadata.contains("__autoGeneratedAlias")) { throw QueryCompilationErrors - .notAllowedToCreatePermanentViewWithoutAssigningAliasForExpressionError(name, attr) + .notAllowedToCreatePermanentViewWithoutAssigningAliasForExpressionError( + viewNameParts, attr) } } } @@ -591,7 +593,7 @@ object ViewHelper extends SQLConfHelper with Logging with CapturesConfig { */ def verifyTemporaryObjectsNotExists( isTemporary: Boolean, - name: TableIdentifier, + viewNameParts: Seq[String], child: LogicalPlan, referredTempFunctions: Seq[String]): Unit = { import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ @@ -599,16 +601,16 @@ object ViewHelper extends SQLConfHelper with Logging with CapturesConfig { val tempViews = collectTemporaryViews(child) tempViews.foreach { nameParts => throw QueryCompilationErrors.notAllowedToCreatePermanentViewByReferencingTempViewError( - name, nameParts.quoted) + viewNameParts, nameParts.quoted) } referredTempFunctions.foreach { funcName => throw QueryCompilationErrors.notAllowedToCreatePermanentViewByReferencingTempFuncError( - name, funcName) + viewNameParts, funcName) } val tempVars = collectTemporaryVariables(child) tempVars.foreach { nameParts => throw QueryCompilationErrors.notAllowedToCreatePermanentViewByReferencingTempVarError( - name.nameParts, nameParts) + viewNameParts, nameParts) } } } @@ -883,11 +885,6 @@ object CheckViewReferences extends (LogicalPlan => Unit) { s"Unexpected child of view command: ${other.getClass.getName}") } - private def legacyNameFor(resolved: LogicalPlan): TableIdentifier = { - val (catalog, ident) = catalogAndIdent(resolved) - ident.asLegacyTableIdentifier(catalog.name()) - } - private def fullIdentFor(resolved: 
LogicalPlan): Seq[String] = { val (catalog, ident) = catalogAndIdent(resolved) catalog.name() +: ident.asMultipartIdentifier @@ -905,15 +902,14 @@ object CheckViewReferences extends (LogicalPlan => Unit) { override def apply(plan: LogicalPlan): Unit = plan.foreach { case cv: CreateView if cv.isAnalyzed => requireSupportsView(cv.child) - val legacyName = legacyNameFor(cv.child) + val fullIdent = fullIdentFor(cv.child) verifyTemporaryObjectsNotExists( - isTemporary = false, legacyName, cv.query, cv.referredTempFunctions) - verifyAutoGeneratedAliasesNotExists(cv.query, isTemporary = false, legacyName) + isTemporary = false, fullIdent, cv.query, cv.referredTempFunctions) + verifyAutoGeneratedAliasesNotExists(cv.query, isTemporary = false, fullIdent) // Cycles can only form when REPLACE'ing an existing view; a plain CREATE against an // existing view fails earlier with `viewAlreadyExistsError` and against a non-existent // view has nothing to cycle with. if (cv.replace) { - val fullIdent = fullIdentFor(cv.child) checkCyclicViewReference(cv.query, Seq(fullIdent), fullIdent) } @@ -921,11 +917,10 @@ object CheckViewReferences extends (LogicalPlan => Unit) { // No capability check here: `Analyzer.lookupTableOrView(identifier, viewOnly=true)` // already rejects non-SUPPORTS_VIEW catalogs upstream for `UnresolvedView`, so by the // time an AlterViewAs reaches this rule the catalog is guaranteed to support views. 
- val legacyName = legacyNameFor(av.child) - verifyTemporaryObjectsNotExists( - isTemporary = false, legacyName, av.query, av.referredTempFunctions) - verifyAutoGeneratedAliasesNotExists(av.query, isTemporary = false, legacyName) val fullIdent = fullIdentFor(av.child) + verifyTemporaryObjectsNotExists( + isTemporary = false, fullIdent, av.query, av.referredTempFunctions) + verifyAutoGeneratedAliasesNotExists(av.query, isTemporary = false, fullIdent) checkCyclicViewReference(av.query, Seq(fullIdent), fullIdent) case _ => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index b4dce11b45414..c03f5a97834af 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -391,13 +391,12 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat // DROP VIEW on a non-session SUPPORTS_VIEW catalog. The v1 rewrite in `ResolveSessionCatalog` // skips SUPPORTS_VIEW catalogs (its DropView case has a `!supportsView(catalog)` guard), so - // they fall through here. Reuses `DropTableExec` because `TableCatalog.dropTable` is - // contractually required to drop views at the same identifier for SUPPORTS_VIEW catalogs. + // they fall through here. `DropViewExec` verifies the target is a view before calling + // `dropTable`, mirroring v1's `DropTableCommand(isView = true)` safety net. 
case DropView(r @ ResolvedIdentifier(catalog, ident), ifExists) if CatalogV2Util.supportsView(catalog) => val invalidateFunc = () => CommandUtils.uncacheTableOrView(session, r) - DropTableExec( - catalog.asTableCatalog, ident, ifExists, purge = false, invalidateFunc) :: Nil + DropViewExec(catalog.asTableCatalog, ident, ifExists, invalidateFunc) :: Nil case ReplaceTableAsSelect(ResolvedIdentifier(catalog, ident), parts, query, tableSpec: TableSpec, options, orCreate, true) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropViewExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropViewExec.scala new file mode 100644 index 0000000000000..589fffd529f26 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropViewExec.scala @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.datasources.v2 + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.NoSuchTableException +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, TableCatalog, ViewInfo} +import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.util.ArrayImplicits._ + +/** + * Physical plan node for DROP VIEW on a v2 `TableCatalog` that declares + * [[org.apache.spark.sql.connector.catalog.TableCatalogCapability#SUPPORTS_VIEW]]. Loads the + * target entry once to verify it is a view (a [[MetadataOnlyTable]] wrapping a [[ViewInfo]]) + * before calling [[TableCatalog#dropTable]]. Matching the v1 path's + * `DropTableCommand(isView = true)` safety net keeps `DROP VIEW some_table` from silently + * destroying a non-view table on a SUPPORTS_VIEW catalog. + */ +case class DropViewExec( + catalog: TableCatalog, + ident: Identifier, + ifExists: Boolean, + invalidateCache: () => Unit) extends LeafV2CommandExec { + + override protected def run(): Seq[InternalRow] = { + val loaded = try { + Some(catalog.loadTable(ident)) + } catch { + case _: NoSuchTableException => None + } + val nameParts = + (catalog.name() +: ident.namespace() :+ ident.name()).toImmutableArraySeq + loaded match { + case Some(mot: MetadataOnlyTable) if mot.getTableInfo.isInstanceOf[ViewInfo] => + invalidateCache() + catalog.dropTable(ident) + case Some(_) => + throw QueryCompilationErrors.expectViewNotTableError( + nameParts, cmd = "DROP VIEW", suggestAlternative = false, t = this) + case None if !ifExists => + throw QueryCompilationErrors.noSuchTableError(nameParts) + case None => + // IF EXISTS: no-op. 
+ } + Seq.empty + } + + override def output: Seq[Attribute] = Seq.empty +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala index 74999a43fd2ad..a8106f5f56fd1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala @@ -325,7 +325,7 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio checkAnswer(spark.table("staging_catalog.default.v_atomic"), Row(3)) // CREATE IF NOT EXISTS on an existing view -- no-op; the atomic exec short-circuits on - // tryLoadTable() before buildTableInfo, matching the non-atomic path. + // tryLoadTable() before buildViewInfo, matching the non-atomic path. sql("CREATE VIEW IF NOT EXISTS staging_catalog.default.v_atomic AS " + "SELECT x + 100 AS x FROM spark_catalog.default.t") // Value unchanged -- IF NOT EXISTS was a no-op. @@ -695,6 +695,26 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio } } + test("temp-object reference errors render the full multi-level namespace") { + // `verifyTemporaryObjectsNotExists` / `verifyAutoGeneratedAliasesNotExists` used to take a + // `TableIdentifier` built via `asLegacyTableIdentifier`, which collapses multi-level + // namespaces to the last segment -- so a temp-function reference on + // `view_catalog.ns1.inner.v_tempfn` produced an error naming + // `view_catalog.inner.v_tempfn` and dropped the `ns1` middle segment. Post-migration the + // errors render the full multi-part name. 
+ withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + spark.udf.register("temp_udf_multi", (i: Int) => i + 1) + val ex = intercept[AnalysisException] { + sql("CREATE VIEW view_catalog.ns1.inner.v_tempfn AS " + + "SELECT temp_udf_multi(x) FROM spark_catalog.default.t") + } + assert(ex.getCondition == "INVALID_TEMP_OBJ_REFERENCE") + assert(ex.getMessage.contains("`view_catalog`.`ns1`.`inner`.`v_tempfn`"), + s"expected full multi-part name, got: ${ex.getMessage}") + } + } + // --- Follow-up-blocked view DDL / inspection on a non-session v2 catalog ------------ // These plans don't have a dedicated v2 strategy yet (tracked for a follow-up PR). We pin // the current failure mode -- UNSUPPORTED_FEATURE.TABLE_OPERATION with a statement-specific @@ -801,11 +821,36 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio } test("DROP VIEW IF EXISTS on a v2 catalog is a no-op when the view is missing") { - // Exercises the `ifExists=true` path -- DropTableExec should not throw when the view + // Exercises the `ifExists=true` path -- DropViewExec should not throw when the view // doesn't exist on a SUPPORTS_VIEW catalog. sql("DROP VIEW IF EXISTS view_catalog.default.v_never_existed") } + test("DROP VIEW on a non-view table entry is rejected (v1-parity)") { + // v1 `DropTableCommand(isView = true)` rejects a non-view target via + // `wrongCommandForObjectTypeError`. The v2 path must also refuse -- otherwise + // `DROP VIEW view_catalog.default.t_not_a_view` would silently destroy the table's entry.
+ val catalog = spark.sessionState.catalogManager.catalog("view_catalog") + .asInstanceOf[TestingViewCatalog] + val tableIdent = Identifier.of(Array("default"), "t_not_a_view") + catalog.createTable( + tableIdent, + new TableInfo.Builder() + .withSchema(new StructType().add("col", "string")) + .withTableType(TableSummary.EXTERNAL_TABLE_TYPE) + .build()) + try { + val ex = intercept[AnalysisException] { + sql("DROP VIEW view_catalog.default.t_not_a_view") + } + assert(ex.getCondition == "EXPECT_VIEW_NOT_TABLE.NO_ALTERNATIVE") + // The table entry must still be there -- DROP VIEW did not destroy it. + assert(catalog.tableExists(tableIdent)) + } finally { + catalog.dropTable(tableIdent) + } + } + test("DROP VIEW on a StagingTableCatalog drops the view") { withSQLConf( "spark.sql.catalog.staging_catalog" -> classOf[TestingStagingCatalog].getName) { From 2e9b6bb4e4cd6a9e909294ed31f10fc5951801ae Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 23 Apr 2026 18:43:36 +0000 Subject: [PATCH 31/59] fix tests for new error class and CreateView field - PlanResolutionSuite drop-view v2: expect MISSING_CATALOG_ABILITY.VIEWS (Analyzer.lookupTableOrView now routes non-SUPPORTS_VIEW v2 catalogs through that error instead of UNSUPPORTED_FEATURE.CATALOG_OPERATION). - explain golden files: regenerate to include CreateView.isAnalyzed in argString (new field from this PR's AnalysisOnlyCommand conversion). 
Co-authored-by: Isaac --- .../resources/sql-tests/analyzer-results/explain-aqe.sql.out | 2 +- .../test/resources/sql-tests/analyzer-results/explain.sql.out | 2 +- .../spark/sql/execution/command/PlanResolutionSuite.scala | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/explain-aqe.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/explain-aqe.sql.out index 4b9bb859cd567..3f16d4f756511 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/explain-aqe.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/explain-aqe.sql.out @@ -174,7 +174,7 @@ EXPLAIN FORMATTED CREATE VIEW explain_view AS SELECT key, val FROM explain_temp1 -- !query analysis -ExplainCommand 'CreateView SELECT key, val FROM explain_temp1, false, false, COMPENSATION, FormattedMode +ExplainCommand 'CreateView SELECT key, val FROM explain_temp1, false, false, COMPENSATION, false, FormattedMode -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/explain.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/explain.sql.out index 4b9bb859cd567..3f16d4f756511 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/explain.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/explain.sql.out @@ -174,7 +174,7 @@ EXPLAIN FORMATTED CREATE VIEW explain_view AS SELECT key, val FROM explain_temp1 -- !query analysis -ExplainCommand 'CreateView SELECT key, val FROM explain_temp1, false, false, COMPENSATION, FormattedMode +ExplainCommand 'CreateView SELECT key, val FROM explain_temp1, false, false, COMPENSATION, false, FormattedMode -- !query diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index 92d56e800c722..a1227864a9aba 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -778,8 +778,8 @@ class PlanResolutionSuite extends SharedSparkSession with AnalysisTest { } checkError( e, - condition = "UNSUPPORTED_FEATURE.CATALOG_OPERATION", - parameters = Map("catalogName" -> "`testcat`", "operation" -> "views")) + condition = "MISSING_CATALOG_ABILITY.VIEWS", + parameters = Map("plugin" -> "testcat")) } // ALTER VIEW view_name SET TBLPROPERTIES ('comment' = new_comment); From b4a40bf69adb48c5f9ef5b30e1f3a66737390365 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 24 Apr 2026 11:19:03 +0000 Subject: [PATCH 32/59] fix DescribeRelation pattern arity after SPARK-39660 merge SPARK-39660 split v2 DESCRIBE TABLE PARTITION off into its own DescribeTablePartition plan and dropped `partitionSpec` from DescribeRelation. Our v2-view pin case had 4 wildcards; reduce to 3 to match the new 3-field case class. 
Co-authored-by: Isaac --- .../sql/execution/datasources/v2/DataSourceV2Strategy.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index df3744fc35631..790a2da41fb18 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -359,7 +359,7 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat throw QueryCompilationErrors.unsupportedTableOperationError( catalog, ident, "SHOW COLUMNS") - case DescribeRelation(ResolvedPersistentView(catalog, ident, _), _, _, _) => + case DescribeRelation(ResolvedPersistentView(catalog, ident, _), _, _) => throw QueryCompilationErrors.unsupportedTableOperationError( catalog, ident, "DESCRIBE TABLE") From f76d92ae973f6e95b64f4b852031ab9973a1c6c0 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 24 Apr 2026 16:59:39 +0000 Subject: [PATCH 33/59] unblock javadoc generation: downgrade scaladoc on public-package methods Javadoc died mid-stream while generating CatalogV2Implicits.IdentifierHelper.html (the failing PR's log stops exactly there; the succeeding PRs continue past to MultipartIdentifierHelper and CatalogV2Util). The only diff in IdentifierHelper on this branch was the new asLegacyTableIdentifier method, whose scaladoc used `[[TableIdentifier]]` / `[[toQualifiedNameParts]]` / backtick-inlined code refs. Something in that doc tripped javadoc into a hard exit (not a warning) instead of a broken-link warning. 
Fix: downgrade both new scaladoc blocks in the exposed-to-javadoc connector/catalog package to plain `//` comments so genjavadoc doesn't emit them into the Java stub at all: - CatalogV2Implicits.IdentifierHelper.asLegacyTableIdentifier - CatalogV2Util.supportsView (same risky pattern, hasn't been reached yet because javadoc died earlier, but would break next) The method names are self-documenting; internal callers don't need the scaladoc. Co-authored-by: Isaac --- .../sql/connector/catalog/CatalogV2Implicits.scala | 10 ++++------ .../spark/sql/connector/catalog/CatalogV2Util.scala | 6 ++---- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala index b11f06bf58159..a5f1ca7f1d289 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala @@ -171,12 +171,10 @@ private[sql] object CatalogV2Implicits { throw QueryCompilationErrors.requiresSinglePartNamespaceError(asMultipartIdentifier) } - /** - * Build a v1 [[TableIdentifier]] for display / error-rendering purposes. Collapses a - * multi-part namespace to its last segment (v1 [[TableIdentifier]] has a single-string - * database field). Callers that need a lossless multi-part form should build a - * `Seq[String]` from [[toQualifiedNameParts]] instead. - */ + // Build a v1 TableIdentifier for display / error-rendering purposes. Collapses a + // multi-part namespace to its last segment (v1 TableIdentifier has a single-string + // database field). Callers that need a lossless multi-part form should build a + // Seq[String] from toQualifiedNameParts instead. 
def asLegacyTableIdentifier(catalogName: String): TableIdentifier = TableIdentifier( table = ident.name(), database = ident.namespace().lastOption, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala index fc484d4675f36..fbe33a90a245a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala @@ -78,10 +78,8 @@ private[sql] object CatalogV2Util { SupportsNamespaces.PROP_LOCATION, SupportsNamespaces.PROP_OWNER) - /** - * Whether the given catalog is a [[TableCatalog]] that declares - * [[TableCatalogCapability.SUPPORTS_VIEW]]. Returns false for non-`TableCatalog` plugins. - */ + // Whether the given catalog is a TableCatalog that declares + // TableCatalogCapability.SUPPORTS_VIEW. Returns false for non-TableCatalog plugins. def supportsView(catalog: CatalogPlugin): Boolean = catalog match { case tc: TableCatalog => tc.capabilities().contains(TableCatalogCapability.SUPPORTS_VIEW) case _ => false From 2c4edd4ca9a0cf6b599a516d037e70fb9266e0e8 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Sat, 25 Apr 2026 04:53:03 +0000 Subject: [PATCH 34/59] rework: separate ViewCatalog interface, drop SUPPORTS_VIEW Restore ViewCatalog as the plugin-facing API for view-only catalogs and view DDL operations, instead of routing views through TableCatalog under a SUPPORTS_VIEW capability flag. Catalog-implementer ergonomics: * Pure view-only: implement ViewCatalog. 5 methods (listViews, loadView/createView/replaceView/dropView), default viewExists. No instanceof, no capability declaration, no TableCatalog stubs. * Pure tables: implement TableCatalog. Same as today. * Mixed (Iceberg/UC): implement both interfaces independently. 
Single cross-cutting invariant -- one identifier namespace; createTable rejects view-collisions, createView rejects table-collisions. ViewCatalog API: Identifier[] listViews(String[] namespace); ViewInfo loadView(Identifier); ViewInfo createView(Identifier, ViewInfo); ViewInfo replaceView(Identifier, ViewInfo); // atomic per-call boolean dropView(Identifier); default boolean viewExists(Identifier); No StagingViewCatalog -- view REPLACE writes only metadata, so a single transactional metastore call (or equivalent) is sufficient. CREATE OR REPLACE VIEW probes viewExists then dispatches createView/replaceView. Spark-side dispatch: * Analyzer.lookupTableOrView: try TableCatalog.loadTable first; on NoSuchTableException, if catalog is ViewCatalog, fall back to loadView and synthesize ResolvedPersistentView. * Mixed-catalog perf opt-in: loadTable may return MetadataOnlyTable(ViewInfo) for view idents, short-circuiting the second RPC. Documented on TableCatalog#loadTable. * DataSourceV2Strategy: routes CREATE/ALTER/DROP/SHOW VIEWS to ViewCatalog only; staging branches removed. * ResolveSessionCatalog: SUPPORTS_VIEW guards replaced with instanceof ViewCatalog. Internal: V1Table.toCatalogTable for ViewInfo is now public so the analyzer can synthesize CatalogTable from a loadView result for the session-catalog v1 view-resolution path. Out of scope for this commit: * Test suite rewrite (DataSourceV2MetadataOnlyViewSuite still uses SUPPORTS_VIEW and TestingStagingCatalog) -- broken until the follow-up commit. * Lifting the session-catalog gate on DESCRIBE/SHOW CREATE TABLE/SHOW COLUMNS/SHOW TBLPROPERTIES for v2 views -- still pinned with UNSUPPORTED_FEATURE.TABLE_OPERATION; tracked as follow-up. 
Co-authored-by: Isaac --- .../catalog/StagingTableCatalog.java | 17 +- .../sql/connector/catalog/TableCatalog.java | 104 +++++------ .../catalog/TableCatalogCapability.java | 29 +--- .../sql/connector/catalog/ViewCatalog.java | 163 ++++++++++++++++++ .../spark/sql/connector/catalog/ViewInfo.java | 21 +-- .../sql/catalyst/analysis/Analyzer.scala | 30 +++- .../catalyst/plans/logical/v2Commands.scala | 10 +- .../sql/connector/catalog/CatalogV2Util.scala | 7 - .../spark/sql/connector/catalog/V1Table.scala | 2 +- .../analysis/ResolveSessionCatalog.scala | 20 +-- .../spark/sql/execution/command/views.scala | 32 ++-- .../datasources/v2/AlterV2ViewExec.scala | 101 +++-------- .../datasources/v2/CreateV2ViewExec.scala | 151 +++++----------- .../datasources/v2/DataSourceV2Strategy.scala | 45 ++--- .../datasources/v2/DropViewExec.scala | 49 +++--- .../datasources/v2/ShowViewsExec.scala | 19 +- 16 files changed, 395 insertions(+), 405 deletions(-) create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewCatalog.java diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/StagingTableCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/StagingTableCatalog.java index f37dc3dccc5c8..6811ea380b3ae 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/StagingTableCatalog.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/StagingTableCatalog.java @@ -95,12 +95,9 @@ default StagedTable stageCreate( * table exists when this method is called, the method should throw an exception accordingly. If * another process concurrently creates the table before this table's staged changes are * committed, an exception should be thrown by {@link StagedTable#commitStagedChanges()}. - *

- * Catalogs that declare {@link TableCatalogCapability#SUPPORTS_VIEW} also route atomic v2 - * {@code CREATE VIEW} through this method when {@code tableInfo} is a {@link ViewInfo}. * * @param ident a table identifier - * @param tableInfo information about the table or view + * @param tableInfo information about the table * @return metadata for the new table. This can be null if the catalog does not support atomic * creation for this table. Spark will call {@link #loadTable(Identifier)} later. * @throws TableAlreadyExistsException If a table or view already exists for the identifier @@ -162,13 +159,9 @@ default StagedTable stageReplace( * {@link #stageCreateOrReplace(Identifier, StructType, Transform[], Map)}, which should create * the table in the data source if the table does not exist at the time of committing the * operation. - *

- * Catalogs that declare {@link TableCatalogCapability#SUPPORTS_VIEW} also route atomic v2 - * {@code ALTER VIEW ... AS} through this method when {@code tableInfo} is a {@link ViewInfo}; - * the existing entry at {@code ident} is expected to be a view. * * @param ident a table identifier - * @param tableInfo information about the table or view + * @param tableInfo information about the table * @return metadata for the new table. This can be null if the catalog does not support atomic * creation for this table. Spark will call {@link #loadTable(Identifier)} later. * @throws UnsupportedOperationException If a requested partition transform is not supported @@ -229,13 +222,9 @@ default StagedTable stageCreateOrReplace( * backing data source. This differs from the expected semantics of * {@link #stageReplace(Identifier, StructType, Transform[], Map)}, which should fail when * the staged changes are committed but the table doesn't exist at commit time. - *

- * Catalogs that declare {@link TableCatalogCapability#SUPPORTS_VIEW} also route atomic v2 - * {@code CREATE OR REPLACE VIEW} through this method when {@code tableInfo} is a - * {@link ViewInfo}. * * @param ident a table identifier - * @param tableInfo information about the table or view + * @param tableInfo information about the table * @return metadata for the new table. This can be null if the catalog does not support atomic * creation for this table. Spark will call {@link #loadTable(Identifier)} later. * @throws UnsupportedOperationException If a requested partition transform is not supported diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java index f55bd6f44452c..e415752914142 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java @@ -100,15 +100,11 @@ public interface TableCatalog extends CatalogPlugin { /** * List the tables in a namespace from the catalog. *

- * Includes views. Like v1 {@code ShowTablesCommand}, the output of Spark's - * {@code SHOW TABLES} includes permanent views alongside tables; for catalogs that declare - * {@link TableCatalogCapability#SUPPORTS_VIEW} this method must therefore return identifiers - * for both tables and views, mirroring the v1 session-catalog behavior. Callers that need to - * tell tables and views apart should use {@link #listTableSummaries} and read - * {@link TableSummary#tableType()}. + * Returns table identifiers only -- views (if the catalog also implements {@link ViewCatalog}) + * are listed separately via {@link ViewCatalog#listViews}. * * @param namespace a multi-part namespace - * @return an array of Identifiers for tables and (for SUPPORTS_VIEW catalogs) views + * @return an array of Identifiers for tables * @throws NoSuchNamespaceException If the namespace does not exist (optional). */ Identifier[] listTables(String[] namespace) throws NoSuchNamespaceException; @@ -116,19 +112,14 @@ public interface TableCatalog extends CatalogPlugin { /** * List the table summaries in a namespace from the catalog. *

- * This method should return all entities from the namespace regardless of type (tables AND - * views). Each returned {@link TableSummary} carries the entity's {@code tableType} - * (e.g. {@link TableSummary#VIEW_TABLE_TYPE VIEW_TABLE_TYPE}), which is what callers use to - * tell tables and views apart. + * Returns one summary per entry returned by {@link #listTables}. Each {@link TableSummary} + * carries the entry's {@code tableType}. *

- * The default implementation enumerates via {@link #listTables} + {@link #loadTable}, which - * works for SUPPORTS_VIEW catalogs because {@code listTables} also returns view identifiers - * and {@code loadTable} returns a view-typed {@link MetadataOnlyTable} for each. Catalogs - * that can fetch summaries in a single round-trip should override this method for - * efficiency. + * The default implementation enumerates via {@link #listTables} + {@link #loadTable}. + * Catalogs that can fetch summaries in a single round-trip should override. * * @param namespace a multi-part namespace - * @return an array of summaries for tables and views in the namespace + * @return an array of summaries for tables in the namespace * @throws NoSuchNamespaceException If the namespace does not exist (optional). * @throws NoSuchTableException If certain table listed by listTables API does not exist. */ @@ -153,15 +144,20 @@ default TableSummary[] listTableSummaries(String[] namespace) /** * Load table metadata by {@link Identifier identifier} from the catalog. *

- * Catalogs that declare {@link TableCatalogCapability#SUPPORTS_VIEW} must return the view - * as a {@link MetadataOnlyTable} when {@code ident} resolves to a view, so Spark's view - * resolution path can expand the view text. Catalogs that do not declare - * {@code SUPPORTS_VIEW} must throw {@link NoSuchTableException} for a view identifier. + * If {@code ident} resolves to a view in a mixed catalog (one that also implements + * {@link ViewCatalog}), this should throw {@link NoSuchTableException} -- views are loaded + * via {@link ViewCatalog#loadView}. As a perf optimization, a mixed catalog may instead + * return a {@link MetadataOnlyTable} wrapping a {@link ViewInfo} from this method; Spark's + * resolver detects the wrapper and routes through view resolution without a follow-up + * {@code loadView} call. The optimization is opt-in -- correctly throwing + * {@code NoSuchTableException} for a view identifier and letting Spark fall back to + * {@code loadView} is also valid. * * @param ident a table identifier - * @return the table's metadata - * @throws NoSuchTableException If the table doesn't exist, or is a view and the catalog - * does not declare {@link TableCatalogCapability#SUPPORTS_VIEW}. + * @return the table's metadata, or a {@link MetadataOnlyTable} wrapping a {@link ViewInfo} + * (perf opt-in for mixed catalogs) + * @throws NoSuchTableException If the table doesn't exist (or is a view in a mixed catalog + * that does not use the perf opt-in) */ Table loadTable(Identifier ident) throws NoSuchTableException; @@ -189,8 +185,7 @@ default Table loadTable( * Load table metadata of a specific version by {@link Identifier identifier} from the catalog. *

* Time-travel targets a versioned table, not a view. This must throw - * {@link NoSuchTableException} for a view identifier regardless of whether the catalog - * declares {@link TableCatalogCapability#SUPPORTS_VIEW}. + * {@link NoSuchTableException} for a view identifier. * * @param ident a table identifier * @param version version of the table @@ -205,8 +200,7 @@ default Table loadTable(Identifier ident, String version) throws NoSuchTableExce * Load table metadata at a specific time by {@link Identifier identifier} from the catalog. *

* Time-travel targets a versioned table, not a view. This must throw - * {@link NoSuchTableException} for a view identifier regardless of whether the catalog - * declares {@link TableCatalogCapability#SUPPORTS_VIEW}. + * {@link NoSuchTableException} for a view identifier. * * @param ident a table identifier * @param timestamp timestamp of the table, which is microseconds since 1970-01-01 00:00:00 UTC @@ -251,13 +245,11 @@ default void invalidateTable(Identifier ident) { /** * Test whether a table exists using an {@link Identifier identifier} from the catalog. *

- * Catalogs that declare {@link TableCatalogCapability#SUPPORTS_VIEW} manage views through the - * same identifier space as tables; for such catalogs this method must return {@code true} - * for a view identifier (mirroring {@link #loadTable(Identifier)}). Catalogs that do not - * declare {@code SUPPORTS_VIEW} must return {@code false} for a view identifier. + * Returns {@code false} for a view identifier in a mixed catalog (also implementing + * {@link ViewCatalog}); view existence is checked via {@link ViewCatalog#viewExists}. * * @param ident a table identifier - * @return true if the table exists, false otherwise + * @return true if a table exists at {@code ident}, false otherwise */ default boolean tableExists(Identifier ident) { try { @@ -300,14 +292,13 @@ default Table createTable( /** * Create a table in the catalog. *

- * Catalogs that declare {@link TableCatalogCapability#SUPPORTS_VIEW} also receive view writes - * through this method: when {@code tableInfo} is a {@link ViewInfo}, the call is a - * {@code CREATE VIEW} / {@code CREATE OR REPLACE VIEW} (combined with {@code dropTable}) / - * {@code ALTER VIEW ... AS} (combined with {@code dropTable}) request and must be persisted - * as a view. Implementations should branch on {@code tableInfo instanceof ViewInfo}. + * In mixed catalogs (also implementing {@link ViewCatalog}) tables and views share an + * identifier namespace; this method must throw {@link TableAlreadyExistsException} if + * {@code ident} already names a view. Views themselves are created via + * {@link ViewCatalog#createView}. * * @param ident a table identifier - * @param tableInfo information about the table or view + * @param tableInfo information about the table * @return metadata for the new table. This can be null if getting the metadata for the new table * is expensive. Spark will call {@link #loadTable(Identifier)} if needed (e.g. CTAS). * @@ -370,10 +361,10 @@ default boolean useNullableQuerySchema() { *

* The requested changes must be applied in the order given. *

- * {@code alterTable} targets tables only. Even for catalogs that declare - * {@link TableCatalogCapability#SUPPORTS_VIEW}, this must throw {@link NoSuchTableException} - * when {@code ident} resolves to a view. View-specific DDL (CREATE / ALTER ... AS) goes - * through {@link #createTable(Identifier, TableInfo)} for SUPPORTS_VIEW catalogs. + * {@code alterTable} targets tables only. In a mixed catalog (also implementing + * {@link ViewCatalog}) this must throw {@link NoSuchTableException} when {@code ident} + * resolves to a view; view DDL is handled by {@link ViewCatalog} (e.g. + * {@link ViewCatalog#replaceView} for {@code ALTER VIEW ... AS}). * * @param ident a table identifier * @param changes changes to apply to the table @@ -390,16 +381,12 @@ Table alterTable( /** * Drop a table in the catalog. *

- * Catalogs that declare {@link TableCatalogCapability#SUPPORTS_VIEW} manage views through the - * same identifier space as tables; for such catalogs this method must also drop views at - * {@code ident} and return {@code true}. Spark's non-atomic v2 {@code ALTER VIEW ... AS} path - * relies on this ({@code dropTable} + {@code createTable}). Catalogs that do not declare - * {@code SUPPORTS_VIEW} must not drop a view and must return {@code false} for a view - * identifier. + * In a mixed catalog (also implementing {@link ViewCatalog}) this must not drop a view and + * must return {@code false} when {@code ident} resolves to a view; views are dropped via + * {@link ViewCatalog#dropView}. * * @param ident a table identifier - * @return true if a table (or, for SUPPORTS_VIEW catalogs, a view) was deleted, - * false if no such entry exists for the identifier + * @return true if a table was deleted, false if no table exists for the identifier */ boolean dropTable(Identifier ident); @@ -407,9 +394,9 @@ Table alterTable( * Drop a table in the catalog and completely remove its data by skipping a trash even if it is * supported. *

- * {@code purgeTable} targets tables only. Even for catalogs that declare - * {@link TableCatalogCapability#SUPPORTS_VIEW}, this must not drop a view and must return - * {@code false} for a view identifier -- purge semantics (data removal) do not apply to views. + * {@code purgeTable} targets tables only. In a mixed catalog (also implementing + * {@link ViewCatalog}) this must not drop a view and must return {@code false} for a view + * identifier -- purge semantics (data removal) do not apply to views. *

* If the catalog supports to purge a table, this method should be overridden. * The default implementation throws {@link UnsupportedOperationException}. @@ -427,11 +414,10 @@ default boolean purgeTable(Identifier ident) throws UnsupportedOperationExceptio /** * Renames a table in the catalog. *

- * {@code renameTable} targets tables only -- v2 {@code ALTER VIEW ... RENAME TO} is tracked - * as a separate follow-up and is not routed here today. Even for catalogs that declare - * {@link TableCatalogCapability#SUPPORTS_VIEW}, this must throw {@link NoSuchTableException} - * when {@code oldIdent} resolves to a view, and must throw - * {@link TableAlreadyExistsException} if {@code newIdent} resolves to a table or a view. + * {@code renameTable} targets tables only. In a mixed catalog (also implementing + * {@link ViewCatalog}) this must throw {@link NoSuchTableException} when {@code oldIdent} + * resolves to a view, and must throw {@link TableAlreadyExistsException} if {@code newIdent} + * collides with an existing table or view. *

* If the catalog does not support table renames between namespaces, it throws * {@link UnsupportedOperationException}. diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalogCapability.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalogCapability.java index 3039d4211a23b..a60c827d5ace1 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalogCapability.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalogCapability.java @@ -92,32 +92,5 @@ public enum TableCatalogCapability { * {@link TableCatalog#createTable}. * See {@link Column#identityColumnSpec()}. */ - SUPPORTS_CREATE_TABLE_WITH_IDENTITY_COLUMNS, - - /** - * Signals that the TableCatalog supports views. Views flow through the same write methods as - * tables, using {@link ViewInfo} (a {@link TableInfo} subtype carrying the view-specific - * fields -- query text, captured current catalog/namespace, captured SQL configs, schema - * binding mode, query output column names) as the DTO. Catalogs declaring this capability - * must: - *

    - *
  • Persist a view when {@link TableCatalog#createTable} (or the - * {@link StagingTableCatalog} staging variants) receives a {@code ViewInfo}. - * Implementations should branch on {@code info instanceof ViewInfo}.
  • - *
  • Return a {@link MetadataOnlyTable} wrapping a {@code ViewInfo} from - * {@link TableCatalog#loadTable} for a view identifier, so Spark's view resolution - * path can expand the view text.
  • - *
  • Drop views through {@link TableCatalog#dropTable} and report view existence through - * {@link TableCatalog#tableExists}.
  • - *
  • Include views in {@link TableCatalog#listTables} output.
  • - *
- * Spark routes the view DDL through the standard write APIs: {@code CREATE VIEW} uses - * {@code createTable} (or {@code stageCreate}); {@code CREATE OR REPLACE VIEW} uses - * {@code createTable} (after {@code dropTable}) or {@code stageCreateOrReplace}; - * {@code ALTER VIEW ... AS} uses {@code createTable} (after {@code dropTable}) or - * {@code stageReplace}. Without this capability, Spark rejects {@code CREATE VIEW} and - * {@code ALTER VIEW} statements targeting the catalog up front rather than letting the - * catalog silently persist a table entry that cannot be read as a view. - */ - SUPPORTS_VIEW + SUPPORTS_CREATE_TABLE_WITH_IDENTITY_COLUMNS } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewCatalog.java new file mode 100644 index 0000000000000..3731f7a6b28d6 --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewCatalog.java @@ -0,0 +1,163 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql.connector.catalog; + +import org.apache.spark.annotation.Evolving; +import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException; +import org.apache.spark.sql.catalyst.analysis.NoSuchViewException; +import org.apache.spark.sql.catalyst.analysis.ViewAlreadyExistsException; + +/** + * Catalog API for read and write access to views. + *

+ * A connector that wants to expose views implements this interface. The interface is independent + * from {@link TableCatalog}: a connector can implement just {@code ViewCatalog} (a view-only + * catalog), just {@code TableCatalog} (a table-only catalog), or both. There is no capability + * flag to declare; the presence of {@code ViewCatalog} on the catalog plugin is the + * signal that it supports views. + * + *

Mixed catalogs (implementing both {@code TableCatalog} and {@code ViewCatalog})

+ * + * The two interfaces are independent: every {@code TableCatalog} method behaves as if views did + * not exist, and every {@code ViewCatalog} method behaves as if tables did not exist. The only + * cross-cutting invariant is that tables and views share a single identifier namespace in + * the catalog: the same identifier cannot resolve to both a table and a view at the same time. + * That invariant manifests in two places: + *
    + *
  • {@link TableCatalog#createTable} must reject (with + * {@link org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException}) if the + * identifier already names a view.
  • + *
  • {@link #createView} must reject (with {@link ViewAlreadyExistsException}) if the + * identifier already names a table.
  • + *
+ * + *

Resolution and the optional perf opt-in for mixed catalogs

+ * + * Spark resolves an identifier by calling {@link TableCatalog#loadTable} first; on + * {@link org.apache.spark.sql.catalyst.analysis.NoSuchTableException} it falls back to + * {@link #loadView} when the catalog also implements {@code ViewCatalog}. That fallback costs an + * extra RPC per cold-cache view lookup. To skip it, a perf-conscious mixed catalog may return a + * {@link MetadataOnlyTable} wrapping the {@link ViewInfo} from + * {@link TableCatalog#loadTable} when the identifier resolves to a view; Spark recognizes the + * {@code ViewInfo} payload and routes through view resolution without a follow-up + * {@code loadView} call. {@code loadView} is still used directly for view DDL paths + * (DROP VIEW, ALTER VIEW, SHOW CREATE TABLE, etc.). + * + * @since 4.2.0 + */ +@Evolving +public interface ViewCatalog extends CatalogPlugin { + + /** + * List the views in a namespace from the catalog. + *

+ * For mixed catalogs, this must return identifiers for views only (tables are listed via + * {@link TableCatalog#listTables}). + * + * @param namespace a multi-part namespace + * @return an array of identifiers for views + * @throws NoSuchNamespaceException if the namespace does not exist (optional) + */ + Identifier[] listViews(String[] namespace) throws NoSuchNamespaceException; + + /** + * Load view metadata by identifier. + *

+ * For mixed catalogs, throws {@link NoSuchViewException} when {@code ident} resolves to a + * table rather than a view. + * + * @param ident a view identifier + * @return the view metadata + * @throws NoSuchViewException if the view does not exist (or {@code ident} is a table in a + * mixed catalog) + */ + ViewInfo loadView(Identifier ident) throws NoSuchViewException; + + /** + * Test whether a view exists. + *

+ * The default implementation calls {@link #loadView} and catches {@link NoSuchViewException}. + * Catalogs that can answer existence cheaply should override. + * + * @param ident a view identifier + * @return true if a view exists at {@code ident}, false otherwise + */ + default boolean viewExists(Identifier ident) { + try { + loadView(ident); + return true; + } catch (NoSuchViewException e) { + return false; + } + } + + /** + * Invalidate cached metadata for a view. + *

+ * If the view is currently cached, drop the cached entry; otherwise do nothing. This must not + * issue remote calls. + * + * @param ident a view identifier + */ + default void invalidateView(Identifier ident) { + } + + /** + * Create a view. + *

+ * In mixed catalogs, must throw {@link ViewAlreadyExistsException} if {@code ident} already + * names a table or a view. + * + * @param ident the view identifier + * @param info the view metadata + * @return the metadata of the newly created view; may equal {@code info} + * @throws ViewAlreadyExistsException if a view or table already exists at {@code ident} + * @throws NoSuchNamespaceException if the identifier's namespace does not exist (optional) + */ + ViewInfo createView(Identifier ident, ViewInfo info) + throws ViewAlreadyExistsException, NoSuchNamespaceException; + + /** + * Atomically replace an existing view's metadata. + *

+ * Used by {@code ALTER VIEW ... AS} and as the replace branch of {@code CREATE OR REPLACE + * VIEW}. Implementations should commit the new metadata atomically; views carry no data, so a + * single transactional metastore call (or equivalent) is sufficient -- there is no separate + * staging API. + * + * @param ident the view identifier + * @param info the new view metadata + * @return the metadata of the replaced view; may equal {@code info} + * @throws NoSuchViewException if no view exists at {@code ident} (or {@code ident} is a table + * in a mixed catalog) + */ + ViewInfo replaceView(Identifier ident, ViewInfo info) throws NoSuchViewException; + + /** + * Drop a view. + *

+ * Returns {@code true} if a view was dropped at {@code ident}, {@code false} otherwise. In + * mixed catalogs, returns {@code false} if {@code ident} is a table (the table is not + * touched). Spark's resolver guards the call site so that {@code DROP VIEW} on a table or + * {@code DROP TABLE} on a view surfaces the dedicated {@code EXPECT_VIEW_NOT_TABLE} / + * {@code EXPECT_TABLE_NOT_VIEW} error before this method is invoked. + * + * @param ident a view identifier + * @return true if a view was dropped, false otherwise + */ + boolean dropView(Identifier ident); +} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewInfo.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewInfo.java index b3b3c37f5569a..4855176977d4f 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewInfo.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewInfo.java @@ -24,17 +24,18 @@ import org.apache.spark.annotation.Evolving; /** - * A {@link TableInfo} specialization for views. Views are a kind of table: they share the - * {@link Table} abstraction and flow through the same {@link TableCatalog#createTable} write - * path and the same {@link TableCatalog#loadTable} read path. {@code ViewInfo} carries the - * view-specific fields that cannot be represented as string table properties -- the query - * text, captured creation-time resolution context, captured SQL configs, schema-binding mode, - * and the query output column names. + * View metadata DTO -- the typed payload returned by {@link ViewCatalog#loadView} and accepted + * by {@link ViewCatalog#createView} / {@link ViewCatalog#replaceView}. Carries the + * view-specific fields that cannot be represented as string table properties: the query text, + * captured creation-time resolution context, captured SQL configs, schema-binding mode, and + * query output column names. 
Schema and user TBLPROPERTIES are inherited from {@link TableInfo} + * via the typed builder. *

- * Catalogs that declare {@link TableCatalogCapability#SUPPORTS_VIEW} recognize a - * {@code ViewInfo} argument to {@code createTable} (and the {@link StagingTableCatalog} - * staging variants) as a view write, and return a {@link MetadataOnlyTable} wrapping a - * {@code ViewInfo} from {@code loadTable} for a view identifier. + * {@code ViewInfo} extends {@link TableInfo} so that a mixed catalog (one implementing both + * {@link TableCatalog} and {@link ViewCatalog}) can opt into the perf optimization of returning + * a {@link MetadataOnlyTable} wrapping a {@code ViewInfo} from {@link TableCatalog#loadTable} + * for a view identifier. Pure {@link ViewCatalog} implementations never see {@code TableInfo}; + * the typed setters on {@link Builder} cover everything they need to construct. * * @since 4.2.0 */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index c1bf06bac7d1f..76830aee5f094 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1104,9 +1104,16 @@ class Analyzer( * for resolving DDL and misc commands. UnresolvedView callers reject non-view results * downstream via `expectViewNotTableError`. * - * When `viewOnly=true`, non-session catalogs that do not declare SUPPORTS_VIEW are - * rejected up front with MISSING_CATALOG_ABILITY.VIEWS -- they cannot host views at - * all, so surfacing a downstream "view not found" would hide the real reason. + * When `viewOnly=true`, non-session catalogs that do not implement [[ViewCatalog]] are + * rejected up front with MISSING_CATALOG_ABILITY.VIEWS -- they cannot host views at all, + * so surfacing a downstream "view not found" would hide the real reason. + * + * Lookup order against a non-session catalog: + * 1. [[TableCatalog.loadTable]] if implemented. 
A returned [[MetadataOnlyTable]] wrapping + * a [[ViewInfo]] is interpreted as a view (perf opt-in for mixed catalogs that prefer + * to answer in a single RPC); other results are tables. + * 2. If `loadTable` did not produce a result and the catalog is a [[ViewCatalog]], + * [[ViewCatalog.loadView]] is called as the fallback view-resolution path. */ private def lookupTableOrView( identifier: Seq[String], @@ -1117,10 +1124,10 @@ class Analyzer( relationResolution.expandIdentifier(identifier) match { case CatalogAndIdentifier(catalog, ident) => if (viewOnly && !CatalogV2Util.isSessionCatalog(catalog) && - !CatalogV2Util.supportsView(catalog)) { + !catalog.isInstanceOf[ViewCatalog]) { throw QueryCompilationErrors.missingCatalogViewsAbilityError(catalog) } - CatalogV2Util.loadTable(catalog, ident).map { + val tableResolved: Option[LogicalPlan] = CatalogV2Util.loadTable(catalog, ident).map { case v1Table: V1Table if CatalogV2Util.isSessionCatalog(catalog) && v1Table.v1Table.tableType == CatalogTableType.VIEW => val v1Ident = v1Table.catalogTable.identifier @@ -1133,6 +1140,19 @@ class Analyzer( case table => ResolvedTable.create(catalog.asTableCatalog, ident, table) } + tableResolved.orElse { + catalog match { + case vc: ViewCatalog => + try { + val viewInfo = vc.loadView(ident) + val catalogTable = V1Table.toCatalogTable(catalog, ident, viewInfo) + Some(ResolvedPersistentView(catalog, ident, catalogTable)) + } catch { + case _: NoSuchViewException => None + } + case _ => None + } + } case _ => None } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index e686a1e979b3b..0eded2d9dbdf9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -1364,11 +1364,11 @@ case class 
ShowTablePartition( * The logical plan of the SHOW VIEWS command. * * Session-catalog targets fall back to v1 `ShowViewsCommand` via `ResolveSessionCatalog`. - * v2 catalogs that declare - * [[org.apache.spark.sql.connector.catalog.TableCatalogCapability#SUPPORTS_VIEW]] are handled - * in `DataSourceV2Strategy` (enumerates via `listTableSummaries` filtered to - * `VIEW_TABLE_TYPE`). Non-SUPPORTS_VIEW v2 catalogs are rejected up front in - * `ResolveSessionCatalog` with `MISSING_CATALOG_ABILITY.VIEWS`. + * v2 [[org.apache.spark.sql.connector.catalog.ViewCatalog]] catalogs are handled in + * `DataSourceV2Strategy` (enumerates via + * [[org.apache.spark.sql.connector.catalog.ViewCatalog#listViews]]). Non-ViewCatalog v2 + * catalogs are rejected up front in `ResolveSessionCatalog` with + * `MISSING_CATALOG_ABILITY.VIEWS`. */ case class ShowViews( namespace: LogicalPlan, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala index fbe33a90a245a..b29d0b3eabe56 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala @@ -78,13 +78,6 @@ private[sql] object CatalogV2Util { SupportsNamespaces.PROP_LOCATION, SupportsNamespaces.PROP_OWNER) - // Whether the given catalog is a TableCatalog that declares - // TableCatalogCapability.SUPPORTS_VIEW. Returns false for non-TableCatalog plugins. - def supportsView(catalog: CatalogPlugin): Boolean = catalog match { - case tc: TableCatalog => tc.capabilities().contains(TableCatalogCapability.SUPPORTS_VIEW) - case _ => false - } - /** * Apply properties changes to a map and return the result. 
*/ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala index d2d8a3fe4bdde..079b2639aa2b9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala @@ -165,7 +165,7 @@ private[sql] object V1Table { ) } - private def toCatalogTable( + def toCatalogTable( catalog: CatalogPlugin, ident: Identifier, info: ViewInfo): CatalogTable = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index a3547ed236e52..94523dd313b43 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.util.{quoteIfNeeded, toPrettySQL, CharVarcharUtils, ResolveDefaultColumns => DefaultCols} import org.apache.spark.sql.catalyst.util.ResolveDefaultColumns._ -import org.apache.spark.sql.connector.catalog.{CatalogExtension, CatalogManager, CatalogPlugin, CatalogV2Util, LookupCatalog, SupportsNamespaces, V1Table} +import org.apache.spark.sql.connector.catalog.{CatalogExtension, CatalogManager, CatalogPlugin, CatalogV2Util, LookupCatalog, SupportsNamespaces, V1Table, ViewCatalog} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.execution.command._ @@ -327,12 +327,12 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) case DropView(DropViewInSessionCatalog(ident), ifExists) => 
DropTableCommand(ident, ifExists, isView = true, purge = false) - // SUPPORTS_VIEW catalogs fall through to `DataSourceV2Strategy`, which routes DROP VIEW to - // `TableCatalog.dropTable` (contractually required to drop views for such catalogs). Other - // non-session catalogs get `MISSING_CATALOG_ABILITY.VIEWS`, matching the error raised from - // `CheckViewReferences` for CREATE/ALTER VIEW and from the analyzer gate on UnresolvedView. + // ViewCatalog catalogs fall through to `DataSourceV2Strategy`, which routes DROP VIEW to + // `ViewCatalog.dropView`. Other non-session catalogs get `MISSING_CATALOG_ABILITY.VIEWS`, + // matching the error raised from `CheckViewReferences` for CREATE/ALTER VIEW and from the + // analyzer gate on UnresolvedView. case DropView(r @ ResolvedIdentifier(catalog, ident), ifExists) - if !CatalogV2Util.supportsView(catalog) => + if !catalog.isInstanceOf[ViewCatalog] => if (catalog == FakeSystemCatalog) { DropTempViewCommand(ident, ifExists) } else { @@ -550,11 +550,11 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) viewType = PersistedView, viewSchemaMode = viewSchemaMode) - // SUPPORTS_VIEW catalogs are handled by the v2 strategy (enumerates via - // listTableSummaries); we skip the match here so the plan flows through unchanged. Only - // non-session, non-SUPPORTS_VIEW catalogs hit the MISSING_CATALOG_ABILITY.VIEWS rejection. + // ViewCatalog catalogs are handled by the v2 strategy (enumerates via listViews); we skip + // the match here so the plan flows through unchanged. Only non-session, non-ViewCatalog + // catalogs hit the MISSING_CATALOG_ABILITY.VIEWS rejection. 
case ShowViews(ns: ResolvedNamespace, pattern, output) - if !CatalogV2Util.supportsView(ns.catalog) => + if !ns.catalog.isInstanceOf[ViewCatalog] => ns match { case ResolvedDatabaseInSessionCatalog(db) => ShowViewsCommand(db, pattern, output) case _ => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index d56a02056f512..c721eb6aaabd8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -33,7 +33,7 @@ import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, SubqueryExpr import org.apache.spark.sql.catalyst.plans.logical.{AlterViewAs, AnalysisOnlyCommand, CreateTempView, CreateView, CTEInChildren, CTERelationDef, LogicalPlan, Project, View, WithCTE} import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.classic.ClassicConversions.castToImpl -import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, CatalogV2Util, Identifier} +import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, CatalogV2Util, Identifier, ViewCatalog} import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.{IdentifierHelper, NamespaceHelper} import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation @@ -902,14 +902,14 @@ object ViewHelper extends SQLConfHelper with Logging with CapturesConfig { /** * Post-analysis check for v2 CREATE VIEW / ALTER VIEW. First rejects catalogs that do not - * declare [[TableCatalogCapability.SUPPORTS_VIEW]] with `MISSING_CATALOG_ABILITY.VIEWS` -- we - * do this before the temp-object and auto-alias checks so a catalog that cannot host views at - * all surfaces the correct root cause instead of a misleading "references temp" error. 
Then - * rejects permanent views that reference temporary objects and view bodies with auto-generated - * aliases. `referredTempFunctions` is captured by the command's `markAsAnalyzed` before this - * rule runs. The v1 counterparts [[CreateViewCommand]] and [[AlterViewAsCommand]] keep their - * existing exec-time checks -- Dataset-built commands bypass the analyzer's re-capture path, - * so the exec-time safety net must stay for v1. + * implement [[ViewCatalog]] with `MISSING_CATALOG_ABILITY.VIEWS` -- we do this before the + * temp-object and auto-alias checks so a catalog that cannot host views at all surfaces the + * correct root cause instead of a misleading "references temp" error. Then rejects permanent + * views that reference temporary objects and view bodies with auto-generated aliases. + * `referredTempFunctions` is captured by the command's `markAsAnalyzed` before this rule runs. + * The v1 counterparts [[CreateViewCommand]] and [[AlterViewAsCommand]] keep their existing + * exec-time checks -- Dataset-built commands bypass the analyzer's re-capture path, so the + * exec-time safety net must stay for v1. */ object CheckViewReferences extends (LogicalPlan => Unit) { import ViewHelper._ @@ -931,18 +931,18 @@ object CheckViewReferences extends (LogicalPlan => Unit) { catalog.name() +: ident.asMultipartIdentifier } - // Fail fast if the catalog cannot host views. Gate non-TableCatalog plugins here too so - // callers get the VIEWS-specific error rather than a generic cast failure later. - private def requireSupportsView(resolved: LogicalPlan): Unit = { + // Fail fast if the catalog cannot host views. Gate non-ViewCatalog plugins here so callers + // get the VIEWS-specific error rather than a generic cast failure later. 
+ private def requireViewCatalog(resolved: LogicalPlan): Unit = { val (catalog, _) = catalogAndIdent(resolved) - if (!CatalogV2Util.supportsView(catalog)) { + if (!catalog.isInstanceOf[ViewCatalog]) { throw QueryCompilationErrors.missingCatalogViewsAbilityError(catalog) } } override def apply(plan: LogicalPlan): Unit = plan.foreach { case cv: CreateView if cv.isAnalyzed => - requireSupportsView(cv.child) + requireViewCatalog(cv.child) val fullIdent = fullIdentFor(cv.child) verifyTemporaryObjectsNotExists( isTemporary = false, fullIdent, cv.query, cv.referredTempFunctions) @@ -956,8 +956,8 @@ object CheckViewReferences extends (LogicalPlan => Unit) { case av: AlterViewAs if av.isAnalyzed => // No capability check here: `Analyzer.lookupTableOrView(identifier, viewOnly=true)` - // already rejects non-SUPPORTS_VIEW catalogs upstream for `UnresolvedView`, so by the - // time an AlterViewAs reaches this rule the catalog is guaranteed to support views. + // already rejects non-ViewCatalog catalogs upstream for `UnresolvedView`, so by the time + // an AlterViewAs reaches this rule the catalog is guaranteed to be a ViewCatalog. 
val fullIdent = fullIdentFor(av.child) verifyTemporaryObjectsNotExists( isTemporary = false, fullIdent, av.query, av.referredTempFunctions) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala index c8a210d31ec19..3da71a5bdbd27 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala @@ -20,23 +20,21 @@ package org.apache.spark.sql.execution.datasources.v2 import scala.jdk.CollectionConverters._ import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, ResolvedIdentifier, TableAlreadyExistsException, ViewSchemaMode} +import org.apache.spark.sql.catalyst.analysis.{NoSuchViewException, ResolvedIdentifier, ViewSchemaMode} import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, StagedTable, StagingTableCatalog, TableCatalog, ViewInfo} +import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog, ViewCatalog, ViewInfo} import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.command.CommandUtils -import org.apache.spark.sql.execution.metric.SQLMetric -import org.apache.spark.util.Utils /** - * Shared bits for the v2 ALTER VIEW ... AS execs. Loads the existing view once via - * `existingView` and uses it to preserve user-set properties, comment, collation, schema - * binding mode, and owner when constructing the replacement [[ViewInfo]]. 
A racing DDL - * between analysis and exec can change the target out from under us (dropped, or replaced - * with a non-view table); in that case we surface a regular no-such-table / not-a-view - * analysis error rather than propagating a stale analyzer decision. + * Shared bits for the v2 ALTER VIEW ... AS exec. Loads the existing view once via + * `existingView` and uses it to preserve user-set TBLPROPERTIES, comment, collation, owner, + * and schema binding mode when constructing the replacement [[ViewInfo]]. A racing DDL between + * analysis and exec can change the target out from under us (dropped, or replaced with a + * non-view table); in that case we surface a regular no-such-view / not-a-view analysis error + * rather than propagating a stale analyzer decision. * * Transient fields (SQL configs, query column names, schema mode) are re-captured from the * current session by [[V2ViewPreparation.buildViewInfo]], matching v1 @@ -44,29 +42,22 @@ import org.apache.spark.util.Utils * unchanged. */ private[v2] trait V2AlterViewPreparation extends V2ViewPreparation { - // Reuses `tryLoadTable` / `isViewTable` from the parent trait. A racing DDL between - // analysis and exec (drop, or replace with a non-view table) can invalidate the analyzer's - // ResolvedPersistentView decision -- we re-check here and surface user-facing errors - // rather than propagate the stale resolution. - protected lazy val existingView: ViewInfo = tryLoadTable() match { - case None => - throw QueryCompilationErrors.noSuchTableError(catalog.name(), identifier) - case Some(mot: MetadataOnlyTable) => - mot.getTableInfo match { - case v: ViewInfo => v - case _ => + protected lazy val existingView: ViewInfo = try { + catalog.loadView(identifier) + } catch { + case _: NoSuchViewException => + // Race: the view disappeared after analysis. Surface no-such-view, or + // expect-view-not-table if a colliding non-view table appeared in a mixed catalog. 
+ catalog match { + case tc: TableCatalog if tc.tableExists(identifier) => throw QueryCompilationErrors.expectViewNotTableError( (catalog.name() +: identifier.asMultipartIdentifier).toSeq, cmd = "ALTER VIEW ... AS", suggestAlternative = false, t = this) + case _ => + throw new NoSuchViewException(identifier) } - case _ => - throw QueryCompilationErrors.expectViewNotTableError( - (catalog.name() +: identifier.asMultipartIdentifier).toSeq, - cmd = "ALTER VIEW ... AS", - suggestAlternative = false, - t = this) } protected lazy val existingProps: Map[String, String] = @@ -94,9 +85,12 @@ private[v2] trait V2AlterViewPreparation extends V2ViewPreparation { .map(CatalogTable.VIEW_SCHEMA_MODE -> _) .toMap) + // ALTER VIEW ... AS is always a replace, never a CREATE. + override protected def replaceArg: Boolean = true + /** - * Force-evaluate `existingView` so `NoSuchTableException` / `expectViewNotTableError` - * surfaces before any other work (e.g. `buildViewInfo`, uncache, drop). The result is + * Force-evaluate `existingView` so `NoSuchViewException` / `expectViewNotTableError` + * surfaces before any other work (e.g. `buildViewInfo`, uncache, replace). The result is * intentionally discarded; call this purely for its side effect of materializing the * lazy val. */ @@ -104,12 +98,11 @@ private[v2] trait V2AlterViewPreparation extends V2ViewPreparation { } /** - * Non-atomic ALTER VIEW for a plain [[TableCatalog]]: load existing, build replacement, - * check cyclic reference, uncache, drop, create. Between drop and create the view does not - * exist -- catalogs that need atomicity should also implement [[StagingTableCatalog]]. + * Physical plan node for ALTER VIEW ... AS on a v2 [[ViewCatalog]]. Dispatches to + * [[ViewCatalog#replaceView]], which is contractually atomic. 
*/ case class AlterV2ViewExec( - catalog: TableCatalog, + catalog: ViewCatalog, identifier: Identifier, originalText: String, query: LogicalPlan) extends V2AlterViewPreparation { @@ -119,47 +112,7 @@ case class AlterV2ViewExec( val info = buildViewInfo() // Cyclic reference detection is done at analysis time in CheckViewReferences. CommandUtils.uncacheTableOrView(session, ResolvedIdentifier(catalog, identifier)) - catalog.dropTable(identifier) - try { - catalog.createTable(identifier, info) - } catch { - case _: TableAlreadyExistsException => throw viewAlreadyExists() - } - Seq.empty - } -} - -/** - * Atomic ALTER VIEW for a [[StagingTableCatalog]]: uses `stageReplace` + commit so the view - * metadata swap is atomic against concurrent readers. `stageReplace` throws - * [[NoSuchTableException]] when the view does not exist; we surface that as the standard - * no-such-table error. - */ -case class AtomicAlterV2ViewExec( - catalog: StagingTableCatalog, - identifier: Identifier, - originalText: String, - query: LogicalPlan) extends V2AlterViewPreparation { - - override val metrics: Map[String, SQLMetric] = - DataSourceV2Utils.commitMetrics(sparkContext, catalog) - - override protected def run(): Seq[InternalRow] = { - requireExistingView() - val info = buildViewInfo() - // Cyclic reference detection is done at analysis time in CheckViewReferences. 
- CommandUtils.uncacheTableOrView(session, ResolvedIdentifier(catalog, identifier)) - val staged: StagedTable = try { - catalog.stageReplace(identifier, info) - } catch { - case _: NoSuchTableException => - throw QueryCompilationErrors.noSuchTableError(catalog.name(), identifier) - } - Utils.tryWithSafeFinallyAndFailureCallbacks({ - DataSourceV2Utils.commitStagedChanges(sparkContext, staged, metrics) - })(catchBlock = { - staged.abortStagedChanges() - }) + catalog.replaceView(identifier, info) Seq.empty } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala index 9d0aa8d09aa1d..09eb30ed0b88c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala @@ -21,28 +21,27 @@ import scala.jdk.CollectionConverters._ import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.{CurrentUserContext, InternalRow} -import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, ResolvedIdentifier, SchemaEvolution, TableAlreadyExistsException, ViewSchemaMode} +import org.apache.spark.sql.catalyst.analysis.{NoSuchViewException, ResolvedIdentifier, SchemaEvolution, ViewAlreadyExistsException, ViewSchemaMode} import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.util.CharVarcharUtils -import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, StagedTable, StagingTableCatalog, Table, TableCatalog, ViewInfo} +import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog, ViewCatalog, ViewInfo} import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper import org.apache.spark.sql.errors.QueryCompilationErrors import 
org.apache.spark.sql.execution.command.{CommandUtils, ViewHelper} -import org.apache.spark.sql.execution.metric.SQLMetric import org.apache.spark.sql.util.SchemaUtils import org.apache.spark.util.ArrayImplicits._ -import org.apache.spark.util.Utils /** - * Shared validation + ViewInfo construction for v2 CREATE VIEW execs. + * Shared validation + ViewInfo construction for v2 CREATE VIEW / ALTER VIEW execs. * * Mirrors the persistent-view portion of v1 [[ViewHelper.prepareTable]] + the execution-time - * checks in [[CreateViewCommand.run]]. Post-analysis checks for temp-object references and - * auto-generated aliases run once for both v1 and v2 in [[CheckViewReferences]]. + * checks in [[org.apache.spark.sql.execution.command.CreateViewCommand.run]]. Post-analysis + * checks for temp-object references and auto-generated aliases run once for both v1 and v2 in + * [[org.apache.spark.sql.execution.command.CheckViewReferences]]. */ private[v2] trait V2ViewPreparation extends LeafV2CommandExec { - def catalog: TableCatalog + def catalog: ViewCatalog def identifier: Identifier def userSpecifiedColumns: Seq[(String, Option[String])] def comment: Option[String] @@ -113,32 +112,27 @@ private[v2] trait V2ViewPreparation extends LeafV2CommandExec { protected def viewAlreadyExists(): Throwable = QueryCompilationErrors.viewAlreadyExistsError(fullNameParts) - // Loads the existing entry at `identifier` or returns None if it does not exist. Combines - // the existence check and type check into a single catalog round-trip (vs. the previous - // tableExists + implicit assume-view flow). - protected def tryLoadTable(): Option[Table] = { - try { - Some(catalog.loadTable(identifier)) - } catch { - case _: NoSuchTableException => None - } + // For mixed catalogs (also TableCatalog), reject if the identifier names a non-view table: + // CREATE VIEW must not silently destroy a table's data, and CREATE OR REPLACE VIEW must not + // either. 
+ protected def rejectIfTable(): Unit = catalog match { + case tc: TableCatalog if tc.tableExists(identifier) => + throw QueryCompilationErrors.unsupportedCreateOrReplaceViewOnTableError( + fullNameParts, replaceArg) + case _ => } - // A SUPPORTS_VIEW catalog round-trips views as MetadataOnlyTable wrapping a ViewInfo. - // Anything else at the same identifier is a non-view table -- REPLACE'ing it as a view would - // silently destroy the table's data, so we reject at the exec layer. - protected def isViewTable(table: Table): Boolean = table match { - case mot: MetadataOnlyTable => mot.getTableInfo.isInstanceOf[ViewInfo] - case _ => false - } + protected def replaceArg: Boolean } /** - * Physical plan node for CREATE VIEW on a v2 `TableCatalog` that does NOT support staging. - * REPLACE is implemented as a non-atomic drop + create. + * Physical plan node for CREATE VIEW on a v2 [[ViewCatalog]]. Dispatches to + * [[ViewCatalog#createView]] for plain CREATE / `IF NOT EXISTS`, and to + * [[ViewCatalog#replaceView]] for `OR REPLACE`. CREATE OR REPLACE on a non-existent view falls + * back to `createView`. */ case class CreateV2ViewExec( - catalog: TableCatalog, + catalog: ViewCatalog, identifier: Identifier, userSpecifiedColumns: Seq[(String, Option[String])], comment: Option[String], @@ -151,99 +145,40 @@ case class CreateV2ViewExec( viewSchemaMode: ViewSchemaMode) extends V2ViewPreparation { override def owner: Option[String] = Some(CurrentUserContext.getCurrentUser) + override protected def replaceArg: Boolean = replace override protected def run(): Seq[InternalRow] = { - // Probe the catalog before preparing the view body so `IF NOT EXISTS` short-circuits - // without running `aliasPlan` / config capture, matching v1 `CreateViewCommand.run`. - // Cyclic-reference detection is done at analysis time in `CheckViewReferences`. 
- val existing = tryLoadTable() - if (allowExisting && existing.isDefined) { + // Probe before preparing the view body so `IF NOT EXISTS` and the type-collision check can + // short-circuit before running `aliasPlan` / config capture (matches v1 + // `CreateViewCommand.run`). Cyclic-reference detection runs at analysis time in + // `CheckViewReferences`. + val viewExists = catalog.viewExists(identifier) + if (allowExisting && viewExists) { return Seq.empty } - existing.foreach { table => - if (!isViewTable(table)) { - throw QueryCompilationErrors.unsupportedCreateOrReplaceViewOnTableError( - fullNameParts, replace) - } - if (!replace) throw viewAlreadyExists() - } - val info = buildViewInfo() - if (existing.isDefined) { - CommandUtils.uncacheTableOrView(session, ResolvedIdentifier(catalog, identifier)) - catalog.dropTable(identifier) - } - // TOCTOU: if another writer creates an entry between tryLoadTable and createTable, a bare - // TableAlreadyExistsException is unhelpful; present the same viewAlreadyExists error the - // atomic path uses. - try { - catalog.createTable(identifier, info) - } catch { - case _: TableAlreadyExistsException => throw viewAlreadyExists() - } - Seq.empty - } -} + rejectIfTable() + if (viewExists && !replace) throw viewAlreadyExists() -/** - * Physical plan node for CREATE VIEW on a v2 `StagingTableCatalog`. Uses the staging API to - * commit the metadata swap atomically. 
- */ -case class AtomicCreateV2ViewExec( - catalog: StagingTableCatalog, - identifier: Identifier, - userSpecifiedColumns: Seq[(String, Option[String])], - comment: Option[String], - collation: Option[String], - userProperties: Map[String, String], - originalText: String, - query: LogicalPlan, - allowExisting: Boolean, - replace: Boolean, - viewSchemaMode: ViewSchemaMode) extends V2ViewPreparation { - - override def owner: Option[String] = Some(CurrentUserContext.getCurrentUser) - - override val metrics: Map[String, SQLMetric] = - DataSourceV2Utils.commitMetrics(sparkContext, catalog) - - override protected def run(): Seq[InternalRow] = { - // Probe the catalog before preparing the view body so `IF NOT EXISTS` short-circuits - // without running `aliasPlan` / config capture, matching v1 `CreateViewCommand.run`. - // Cyclic-reference detection is done at analysis time in `CheckViewReferences`. - val existing = tryLoadTable() - if (allowExisting && existing.isDefined) { - return Seq.empty - } - existing.foreach { table => - if (!isViewTable(table)) { - throw QueryCompilationErrors.unsupportedCreateOrReplaceViewOnTableError( - fullNameParts, replace) - } - // Match the non-atomic exec: reject plain CREATE against an existing view up front - // rather than relying on `stageCreate` to throw. - if (!replace) throw viewAlreadyExists() - } val info = buildViewInfo() - val staged: StagedTable = if (replace) { - if (existing.isDefined) { - CommandUtils.uncacheTableOrView(session, ResolvedIdentifier(catalog, identifier)) + if (replace && viewExists) { + // CREATE OR REPLACE on an existing view: replaceView is the single atomic-swap call. + CommandUtils.uncacheTableOrView(session, ResolvedIdentifier(catalog, identifier)) + try { + catalog.replaceView(identifier, info) + } catch { + case _: NoSuchViewException => + // Race: the view disappeared between the existence probe and replaceView. Fall back + // to createView to honor REPLACE-or-create semantics. 
+ catalog.createView(identifier, info) } - catalog.stageCreateOrReplace(identifier, info) } else { - // TOCTOU: a concurrent writer can create an entry between `tryLoadTable` and - // `stageCreate`; translate the catalog's `TableAlreadyExistsException` into the same - // view-already-exists error the fast-path uses. + // Plain CREATE (or CREATE OR REPLACE on a non-existent view). try { - catalog.stageCreate(identifier, info) + catalog.createView(identifier, info) } catch { - case _: TableAlreadyExistsException => throw viewAlreadyExists() + case _: ViewAlreadyExistsException => throw viewAlreadyExists() } } - Utils.tryWithSafeFinallyAndFailureCallbacks({ - DataSourceV2Utils.commitStagedChanges(sparkContext, staged, metrics) - })(catchBlock = { - staged.abortStagedChanges() - }) Seq.empty } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 790a2da41fb18..d047ce385d118 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.trees.TreePattern.SCALAR_SUBQUERY import org.apache.spark.sql.catalyst.util.{toPrettySQL, GeneratedColumn, IdentityColumn, ResolveDefaultColumns, ResolveTableConstraints, V2ExpressionBuilder} import org.apache.spark.sql.classic.SparkSession -import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Identifier, StagingTableCatalog, SupportsDeleteV2, SupportsNamespaces, SupportsPartitionManagement, SupportsWrite, TableCapability, TableCatalog, TableCatalogCapability, TruncatableTable, V1Table} +import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Identifier, StagingTableCatalog, SupportsDeleteV2, 
SupportsNamespaces, SupportsPartitionManagement, SupportsWrite, TableCapability, TableCatalog, TruncatableTable, V1Table, ViewCatalog} import org.apache.spark.sql.connector.catalog.TableChange import org.apache.spark.sql.connector.catalog.index.SupportsIndex import org.apache.spark.sql.connector.expressions.{FieldReference, LiteralValue} @@ -301,30 +301,19 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat qualifyLocInTableSpec(tableSpec), orCreate = orCreate, invalidateCache) :: Nil } - // The SUPPORTS_VIEW capability gate runs earlier in `CheckViewReferences`, so by the time - // these strategy cases fire the catalog is guaranteed to be a TableCatalog with the flag. + // CheckViewReferences guarantees the catalog is a ViewCatalog by the time these strategy + // cases fire (it throws MISSING_CATALOG_ABILITY.VIEWS otherwise). case CreateView(ResolvedIdentifier(catalog, ident), userSpecifiedColumns, comment, collation, properties, originalText, child, allowExisting, replace, viewSchemaMode, _, _) => val sqlText = originalText.getOrElse { throw QueryCompilationErrors.createPersistedViewFromDatasetAPINotAllowedError() } - catalog.asTableCatalog match { - case staging: StagingTableCatalog => - AtomicCreateV2ViewExec(staging, ident, userSpecifiedColumns, comment, collation, - properties, sqlText, child, allowExisting, replace, viewSchemaMode) :: Nil - case tableCatalog => - CreateV2ViewExec(tableCatalog, ident, userSpecifiedColumns, comment, collation, - properties, sqlText, child, allowExisting, replace, viewSchemaMode) :: Nil - } + CreateV2ViewExec(catalog.asInstanceOf[ViewCatalog], ident, userSpecifiedColumns, comment, + collation, properties, sqlText, child, allowExisting, replace, viewSchemaMode) :: Nil case AlterViewAs(ResolvedPersistentView(catalog, ident, _), originalText, query, _, _) => - catalog.asTableCatalog match { - case staging: StagingTableCatalog => - AtomicAlterV2ViewExec(staging, ident, originalText, query) :: Nil - 
case tableCatalog => - AlterV2ViewExec(tableCatalog, ident, originalText, query) :: Nil - } + AlterV2ViewExec(catalog.asInstanceOf[ViewCatalog], ident, originalText, query) :: Nil // View DDL / inspection on a non-session v2 catalog that the v1 rewrite in // `ResolveSessionCatalog` can't handle. These are tracked as follow-up work in SPARK-52729; @@ -389,14 +378,13 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat // `UnresolvedTable` (not `UnresolvedTableOrView`), so `CheckAnalysis` surfaces // `EXPECT_TABLE_NOT_VIEW.NO_ALTERNATIVE` before planning. No strategy case needed. - // DROP VIEW on a non-session SUPPORTS_VIEW catalog. The v1 rewrite in `ResolveSessionCatalog` - // skips SUPPORTS_VIEW catalogs (its DropView case has a `!supportsView(catalog)` guard), so - // they fall through here. `DropViewExec` verifies the target is a view before calling - // `dropTable`, mirroring v1's `DropTableCommand(isView = true)` safety net. - case DropView(r @ ResolvedIdentifier(catalog, ident), ifExists) - if CatalogV2Util.supportsView(catalog) => + // DROP VIEW on a non-session ViewCatalog. The v1 rewrite in `ResolveSessionCatalog` skips + // ViewCatalog catalogs, so they fall through here. `DropViewExec` calls + // `ViewCatalog.dropView` and surfaces `EXPECT_VIEW_NOT_TABLE` if the identifier resolves to + // a table in a mixed catalog. 
+ case DropView(r @ ResolvedIdentifier(catalog: ViewCatalog, ident), ifExists) => val invalidateFunc = () => CommandUtils.uncacheTableOrView(session, r) - DropViewExec(catalog.asTableCatalog, ident, ifExists, invalidateFunc) :: Nil + DropViewExec(catalog, ident, ifExists, invalidateFunc) :: Nil case ReplaceTableAsSelect(ResolvedIdentifier(catalog, ident), parts, query, tableSpec: TableSpec, options, orCreate, true) => @@ -590,11 +578,10 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case ShowTables(ResolvedNamespace(catalog, ns, _), pattern, output) => ShowTablesExec(output, catalog.asTableCatalog, ns, pattern) :: Nil - // SHOW VIEWS on a non-session v2 catalog. Session-catalog targets are rewritten to v1 - // `ShowViewsCommand` by `ResolveSessionCatalog`; non-SUPPORTS_VIEW catalogs are rejected - // there too. This case only sees non-session SUPPORTS_VIEW catalogs. - case ShowViews(ResolvedNamespace(catalog: TableCatalog, ns, _), pattern, output) - if catalog.capabilities().contains(TableCatalogCapability.SUPPORTS_VIEW) => + // SHOW VIEWS on a non-session v2 ViewCatalog. Session-catalog targets are rewritten to v1 + // `ShowViewsCommand` by `ResolveSessionCatalog`; non-ViewCatalog catalogs are rejected + // there too. This case only sees non-session ViewCatalog catalogs. 
+ case ShowViews(ResolvedNamespace(catalog: ViewCatalog, ns, _), pattern, output) => ShowViewsExec(output, catalog, ns, pattern) :: Nil case ShowTablesExtended( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropViewExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropViewExec.scala index 589fffd529f26..9a665f644e0de 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropViewExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropViewExec.scala @@ -18,45 +18,40 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.analysis.NoSuchTableException +import org.apache.spark.sql.catalyst.analysis.NoSuchViewException import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, TableCatalog, ViewInfo} +import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog, ViewCatalog} import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.util.ArrayImplicits._ /** - * Physical plan node for DROP VIEW on a v2 `TableCatalog` that declares - * [[org.apache.spark.sql.connector.catalog.TableCatalogCapability#SUPPORTS_VIEW]]. Loads the - * target entry once to verify it is a view (a [[MetadataOnlyTable]] wrapping a [[ViewInfo]]) - * before calling [[TableCatalog#dropTable]]. Matching the v1 path's - * `DropTableCommand(isView = true)` safety net keeps `DROP VIEW some_table` from silently - * destroying a non-view table on a SUPPORTS_VIEW catalog. + * Physical plan node for DROP VIEW on a v2 [[ViewCatalog]]. 
Calls [[ViewCatalog#dropView]]; if + * it returns false and the catalog also implements [[TableCatalog]] with a table at this + * identifier, surfaces the dedicated `EXPECT_VIEW_NOT_TABLE` error rather than a generic + * "view not found" -- matching v1 `DropTableCommand(isView = true)`. */ case class DropViewExec( - catalog: TableCatalog, + catalog: ViewCatalog, ident: Identifier, ifExists: Boolean, invalidateCache: () => Unit) extends LeafV2CommandExec { override protected def run(): Seq[InternalRow] = { - val loaded = try { - Some(catalog.loadTable(ident)) - } catch { - case _: NoSuchTableException => None - } - val nameParts = - (catalog.name() +: ident.namespace() :+ ident.name()).toImmutableArraySeq - loaded match { - case Some(mot: MetadataOnlyTable) if mot.getTableInfo.isInstanceOf[ViewInfo] => - invalidateCache() - catalog.dropTable(ident) - case Some(_) => - throw QueryCompilationErrors.expectViewNotTableError( - nameParts, cmd = "DROP VIEW", suggestAlternative = false, t = this) - case None if !ifExists => - throw QueryCompilationErrors.noSuchTableError(nameParts) - case None => - // IF EXISTS: no-op. + val dropped = catalog.dropView(ident) + if (dropped) { + invalidateCache() + } else { + val nameParts = + (catalog.name() +: ident.namespace() :+ ident.name()).toImmutableArraySeq + catalog match { + case tc: TableCatalog if tc.tableExists(ident) => + throw QueryCompilationErrors.expectViewNotTableError( + nameParts, cmd = "DROP VIEW", suggestAlternative = false, t = this) + case _ if !ifExists => + throw new NoSuchViewException(ident) + case _ => + // IF EXISTS: no-op. 
+ } } Seq.empty } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowViewsExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowViewsExec.scala index 26ca6a819f55b..86b5b968833d9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowViewsExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowViewsExec.scala @@ -22,32 +22,27 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.util.StringUtils -import org.apache.spark.sql.connector.catalog.{TableCatalog, TableSummary} +import org.apache.spark.sql.connector.catalog.ViewCatalog import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.NamespaceHelper import org.apache.spark.sql.execution.LeafExecNode /** - * Physical plan node for SHOW VIEWS on a v2 catalog that declares - * [[org.apache.spark.sql.connector.catalog.TableCatalogCapability#SUPPORTS_VIEW]]. - * - * Enumerates via [[TableCatalog#listTableSummaries]] and filters to - * [[TableSummary#VIEW_TABLE_TYPE]]. v2 catalogs have no temp views, so the `isTemporary` - * column is always false -- mirroring v1 `ShowViewsCommand`, which sets `isTemporary=true` + * Physical plan node for SHOW VIEWS on a v2 [[ViewCatalog]]. Enumerates view identifiers via + * [[ViewCatalog#listViews]]. v2 catalogs have no temp views, so the {@code isTemporary} column + * is always false -- mirroring v1 {@code ShowViewsCommand}, which sets {@code isTemporary=true} * only for local/global temp views that live in the session catalog. 
*/ case class ShowViewsExec( output: Seq[Attribute], - catalog: TableCatalog, + catalog: ViewCatalog, namespace: Seq[String], pattern: Option[String]) extends V2CommandExec with LeafExecNode { override protected def run(): Seq[InternalRow] = { val rows = new ArrayBuffer[InternalRow]() - val summaries = catalog.listTableSummaries(namespace.toArray) - summaries.foreach { summary => - val ident = summary.identifier + catalog.listViews(namespace.toArray).foreach { ident => val nameMatches = pattern.forall(p => StringUtils.filterPattern(Seq(ident.name), p).nonEmpty) - if (TableSummary.VIEW_TABLE_TYPE == summary.tableType && nameMatches) { + if (nameMatches) { rows += toCatalystRow(ident.namespace().quoted, ident.name(), false) } } From 966f0c70ea0bbe8c8ad8ee0d1b9268a162e8c824 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Sat, 25 Apr 2026 04:57:40 +0000 Subject: [PATCH 35/59] tests: adapt MetadataOnlyView suite catalogs to mixed TableCatalog+ViewCatalog The structural rework removed TableCatalogCapability.SUPPORTS_VIEW and introduced ViewCatalog as the plugin-facing API for views. The existing test catalogs (TestingViewCatalog, TestingStagingCatalog) now implement both TableCatalog and ViewCatalog, sharing one identifier-keyed map per the mixed-catalog contract. Storage value's runtime type (ViewInfo vs TableInfo) distinguishes views from tables on each lookup; tableExists / listTables exclude view entries, viewExists / listViews include only views, and createTable / createView each reject cross-type collisions. Test-name renames replace "without SUPPORTS_VIEW" with "without ViewCatalog" to track the new API. The rest of the test bodies are unchanged. 
Co-authored-by: Isaac --- .../DataSourceV2MetadataOnlyViewSuite.scala | 167 ++++++++++++++---- .../sql/connector/DataSourceV2SQLSuite.scala | 2 +- 2 files changed, 136 insertions(+), 33 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala index a8106f5f56fd1..271ec88675a13 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala @@ -19,8 +19,8 @@ package org.apache.spark.sql.connector import org.apache.spark.SparkConf import org.apache.spark.sql.{AnalysisException, QueryTest, Row} -import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, TableAlreadyExistsException} -import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, StagedTable, StagingTableCatalog, Table, TableCatalog, TableCatalogCapability, TableChange, TableInfo, TableSummary, V1Table, ViewInfo} +import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, NoSuchViewException, TableAlreadyExistsException, ViewAlreadyExistsException} +import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, StagedTable, StagingTableCatalog, Table, TableCatalog, TableChange, TableInfo, TableSummary, V1Table, ViewCatalog, ViewInfo} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.StructType @@ -170,7 +170,7 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio } } - test("CREATE VIEW on a catalog without SUPPORTS_VIEW fails") { + test("CREATE VIEW on a catalog without ViewCatalog fails") { withSQLConf( "spark.sql.catalog.no_view_catalog" -> classOf[TestingTableOnlyCatalog].getName) { val ex = intercept[AnalysisException] { @@ -574,9 +574,9 @@ 
class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio } } - test("ALTER VIEW on a catalog without SUPPORTS_VIEW fails with MISSING_CATALOG_ABILITY") { + test("ALTER VIEW on a catalog without ViewCatalog fails with MISSING_CATALOG_ABILITY") { // ALTER VIEW's identifier is resolved via `UnresolvedView`, whose `viewOnly=true` path - // in `Analyzer.lookupTableOrView` rejects non-SUPPORTS_VIEW catalogs up front with the + // in `Analyzer.lookupTableOrView` rejects non-ViewCatalog catalogs up front with the // expected error class -- before `loadTable` is even called. `TestingTableOnlyCatalog` // happens to round-trip `default.v` as a view-typed MetadataOnlyTable, but that fixture // is not actually consulted on this path. CREATE VIEW's capability check lives in @@ -807,7 +807,7 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio // --- DROP VIEW on a v2 catalog -------------------------------- - test("DROP VIEW on a SUPPORTS_VIEW v2 catalog drops the view") { + test("DROP VIEW on a ViewCatalog drops the view") { val catalog = spark.sessionState.catalogManager.catalog("view_catalog") .asInstanceOf[TestingViewCatalog] withTable("spark_catalog.default.t") { @@ -822,7 +822,7 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio test("DROP VIEW IF EXISTS on a v2 catalog is a no-op when the view is missing") { // Exercises the `ifExists=true` path -- DropViewExec should not throw when the view - // doesn't exist on a SUPPORTS_VIEW catalog. + // doesn't exist on a ViewCatalog. 
sql("DROP VIEW IF EXISTS view_catalog.default.v_never_existed") } @@ -867,13 +867,13 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio } } - test("DROP VIEW on a catalog without SUPPORTS_VIEW is rejected") { + test("DROP VIEW on a catalog without ViewCatalog is rejected") { withSQLConf( "spark.sql.catalog.no_view_catalog" -> classOf[TestingTableOnlyCatalog].getName) { val ex = intercept[AnalysisException] { sql("DROP VIEW no_view_catalog.default.v") } - // Preserves the pre-PR error surface for non-SUPPORTS_VIEW catalogs. + // Preserves the pre-PR error surface for non-ViewCatalog catalogs. assert(ex.getMessage.toLowerCase(java.util.Locale.ROOT).contains("views")) } } @@ -925,7 +925,7 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio assert(names == Set("v_foo"), s"expected only v_foo, got $names") } - test("SHOW VIEWS on a catalog without SUPPORTS_VIEW is rejected") { + test("SHOW VIEWS on a catalog without ViewCatalog is rejected") { withSQLConf( "spark.sql.catalog.no_view_catalog" -> classOf[TestingTableOnlyCatalog].getName) { val ex = intercept[AnalysisException] { @@ -952,22 +952,21 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio } /** - * A [[TableCatalog]] that supports SUPPORTS_VIEW: round-trips [[MetadataOnlyTable]] for created + * A [[TableCatalog]] implementing ViewCatalog: round-trips [[MetadataOnlyTable]] for created * views and tables (via `createTable` / `dropTable` / `tableExists` / `listTables`) and exposes * two canned read-only fixtures (`test_view`, `test_unqualified_view`) used by the view-read * tests. Entries created via `createTable` can be either tables or views -- their * [[TableCatalog#PROP_TABLE_TYPE]] property is what distinguishes them. */ -class TestingViewCatalog extends TableCatalog { +class TestingViewCatalog extends TableCatalog with ViewCatalog { - // Holds entries (views and tables) created via createTable within the session. 
Keyed by - // (namespace, name); PROP_TABLE_TYPE in the stored TableInfo distinguishes views from tables. + // Holds entries (views and tables) created via createTable / createView within the session. + // Keyed by (namespace, name); the stored value's runtime type (ViewInfo vs TableInfo) + // distinguishes views from tables. Mixed-catalog: shared identifier namespace per the + // ViewCatalog contract. private val createdViews = new java.util.concurrent.ConcurrentHashMap[(Seq[String], String), TableInfo]() - override def capabilities(): java.util.Set[TableCatalogCapability] = - java.util.Collections.singleton(TableCatalogCapability.SUPPORTS_VIEW) - override def loadTable(ident: Identifier): Table = { val key = (ident.namespace().toSeq, ident.name()) Option(createdViews.get(key)).map(new MetadataOnlyTable(_, ident.toString)).getOrElse { @@ -1015,10 +1014,17 @@ class TestingViewCatalog extends TableCatalog { override def tableExists(ident: Identifier): Boolean = { val key = (ident.namespace().toSeq, ident.name()) - createdViews.containsKey(key) || super.tableExists(ident) + val existing = createdViews.get(key) + existing != null && !existing.isInstanceOf[ViewInfo] } override def createTable(ident: Identifier, info: TableInfo): Table = { + // Per the mixed-catalog contract: createTable must reject if the ident is already a view. 
+ if (info.isInstanceOf[ViewInfo]) { + throw new IllegalStateException( + "TestingViewCatalog.createTable should not be called with a ViewInfo; views go through " + + "ViewCatalog.createView") + } val key = (ident.namespace().toSeq, ident.name()) if (createdViews.putIfAbsent(key, info) != null) { throw new TableAlreadyExistsException(ident) @@ -1046,23 +1052,71 @@ class TestingViewCatalog extends TableCatalog { } override def dropTable(ident: Identifier): Boolean = { val key = (ident.namespace().toSeq, ident.name()) + val existing = createdViews.get(key) + if (existing == null || existing.isInstanceOf[ViewInfo]) return false createdViews.remove(key) != null } override def renameTable(oldIdent: Identifier, newIdent: Identifier): Unit = { throw new RuntimeException("shouldn't be called") } override def listTables(namespace: Array[String]): Array[Identifier] = { - // Per the TableCatalog contract (v1 parity), this returns identifiers for both tables and - // views; `listTableSummaries` (default impl: listTables + loadTable + read PROP_TABLE_TYPE) - // is what distinguishes them. + // Tables only -- views are listed via ViewCatalog.listViews per the new contract. + val targetNs = namespace.toSeq + val ids = new java.util.ArrayList[Identifier]() + createdViews.forEach { (key, info) => + if (key._1 == targetNs && !info.isInstanceOf[ViewInfo]) { + ids.add(Identifier.of(key._1.toArray, key._2)) + } + } + ids.toArray(new Array[Identifier](0)) + } + + // ViewCatalog methods. Storage is shared with TableCatalog (mixed-catalog pattern). 
+ + override def listViews(namespace: Array[String]): Array[Identifier] = { val targetNs = namespace.toSeq val ids = new java.util.ArrayList[Identifier]() - createdViews.forEach { (key, _) => - if (key._1 == targetNs) ids.add(Identifier.of(key._1.toArray, key._2)) + createdViews.forEach { (key, info) => + if (key._1 == targetNs && info.isInstanceOf[ViewInfo]) { + ids.add(Identifier.of(key._1.toArray, key._2)) + } } ids.toArray(new Array[Identifier](0)) } + override def loadView(ident: Identifier): ViewInfo = { + val key = (ident.namespace().toSeq, ident.name()) + Option(createdViews.get(key)) match { + case Some(v: ViewInfo) => v + case _ => throw new NoSuchViewException(ident) + } + } + + override def createView(ident: Identifier, info: ViewInfo): ViewInfo = { + val key = (ident.namespace().toSeq, ident.name()) + if (createdViews.putIfAbsent(key, info) != null) { + throw new ViewAlreadyExistsException(ident) + } + info + } + + override def replaceView(ident: Identifier, info: ViewInfo): ViewInfo = { + val key = (ident.namespace().toSeq, ident.name()) + val existing = createdViews.get(key) + if (existing == null || !existing.isInstanceOf[ViewInfo]) { + throw new NoSuchViewException(ident) + } + createdViews.put(key, info) + info + } + + override def dropView(ident: Identifier): Boolean = { + val key = (ident.namespace().toSeq, ident.name()) + val existing = createdViews.get(key) + if (existing == null || !existing.isInstanceOf[ViewInfo]) return false + createdViews.remove(key) != null + } + private var catalogName = "" override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = { catalogName = name @@ -1071,17 +1125,15 @@ class TestingViewCatalog extends TableCatalog { } /** - * A minimal [[StagingTableCatalog]] used to drive `AtomicCreateV2ViewExec`. Views are stored - * in a local map; staging commits write through, aborts discard. Supports SUPPORTS_VIEW. + * A minimal mixed [[StagingTableCatalog]] + [[ViewCatalog]]. 
View DDL routes through the + * ViewCatalog API (no separate staging variant for views in the new design). The staging + * methods cover table CTAS / RTAS only. */ -class TestingStagingCatalog extends StagingTableCatalog { +class TestingStagingCatalog extends StagingTableCatalog with ViewCatalog { private val views = new java.util.concurrent.ConcurrentHashMap[(Seq[String], String), TableInfo]() - override def capabilities(): java.util.Set[TableCatalogCapability] = - java.util.Collections.singleton(TableCatalogCapability.SUPPORTS_VIEW) - private def keyOf(ident: Identifier): (Seq[String], String) = (ident.namespace().toSeq, ident.name()) @@ -1090,9 +1142,16 @@ class TestingStagingCatalog extends StagingTableCatalog { .getOrElse(throw new NoSuchTableException(ident)) } - override def tableExists(ident: Identifier): Boolean = views.containsKey(keyOf(ident)) + override def tableExists(ident: Identifier): Boolean = { + val v = views.get(keyOf(ident)) + v != null && !v.isInstanceOf[ViewInfo] + } override def createTable(ident: Identifier, info: TableInfo): Table = { + if (info.isInstanceOf[ViewInfo]) { + throw new IllegalStateException( + "TestingStagingCatalog.createTable should not be called with a ViewInfo") + } if (views.putIfAbsent(keyOf(ident), info) != null) { throw new TableAlreadyExistsException(ident) } @@ -1118,11 +1177,55 @@ class TestingStagingCatalog extends StagingTableCatalog { override def alterTable(ident: Identifier, changes: TableChange*): Table = throw new RuntimeException("shouldn't be called") - override def dropTable(ident: Identifier): Boolean = views.remove(keyOf(ident)) != null + override def dropTable(ident: Identifier): Boolean = { + val v = views.get(keyOf(ident)) + if (v == null || v.isInstanceOf[ViewInfo]) return false + views.remove(keyOf(ident)) != null + } override def renameTable(oldIdent: Identifier, newIdent: Identifier): Unit = throw new RuntimeException("shouldn't be called") override def listTables(namespace: Array[String]): 
Array[Identifier] = Array.empty + // ViewCatalog methods -- shared storage with the table side. + + override def listViews(namespace: Array[String]): Array[Identifier] = { + val targetNs = namespace.toSeq + val ids = new java.util.ArrayList[Identifier]() + views.forEach { (key, info) => + if (key._1 == targetNs && info.isInstanceOf[ViewInfo]) { + ids.add(Identifier.of(key._1.toArray, key._2)) + } + } + ids.toArray(new Array[Identifier](0)) + } + + override def loadView(ident: Identifier): ViewInfo = views.get(keyOf(ident)) match { + case v: ViewInfo => v + case _ => throw new NoSuchViewException(ident) + } + + override def createView(ident: Identifier, info: ViewInfo): ViewInfo = { + if (views.putIfAbsent(keyOf(ident), info) != null) { + throw new ViewAlreadyExistsException(ident) + } + info + } + + override def replaceView(ident: Identifier, info: ViewInfo): ViewInfo = { + val existing = views.get(keyOf(ident)) + if (existing == null || !existing.isInstanceOf[ViewInfo]) { + throw new NoSuchViewException(ident) + } + views.put(keyOf(ident), info) + info + } + + override def dropView(ident: Identifier): Boolean = { + val existing = views.get(keyOf(ident)) + if (existing == null || !existing.isInstanceOf[ViewInfo]) return false + views.remove(keyOf(ident)) != null + } + private var catalogName = "" override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = { catalogName = name @@ -1140,7 +1243,7 @@ private class RecordingStagedTable( } /** - * A v2 catalog that does not declare SUPPORTS_VIEW. Used by capability-gate tests. The + * A v2 catalog that does not implement ViewCatalog. Used by capability-gate tests. The * gate actually fires in `Analyzer.lookupTableOrView(viewOnly=true)` for ALTER VIEW and in * [[CheckViewReferences]] for CREATE VIEW -- in both cases before `loadTable` is called -- * so the pre-seeded view fixture is effectively unused on the happy-path-error flow. 
It's diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 976c3485f1f77..d1dc9c282829f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -2966,7 +2966,7 @@ class DataSourceV2SQLSuiteV1Filter } } - test("View commands are not supported in v2 catalogs without SUPPORTS_VIEW") { + test("View commands are not supported in v2 catalogs that don't implement ViewCatalog") { def validateViewCommand(sqlStatement: String): Unit = { val e = analysisException(sqlStatement) checkError( From 66fa409b1779b371bf07561b0613f10c7dbbb108 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Sat, 25 Apr 2026 05:23:51 +0000 Subject: [PATCH 36/59] fix: pure ViewCatalog support in resolver; drop misleading staging-exec test names - Analyzer.lookupTableOrView and RelationResolution.tryResolvePersistent skip CatalogV2Util.loadTable for pure ViewCatalogs (no TableCatalog mixin), so asTableCatalog no longer throws MISSING_CATALOG_ABILITY.TABLES and masks the legitimate loadView fallback. SELECT and ALTER VIEW now work end-to-end on a pure ViewCatalog. - Add a TestingViewOnlyCatalog fixture (no TableCatalog mixin) plus read and ALTER VIEW tests that exercise the loadView fallback. - DataSourceV2MetadataOnlyViewSuite: rename "uses the atomic exec" tests to reflect that view DDL routes through ViewCatalog.createView / replaceView (no separate staging variant); drop now-dead RecordingStagedTable; replace TestingStagingCatalog's stage* method bodies with explicit "must not be invoked by view DDL" throws so any future regression that misroutes through the staging API surfaces immediately. 
Co-authored-by: Isaac --- .../sql/catalyst/analysis/Analyzer.scala | 33 ++-- .../analysis/RelationResolution.scala | 43 ++++- .../DataSourceV2MetadataOnlyViewSuite.scala | 163 +++++++++++++----- 3 files changed, 181 insertions(+), 58 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 76830aee5f094..b62acadcdc173 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1127,18 +1127,27 @@ class Analyzer( !catalog.isInstanceOf[ViewCatalog]) { throw QueryCompilationErrors.missingCatalogViewsAbilityError(catalog) } - val tableResolved: Option[LogicalPlan] = CatalogV2Util.loadTable(catalog, ident).map { - case v1Table: V1Table if CatalogV2Util.isSessionCatalog(catalog) && - v1Table.v1Table.tableType == CatalogTableType.VIEW => - val v1Ident = v1Table.catalogTable.identifier - val v2Ident = Identifier.of(v1Ident.database.toArray, v1Ident.identifier) - ResolvedPersistentView( - catalog, v2Ident, v1Table.catalogTable) - case t: MetadataOnlyTable if t.getTableInfo.isInstanceOf[ViewInfo] => - val catalogTable = V1Table.toCatalogTable(catalog, ident, t) - ResolvedPersistentView(catalog, ident, catalogTable) - case table => - ResolvedTable.create(catalog.asTableCatalog, ident, table) + // Skip the table-side lookup entirely for view-only catalogs (no `TableCatalog` mixin): + // `CatalogV2Util.loadTable` would call `asTableCatalog` and throw + // MISSING_CATALOG_ABILITY.TABLES, masking the legitimate view-resolution path. 
+ val tableResolved: Option[LogicalPlan] = if ( + CatalogV2Util.isSessionCatalog(catalog) || catalog.isInstanceOf[TableCatalog] + ) { + CatalogV2Util.loadTable(catalog, ident).map { + case v1Table: V1Table if CatalogV2Util.isSessionCatalog(catalog) && + v1Table.v1Table.tableType == CatalogTableType.VIEW => + val v1Ident = v1Table.catalogTable.identifier + val v2Ident = Identifier.of(v1Ident.database.toArray, v1Ident.identifier) + ResolvedPersistentView( + catalog, v2Ident, v1Table.catalogTable) + case t: MetadataOnlyTable if t.getTableInfo.isInstanceOf[ViewInfo] => + val catalogTable = V1Table.toCatalogTable(catalog, ident, t) + ResolvedPersistentView(catalog, ident, catalogTable) + case table => + ResolvedTable.create(catalog.asTableCatalog, ident, table) + } + } else { + None } tableResolved.orElse { catalog match { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala index 6913737b6c709..a09639d902a1a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala @@ -40,7 +40,8 @@ import org.apache.spark.sql.connector.catalog.{ MetadataOnlyTable, Table, V1Table, - V2TableWithV1Fallback + V2TableWithV1Fallback, + ViewCatalog } import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.errors.{DataTypeErrorsBase, QueryCompilationErrors} @@ -229,11 +230,39 @@ class RelationResolution( .orElse { val writePrivileges = u.options.get(UnresolvedRelation.REQUIRED_WRITE_PRIVILEGES) val finalOptions = u.clearWritePrivileges.options - val table = CatalogV2Util.loadTable( - catalog, - ident, - finalTimeTravelSpec, - Option(writePrivileges)) + // Skip the table-side lookup entirely for view-only catalogs (no `TableCatalog` mixin): + // 
`CatalogV2Util.loadTable` would call `asTableCatalog` and throw + // MISSING_CATALOG_ABILITY.TABLES, masking the legitimate view-resolution path. A pure + // `ViewCatalog`'s view is resolved below via the `loadView` fallback. + val table: Option[Table] = if ( + CatalogV2Util.isSessionCatalog(catalog) || catalog.isInstanceOf[TableCatalog] + ) { + CatalogV2Util.loadTable( + catalog, + ident, + finalTimeTravelSpec, + Option(writePrivileges)) + } else { + None + } + // Fallback to ViewCatalog for catalogs that host views but where loadTable returned + // None (or was skipped because there's no TableCatalog mixin). Time-travel / write + // privileges are table-only, so the fallback runs only when neither is requested. + val tableOrView: Option[Table] = table.orElse { + if (finalTimeTravelSpec.isEmpty && writePrivileges == null) { + catalog match { + case vc: ViewCatalog => + try { + Some(new MetadataOnlyTable(vc.loadView(ident), ident.toString)) + } catch { + case _: NoSuchViewException => None + } + case _ => None + } + } else { + None + } + } val sharedRelationCacheMatch = for { t <- table @@ -251,7 +280,7 @@ class RelationResolution( val loaded = createRelation( catalog, ident, - table, + tableOrView, finalOptions, u.isStreaming, finalTimeTravelSpec) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala index 271ec88675a13..2343092e3a955 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala @@ -29,7 +29,9 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap /** * Tests for the view side of [[MetadataOnlyTable]]: view-text expansion on read, and * CREATE VIEW / ALTER VIEW ...
AS going through the v2 write path - * (`CreateV2ViewExec` / `AlterV2ViewExec` and their atomic staging variants). + * (`CreateV2ViewExec` / `AlterV2ViewExec`). View writes route through + * [[ViewCatalog#createView]] / [[ViewCatalog#replaceView]]; there is no separate staging + * variant for views (the `StagingTableCatalog` `stage*` API is table-only). * Data-source-table read paths live in * [[org.apache.spark.sql.connector.DataSourceV2MetadataOnlyTableSuite]]. * @@ -297,35 +299,38 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio // --- CREATE VIEW on a StagingTableCatalog ------------------------------- - test("CREATE VIEW on a StagingTableCatalog uses the atomic exec") { + // The view exec routes everything through `ViewCatalog.createView` / `replaceView` regardless + // of whether the catalog also implements `StagingTableCatalog` -- views have no separate + // staging variant. These tests just confirm the view CRUD still works on a catalog that + // happens to mix in `StagingTableCatalog`; they do NOT exercise `stageCreate` / + // `stageCreateOrReplace` (which are table-only paths). + test("CREATE VIEW on a mixed StagingTableCatalog + ViewCatalog routes through createView") { withSQLConf( "spark.sql.catalog.staging_catalog" -> classOf[TestingStagingCatalog].getName) { withTable("spark_catalog.default.t") { Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") - // Plain CREATE -- exercises stageCreate. sql("CREATE VIEW staging_catalog.default.v_atomic AS " + "SELECT x FROM spark_catalog.default.t WHERE x > 1") checkAnswer( spark.table("staging_catalog.default.v_atomic"), Seq(Row(2), Row(3))) - // Second CREATE without IF NOT EXISTS -- should surface viewAlreadyExistsError - // (TestingStagingCatalog's stageCreate throws TableAlreadyExistsException, which the - // exec wraps). + // Second CREATE without IF NOT EXISTS surfaces the viewAlreadyExists error from + // ViewCatalog.createView. 
val ex = intercept[AnalysisException] { sql("CREATE VIEW staging_catalog.default.v_atomic AS " + "SELECT x FROM spark_catalog.default.t WHERE x > 1") } assert(ex.getMessage.toLowerCase(java.util.Locale.ROOT).contains("already exists")) - // CREATE OR REPLACE -- exercises stageCreateOrReplace. + // CREATE OR REPLACE routes through ViewCatalog.replaceView. sql("CREATE OR REPLACE VIEW staging_catalog.default.v_atomic AS " + "SELECT x FROM spark_catalog.default.t WHERE x > 2") checkAnswer(spark.table("staging_catalog.default.v_atomic"), Row(3)) - // CREATE IF NOT EXISTS on an existing view -- no-op; the atomic exec short-circuits on - // tryLoadTable() before buildViewInfo, matching the non-atomic path. + // CREATE IF NOT EXISTS on an existing view -- no-op; the exec short-circuits on + // viewExists before buildViewInfo. sql("CREATE VIEW IF NOT EXISTS staging_catalog.default.v_atomic AS " + "SELECT x + 100 AS x FROM spark_catalog.default.t") // Value unchanged -- IF NOT EXISTS was a no-op. @@ -334,7 +339,7 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio } } - test("CREATE VIEW over a non-view table entry is rejected (StagingTableCatalog)") { + test("CREATE VIEW over a non-view table entry is rejected (mixed StagingTableCatalog)") { withSQLConf( "spark.sql.catalog.staging_catalog" -> classOf[TestingStagingCatalog].getName) { val stagingCatalog = spark.sessionState.catalogManager.catalog("staging_catalog") @@ -349,9 +354,9 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio withTable("spark_catalog.default.t") { Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") - // CREATE OR REPLACE VIEW must not silently destroy a non-view table. On a staging - // catalog this specifically guards against `stageCreateOrReplace` committing over - // the table. + // CREATE OR REPLACE VIEW must not silently destroy a non-view table. 
The exec's + // `rejectIfTable` short-circuits before any view-write call (no `stage*` involved -- + // views are written via `ViewCatalog.replaceView`, not the staging API). val replaceEx = intercept[AnalysisException] { sql("CREATE OR REPLACE VIEW staging_catalog.default.v_existing_table AS " + "SELECT x FROM spark_catalog.default.t") @@ -556,7 +561,7 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio } } - test("ALTER VIEW on a StagingTableCatalog uses the atomic exec (stageReplace)") { + test("ALTER VIEW on a mixed StagingTableCatalog + ViewCatalog routes through replaceView") { withSQLConf( "spark.sql.catalog.staging_catalog" -> classOf[TestingStagingCatalog].getName) { withTable("spark_catalog.default.t") { @@ -591,6 +596,37 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio } } + // --- Pure ViewCatalog (no TableCatalog mixin) --------------------------- + + test("read view from a pure ViewCatalog (no TableCatalog mixin)") { + // The analyzer's table-side lookup must skip `loadTable` entirely for catalogs that don't + // implement `TableCatalog`; otherwise `asTableCatalog` would throw + // MISSING_CATALOG_ABILITY.TABLES and the legitimate `loadView` fallback would never run. + withSQLConf( + "spark.sql.catalog.view_only" -> classOf[TestingViewOnlyCatalog].getName) { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + // The fixture stores a `pure_v` view whose body filters spark_catalog.default.t. 
+ checkAnswer(spark.table("view_only.default.pure_v"), Seq(Row(2), Row(3))) + } + } + } + + test("ALTER VIEW on a pure ViewCatalog (no TableCatalog mixin)") { + withSQLConf( + "spark.sql.catalog.view_only" -> classOf[TestingViewOnlyCatalog].getName) { + val catalog = spark.sessionState.catalogManager.catalog("view_only") + .asInstanceOf[TestingViewOnlyCatalog] + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("ALTER VIEW view_only.default.pure_v AS " + + "SELECT x FROM spark_catalog.default.t WHERE x > 2") + assert(catalog.loadView(Identifier.of(Array("default"), "pure_v")).queryText() == + "SELECT x FROM spark_catalog.default.t WHERE x > 2") + } + } + } + test("cyclic detection distinguishes views across multi-level namespaces") { withTable("spark_catalog.default.t") { Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") @@ -1158,22 +1194,15 @@ class TestingStagingCatalog extends StagingTableCatalog with ViewCatalog { new MetadataOnlyTable(info, ident.toString) } - override def stageCreate(ident: Identifier, info: TableInfo): StagedTable = { - if (views.containsKey(keyOf(ident))) throw new TableAlreadyExistsException(ident) - new RecordingStagedTable( - info, ident.toString, () => views.put(keyOf(ident), info), () => ()) - } - - override def stageReplace(ident: Identifier, info: TableInfo): StagedTable = { - if (!views.containsKey(keyOf(ident))) throw new NoSuchTableException(ident) - new RecordingStagedTable( - info, ident.toString, () => views.put(keyOf(ident), info), () => ()) - } - - override def stageCreateOrReplace(ident: Identifier, info: TableInfo): StagedTable = { - new RecordingStagedTable( - info, ident.toString, () => views.put(keyOf(ident), info), () => ()) - } + // Staging methods are required by `StagingTableCatalog` but should never be invoked by view + // DDL (views write through `ViewCatalog.createView` / `replaceView`, not the staging API). 
+ // Throwing here turns any accidental routing into a clear test failure. + override def stageCreate(ident: Identifier, info: TableInfo): StagedTable = + throw new RuntimeException("stageCreate must not be invoked by view DDL") + override def stageReplace(ident: Identifier, info: TableInfo): StagedTable = + throw new RuntimeException("stageReplace must not be invoked by view DDL") + override def stageCreateOrReplace(ident: Identifier, info: TableInfo): StagedTable = + throw new RuntimeException("stageCreateOrReplace must not be invoked by view DDL") override def alterTable(ident: Identifier, changes: TableChange*): Table = throw new RuntimeException("shouldn't be called") @@ -1233,15 +1262,6 @@ class TestingStagingCatalog extends StagingTableCatalog with ViewCatalog { override def name(): String = catalogName } -private class RecordingStagedTable( - info: TableInfo, - name: String, - onCommit: () => Unit, - onAbort: () => Unit) extends MetadataOnlyTable(info, name) with StagedTable { - override def commitStagedChanges(): Unit = onCommit() - override def abortStagedChanges(): Unit = onAbort() -} - /** * A v2 catalog that does not implement ViewCatalog. Used by capability-gate tests. The * gate actually fires in `Analyzer.lookupTableOrView(viewOnly=true)` for ALTER VIEW and in @@ -1274,3 +1294,68 @@ class TestingTableOnlyCatalog extends TableCatalog { } override def name(): String = catalogName } + +/** + * A pure [[ViewCatalog]] (no [[TableCatalog]] mixin). Used to exercise that the analyzer's + * resolution paths skip the `loadTable` step and fall through to `loadView` for catalogs that + * cannot host tables. Pre-seeds a single mutable view at `default.pure_v` so the read and + * ALTER VIEW tests can both reach it. + */ +class TestingViewOnlyCatalog extends ViewCatalog { + private val store = + new java.util.concurrent.ConcurrentHashMap[(Seq[String], String), ViewInfo]() + + // Seeded on first `initialize`. 
Filters `spark_catalog.default.t` so the read test can + // assert deterministic output. ALTER VIEW tests overwrite it via `replaceView`. + private def seedDefault(): Unit = { + val key = (Seq("default"), "pure_v") + if (!store.containsKey(key)) { + val info = new ViewInfo.Builder() + .withSchema(new StructType().add("x", "int")) + .withQueryText("SELECT x FROM spark_catalog.default.t WHERE x > 1") + .build() + store.put(key, info) + } + } + + override def listViews(namespace: Array[String]): Array[Identifier] = { + val target = namespace.toSeq + val ids = new java.util.ArrayList[Identifier]() + store.forEach { (key, _) => + if (key._1 == target) ids.add(Identifier.of(key._1.toArray, key._2)) + } + ids.toArray(new Array[Identifier](0)) + } + + override def loadView(ident: Identifier): ViewInfo = { + val key = (ident.namespace().toSeq, ident.name()) + Option(store.get(key)).getOrElse(throw new NoSuchViewException(ident)) + } + + override def createView(ident: Identifier, info: ViewInfo): ViewInfo = { + val key = (ident.namespace().toSeq, ident.name()) + if (store.putIfAbsent(key, info) != null) { + throw new ViewAlreadyExistsException(ident) + } + info + } + + override def replaceView(ident: Identifier, info: ViewInfo): ViewInfo = { + val key = (ident.namespace().toSeq, ident.name()) + if (!store.containsKey(key)) throw new NoSuchViewException(ident) + store.put(key, info) + info + } + + override def dropView(ident: Identifier): Boolean = { + val key = (ident.namespace().toSeq, ident.name()) + store.remove(key) != null + } + + private var catalogName = "" + override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = { + catalogName = name + seedDefault() + } + override def name(): String = catalogName +} From 6bbb3c986352320c59fe07ca30b4c54c1bd7caf2 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Sat, 25 Apr 2026 05:45:08 +0000 Subject: [PATCH 37/59] address self-review findings: v1-parity for CREATE VIEW IF NOT EXISTS over a non-view table; 
align SHOW TABLES test with new listTables contract; defensive null check on MetadataOnlyTable - CreateV2ViewExec.run: probe both viewExists and tableExists up front. CREATE VIEW IF NOT EXISTS over a non-view table is now a no-op (v1 parity: see SQLViewSuite "existing a table with the duplicate name when CREATE VIEW IF NOT EXISTS"); the previous code called rejectIfTable() unconditionally before the allowExisting check and threw TABLE_OR_VIEW_ALREADY_EXISTS for what should be a no-op. Non-IF-NOT-EXISTS CREATE / OR REPLACE still surfaces the dedicated EXPECT_VIEW_NOT_TABLE / TABLE_OR_VIEW_ALREADY_EXISTS error. Drop the now-unused rejectIfTable / replaceArg trait helpers (and the AlterV2ViewExec override). - DataSourceV2MetadataOnlyViewSuite: rename "SHOW TABLES on a v2 catalog includes views (v1 parity)" to "SHOW TABLES on a v2 catalog returns only tables" and flip the assertion. The new TableCatalog.listTables contract excludes views (per the file Javadoc); the previous test name + body asserted v1-parity which the implementation does not provide and ShowTablesExec is not changed by this PR. Documents the intentional v2 divergence. - MetadataOnlyTable: Objects.requireNonNull on `info` and `name` so a connector that constructs the wrapper with nulls fails fast at construction time rather than producing cryptic NPEs in downstream consumers (DescribeTableExec's Name row, DataSourceV2Relation logging). 
Co-authored-by: Isaac --- .../connector/catalog/MetadataOnlyTable.java | 5 +-- .../datasources/v2/AlterV2ViewExec.scala | 3 -- .../datasources/v2/CreateV2ViewExec.scala | 31 ++++++++++--------- .../DataSourceV2MetadataOnlyViewSuite.scala | 14 ++++----- 4 files changed, 26 insertions(+), 27 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java index 5044b9f451820..a550ce460d1c8 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java @@ -19,6 +19,7 @@ import java.util.Collections; import java.util.Map; +import java.util.Objects; import java.util.Set; import org.apache.spark.annotation.Evolving; @@ -52,8 +53,8 @@ public class MetadataOnlyTable implements Table { * multi-part form used elsewhere for v2 identifiers. */ public MetadataOnlyTable(TableInfo info, String name) { - this.info = info; - this.name = name; + this.info = Objects.requireNonNull(info, "info should not be null"); + this.name = Objects.requireNonNull(name, "name should not be null"); } public TableInfo getTableInfo() { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala index 3da71a5bdbd27..35c84b50bad75 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala @@ -85,9 +85,6 @@ private[v2] trait V2AlterViewPreparation extends V2ViewPreparation { .map(CatalogTable.VIEW_SCHEMA_MODE -> _) .toMap) - // ALTER VIEW ... AS is always a replace, never a CREATE. 
- override protected def replaceArg: Boolean = true - /** * Force-evaluate `existingView` so `NoSuchViewException` / `expectViewNotTableError` * surfaces before any other work (e.g. `buildViewInfo`, uncache, replace). The result is diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala index 09eb30ed0b88c..b944332825d24 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala @@ -111,18 +111,6 @@ private[v2] trait V2ViewPreparation extends LeafV2CommandExec { protected def viewAlreadyExists(): Throwable = QueryCompilationErrors.viewAlreadyExistsError(fullNameParts) - - // For mixed catalogs (also TableCatalog), reject if the identifier names a non-view table: - // CREATE VIEW must not silently destroy a table's data, and CREATE OR REPLACE VIEW must not - // either. - protected def rejectIfTable(): Unit = catalog match { - case tc: TableCatalog if tc.tableExists(identifier) => - throw QueryCompilationErrors.unsupportedCreateOrReplaceViewOnTableError( - fullNameParts, replaceArg) - case _ => - } - - protected def replaceArg: Boolean } /** @@ -145,18 +133,31 @@ case class CreateV2ViewExec( viewSchemaMode: ViewSchemaMode) extends V2ViewPreparation { override def owner: Option[String] = Some(CurrentUserContext.getCurrentUser) - override protected def replaceArg: Boolean = replace override protected def run(): Seq[InternalRow] = { // Probe before preparing the view body so `IF NOT EXISTS` and the type-collision check can // short-circuit before running `aliasPlan` / config capture (matches v1 // `CreateViewCommand.run`). Cyclic-reference detection runs at analysis time in // `CheckViewReferences`. 
+ // + // For mixed catalogs (also implementing `TableCatalog`), also probe `tableExists` so: + // * `CREATE VIEW IF NOT EXISTS` over a non-view table is a no-op (v1 parity, see + // `SQLViewSuite` "existing a table with the duplicate name when CREATE VIEW IF NOT + // EXISTS"), and + // * a non-IF-NOT-EXISTS CREATE / OR REPLACE surfaces the dedicated + // `EXPECT_VIEW_NOT_TABLE` / `TABLE_OR_VIEW_ALREADY_EXISTS` error before any view write. val viewExists = catalog.viewExists(identifier) - if (allowExisting && viewExists) { + val tableExists = catalog match { + case tc: TableCatalog => tc.tableExists(identifier) + case _ => false + } + if (allowExisting && (viewExists || tableExists)) { return Seq.empty } - rejectIfTable() + if (tableExists) { + throw QueryCompilationErrors.unsupportedCreateOrReplaceViewOnTableError( + fullNameParts, replace) + } if (viewExists && !replace) throw viewAlreadyExists() val info = buildViewInfo() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala index 2343092e3a955..863793fafef58 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala @@ -927,18 +927,18 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio .build()) } - test("SHOW TABLES on a v2 catalog includes views (v1 parity)") { - // v1 SHOW TABLES returns both tables and views; the `isTemporary` column distinguishes - // temp views from everything else. v2 catalogs have no temp views, so `isTemporary` is - // always false -- tables and permanent views are indistinguishable at the row level, but - // both must appear (callers that want only tables should use listTableSummaries and - // filter). 
+ test("SHOW TABLES on a v2 catalog returns only tables") { + // Per the new `TableCatalog.listTables` contract, SHOW TABLES returns table identifiers + // only -- views (in mixed catalogs) are listed via SHOW VIEWS / `ViewCatalog.listViews`. + // This is an intentional divergence from v1 SHOW TABLES (which includes both tables and + // views in a single listing); v2 catalogs separate the two so callers can target either + // kind without filtering. seedV2View("v_in_show_tables") seedV2Table("t_in_show_tables") val rows = sql("SHOW TABLES IN view_catalog.default").collect() val names = rows.map(_.getString(1)).toSet - assert(names.contains("v_in_show_tables"), s"view missing from SHOW TABLES: $names") assert(names.contains("t_in_show_tables"), s"table missing from SHOW TABLES: $names") + assert(!names.contains("v_in_show_tables"), s"view leaked into SHOW TABLES: $names") rows.foreach(r => assert(!r.getBoolean(2), s"isTemporary must be false: $r")) } From a088c5c8fc7768396e4553fa07fea4e0f9e86549 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Sat, 25 Apr 2026 05:56:21 +0000 Subject: [PATCH 38/59] address self-review findings: minor Javadoc wording - ViewInfo class doc: complete the dangling "construct." sentence with its direct object ("construct a ViewInfo") so the line reads as a complete thought. - TableInfo Builder: replace the awkward use of "write" as a noun ("discards the convenience setter's write") with verb form ("discards the value the convenience setter wrote"). 
Co-authored-by: Isaac --- .../org/apache/spark/sql/connector/catalog/TableInfo.java | 4 ++-- .../java/org/apache/spark/sql/connector/catalog/ViewInfo.java | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableInfo.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableInfo.java index 1ae04a3d3fab2..89709c9f1c2f0 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableInfo.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableInfo.java @@ -115,8 +115,8 @@ public B withConstraints(Constraint[] constraints) { // Convenience setters below write reserved keys into the current `properties` map. Pair // each with a preceding `withProperties(...)` call if you want to start from a user map; - // calling `withProperties` after a convenience setter discards the convenience setter's - // write. + // calling `withProperties` after a convenience setter discards the value the convenience + // setter wrote. /** Writes {@link TableCatalog#PROP_PROVIDER} into the current properties map. */ public B withProvider(String provider) { diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewInfo.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewInfo.java index 4855176977d4f..1406255519420 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewInfo.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewInfo.java @@ -35,7 +35,8 @@ * {@link TableCatalog} and {@link ViewCatalog}) can opt into the perf optimization of returning * a {@link MetadataOnlyTable} wrapping a {@code ViewInfo} from {@link TableCatalog#loadTable} * for a view identifier. Pure {@link ViewCatalog} implementations never see {@code TableInfo}; - * the typed setters on {@link Builder} cover everything they need to construct. 
+ * the typed setters on {@link Builder} cover everything they need to construct a + * {@code ViewInfo}. * * @since 4.2.0 */ From 67e5890e6a28c63278ccbefcca886246621ff8b6 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Sat, 25 Apr 2026 09:57:05 +0000 Subject: [PATCH 39/59] fix RelationResolution import: TableCatalog missing after pure-ViewCatalog gating Commit 66fa409b177 added `catalog.isInstanceOf[TableCatalog]` to RelationResolution.tryResolvePersistent's gating but didn't add TableCatalog to the explicit-list import block; CI failed at catalyst compile with `not found: type TableCatalog`. Add the import. Co-authored-by: Isaac --- .../apache/spark/sql/catalyst/analysis/RelationResolution.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala index a09639d902a1a..78101317566da 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala @@ -39,6 +39,7 @@ import org.apache.spark.sql.connector.catalog.{ LookupCatalog, MetadataOnlyTable, Table, + TableCatalog, V1Table, V2TableWithV1Fallback, ViewCatalog From 93241b5bb6e91de9c5f72e74763a5568e0224f27 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Sat, 25 Apr 2026 11:47:57 +0000 Subject: [PATCH 40/59] remove unused CatalogV2Util imports flagged by -Wconf unused-imports After the SUPPORTS_VIEW removal, views.scala and DataSourceV2Strategy.scala no longer reference CatalogV2Util. Scala's fatal unused-imports warning (Wconf cat=unused-imports) blocks the build. Drop the now-dead import in both files. 
Co-authored-by: Isaac --- .../scala/org/apache/spark/sql/execution/command/views.scala | 2 +- .../sql/execution/datasources/v2/DataSourceV2Strategy.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index c721eb6aaabd8..994c7836f9dd1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -33,7 +33,7 @@ import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, SubqueryExpr import org.apache.spark.sql.catalyst.plans.logical.{AlterViewAs, AnalysisOnlyCommand, CreateTempView, CreateView, CTEInChildren, CTERelationDef, LogicalPlan, Project, View, WithCTE} import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.classic.ClassicConversions.castToImpl -import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, CatalogV2Util, Identifier, ViewCatalog} +import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, Identifier, ViewCatalog} import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.{IdentifierHelper, NamespaceHelper} import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index d047ce385d118..1fd13bb35d223 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import 
org.apache.spark.sql.catalyst.trees.TreePattern.SCALAR_SUBQUERY import org.apache.spark.sql.catalyst.util.{toPrettySQL, GeneratedColumn, IdentityColumn, ResolveDefaultColumns, ResolveTableConstraints, V2ExpressionBuilder} import org.apache.spark.sql.classic.SparkSession -import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Identifier, StagingTableCatalog, SupportsDeleteV2, SupportsNamespaces, SupportsPartitionManagement, SupportsWrite, TableCapability, TableCatalog, TruncatableTable, V1Table, ViewCatalog} +import org.apache.spark.sql.connector.catalog.{Identifier, StagingTableCatalog, SupportsDeleteV2, SupportsNamespaces, SupportsPartitionManagement, SupportsWrite, TableCapability, TableCatalog, TruncatableTable, V1Table, ViewCatalog} import org.apache.spark.sql.connector.catalog.TableChange import org.apache.spark.sql.connector.catalog.index.SupportsIndex import org.apache.spark.sql.connector.expressions.{FieldReference, LiteralValue} From d830eba61a499987c3ff46be3542f584e690082b Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Sat, 25 Apr 2026 16:32:38 +0000 Subject: [PATCH 41/59] fix: scalastyle import order; tests must use createView/viewExists MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - ShowViewsExec: import order — CatalogV2Implicits.NamespaceHelper must precede ViewCatalog (alphabetic comparison after the shared `connector.catalog.` prefix). Caught by `sql/scalastyle`. - DataSourceV2MetadataOnlyViewSuite (3 test failures from CI): * "ALTER VIEW preserves PROP_OWNER (v1-parity)": pre-seeded the view via `catalog.createTable(viewIdent, viewInfo)`. After the rework, TestingViewCatalog.createTable rejects ViewInfo (the new mixed- catalog contract: views go through ViewCatalog.createView). Use `createView` instead. 
* "DROP VIEW on a ViewCatalog drops the view" / "DROP VIEW on a StagingTableCatalog drops the view": asserted catalog.tableExists(viewIdent) before/after DROP, but tableExists now returns false for view-typed entries by design (the table side ignores views). Use `viewExists` instead. Co-authored-by: Isaac --- .../sql/execution/datasources/v2/ShowViewsExec.scala | 2 +- .../connector/DataSourceV2MetadataOnlyViewSuite.scala | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowViewsExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowViewsExec.scala index 86b5b968833d9..00927f05842ad 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowViewsExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowViewsExec.scala @@ -22,8 +22,8 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.util.StringUtils -import org.apache.spark.sql.connector.catalog.ViewCatalog import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.NamespaceHelper +import org.apache.spark.sql.connector.catalog.ViewCatalog import org.apache.spark.sql.execution.LeafExecNode /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala index 863793fafef58..e864a349cfe49 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala @@ -495,7 +495,7 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio .withCurrentCatalog("spark_catalog") 
.withCurrentNamespace(Array("default")) .build() - catalog.createTable(viewIdent, initialInfo) + catalog.createView(viewIdent, initialInfo) try { withTable("spark_catalog.default.t") { Seq(2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") @@ -850,9 +850,9 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") sql("CREATE VIEW view_catalog.default.v_drop AS " + "SELECT x FROM spark_catalog.default.t") - assert(catalog.tableExists(Identifier.of(Array("default"), "v_drop"))) + assert(catalog.viewExists(Identifier.of(Array("default"), "v_drop"))) sql("DROP VIEW view_catalog.default.v_drop") - assert(!catalog.tableExists(Identifier.of(Array("default"), "v_drop"))) + assert(!catalog.viewExists(Identifier.of(Array("default"), "v_drop"))) } } @@ -896,9 +896,9 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") sql("CREATE VIEW staging_catalog.default.v_drop_atomic AS " + "SELECT x FROM spark_catalog.default.t") - assert(catalog.tableExists(Identifier.of(Array("default"), "v_drop_atomic"))) + assert(catalog.viewExists(Identifier.of(Array("default"), "v_drop_atomic"))) sql("DROP VIEW staging_catalog.default.v_drop_atomic") - assert(!catalog.tableExists(Identifier.of(Array("default"), "v_drop_atomic"))) + assert(!catalog.viewExists(Identifier.of(Array("default"), "v_drop_atomic"))) } } } From f0f6e46b0e4116e87e04e44d8eb72c311f1f3e24 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Sun, 26 Apr 2026 10:01:53 +0000 Subject: [PATCH 42/59] DropTableExec: dropTable-first + viewExists fallback for EXPECT_TABLE_NOT_VIEW Two changes, symmetric to the existing DropViewExec: 1. Issue dropTable / purgeTable directly and inspect its boolean return, instead of probing tableExists upfront and ignoring dropTable's value. 
Cuts the happy-path RPC count from 2 (tableExists + dropTable) to 1. IF EXISTS semantics are unchanged: false return + ifExists=true -> no-op, false return + ifExists=false -> noSuchTableError. 2. On false return, if the catalog also implements ViewCatalog and a view sits at the ident, throw EXPECT_TABLE_NOT_VIEW.NO_ALTERNATIVE instead of the generic noSuchTable error. Restores v1 DropTableCommand(isView=false) parity for mixed catalogs (which in the new design return tableExists=false for view idents). The viewExists probe runs only on the slow path (would-be "not found"), so happy-path catalogs see no extra RPC. invalidateCache now runs after the drop succeeds rather than before; matches DropViewExec's ordering, and the cache invalidation only mattered when the drop succeeded anyway. Co-authored-by: Isaac --- .../datasources/v2/DropTableExec.scala | 29 +++++++++++++++---- 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala index c94af4e3dceb3..18f5566a8f468 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala @@ -19,12 +19,19 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} +import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog, ViewCatalog} import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.util.ArrayImplicits._ /** * Physical plan node for dropping a table. 
+ * + * Issues `dropTable` (or `purgeTable`) directly and inspects its return; this saves the + * upfront `tableExists` probe (1 RPC on the happy path). On `false`, falls back to + * `viewExists` for catalogs that also implement [[ViewCatalog]] -- distinguishes + * "wrong type" from "missing" so a `DROP TABLE someView` on a mixed catalog surfaces the + * dedicated `EXPECT_TABLE_NOT_VIEW` error rather than a generic "table not found", + * matching the v1 `DropTableCommand(isView = false)` behavior. */ case class DropTableExec( catalog: TableCatalog, @@ -34,12 +41,22 @@ case class DropTableExec( invalidateCache: () => Unit) extends LeafV2CommandExec { override def run(): Seq[InternalRow] = { - if (catalog.tableExists(ident)) { - invalidateCache() + val dropped = if (purge) catalog.purgeTable(ident) else catalog.dropTable(ident) - } else if (!ifExists) { - val nameParts = (catalog.name() +: ident.namespace() :+ ident.name()).toImmutableArraySeq - throw QueryCompilationErrors.noSuchTableError(nameParts) + if (dropped) { + invalidateCache() + } else { + val nameParts = + (catalog.name() +: ident.namespace() :+ ident.name()).toImmutableArraySeq + catalog match { + case vc: ViewCatalog if vc.viewExists(ident) => + throw QueryCompilationErrors.expectTableNotViewError( + nameParts, cmd = "DROP TABLE", suggestAlternative = false, t = this) + case _ if !ifExists => + throw QueryCompilationErrors.noSuchTableError(nameParts) + case _ => + // IF EXISTS: no-op. + } } Seq.empty From f894e3c8e9d79d13399ce8b6f078779346676492 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Sun, 26 Apr 2026 10:09:22 +0000 Subject: [PATCH 43/59] ViewCatalog.createOrReplaceView for CREATE OR REPLACE VIEW (single-RPC opt-in) Add a default `createOrReplaceView(ident, info)` method to ViewCatalog. 
The default implementation tries `replaceView`, falling back to `createView` on NoSuchViewException -- non-atomic across the two calls, but keeps plugin authors free of the new method (override only when single-RPC atomic upsert is worth the work, e.g. catalogs backed by `INSERT ... ON CONFLICT DO UPDATE` or equivalent). CreateV2ViewExec rework: * Drop the upfront viewExists + tableExists probes. Just call createView / createOrReplaceView and decode the catalog's response. * Happy-path RPCs: - CREATE VIEW (no collision): 1 (was 3) - CREATE OR REPLACE VIEW (any state): 1 (was 3, plus the catch-and-fallback) - CREATE VIEW IF NOT EXISTS over an existing view: 1 (early viewExists short-circuit kept; matches v1 `CreateViewCommand.run` behavior of skipping aliasPlan / config capture in the no-op path). * Cross-type collision (mixed catalog with a table at ident): the catalog's ViewAlreadyExistsException is caught, then a single tableExists check decodes the situation. Plain CREATE / OR REPLACE throws EXPECT_VIEW_NOT_TABLE; IF NOT EXISTS is a no-op (v1 parity). Co-authored-by: Isaac --- .../sql/connector/catalog/ViewCatalog.java | 34 +++++++- .../datasources/v2/CreateV2ViewExec.scala | 78 ++++++++----------- 2 files changed, 64 insertions(+), 48 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewCatalog.java index 3731f7a6b28d6..14bf43c3499d3 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewCatalog.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewCatalog.java @@ -134,10 +134,9 @@ ViewInfo createView(Identifier ident, ViewInfo info) /** * Atomically replace an existing view's metadata. *

- * Used by {@code ALTER VIEW ... AS} and as the replace branch of {@code CREATE OR REPLACE - * VIEW}. Implementations should commit the new metadata atomically; views carry no data, so a - * single transactional metastore call (or equivalent) is sufficient -- there is no separate - * staging API. + * Used by {@code ALTER VIEW ... AS}. Implementations should commit the new metadata + * atomically; views carry no data, so a single transactional metastore call (or equivalent) + * is sufficient -- there is no separate staging API. * * @param ident the view identifier * @param info the new view metadata @@ -147,6 +146,33 @@ ViewInfo createView(Identifier ident, ViewInfo info) */ ViewInfo replaceView(Identifier ident, ViewInfo info) throws NoSuchViewException; + /** + * Create a view if one does not exist at {@code ident}, or atomically replace it if one does. + *

+ * Used by {@code CREATE OR REPLACE VIEW}. The default implementation calls + * {@link #replaceView}, falling back to {@link #createView} on + * {@link NoSuchViewException}. The fallback is non-atomic across the two calls (a concurrent + * drop or create can race), so catalogs that can answer the upsert in a single transactional + * call should override this method to collapse to one RPC and to make the swap atomic. + *

+ * In mixed catalogs, must throw {@link ViewAlreadyExistsException} if {@code ident} resolves + * to a non-view table (cross-type collision is rejected; the table is not touched). + * + * @param ident the view identifier + * @param info the view metadata + * @return the metadata of the created or replaced view; may equal {@code info} + * @throws ViewAlreadyExistsException if {@code ident} resolves to a table in a mixed catalog + * @throws NoSuchNamespaceException if the identifier's namespace does not exist (optional) + */ + default ViewInfo createOrReplaceView(Identifier ident, ViewInfo info) + throws ViewAlreadyExistsException, NoSuchNamespaceException { + try { + return replaceView(ident, info); + } catch (NoSuchViewException e) { + return createView(ident, info); + } + } + /** * Drop a view. *

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala index b944332825d24..6cfa95a2eaf43 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala @@ -21,7 +21,7 @@ import scala.jdk.CollectionConverters._ import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.{CurrentUserContext, InternalRow} -import org.apache.spark.sql.catalyst.analysis.{NoSuchViewException, ResolvedIdentifier, SchemaEvolution, ViewAlreadyExistsException, ViewSchemaMode} +import org.apache.spark.sql.catalyst.analysis.{ResolvedIdentifier, SchemaEvolution, ViewAlreadyExistsException, ViewSchemaMode} import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.util.CharVarcharUtils @@ -115,9 +115,9 @@ private[v2] trait V2ViewPreparation extends LeafV2CommandExec { /** * Physical plan node for CREATE VIEW on a v2 [[ViewCatalog]]. Dispatches to - * [[ViewCatalog#createView]] for plain CREATE / `IF NOT EXISTS`, and to - * [[ViewCatalog#replaceView]] for `OR REPLACE`. CREATE OR REPLACE on a non-existent view falls - * back to `createView`. + * [[ViewCatalog#createView]] for plain CREATE, [[ViewCatalog#createOrReplaceView]] for + * `OR REPLACE`, and short-circuits `IF NOT EXISTS` early via [[ViewCatalog#viewExists]] so + * the view body isn't analyzed when the view already exists. 
*/ case class CreateV2ViewExec( catalog: ViewCatalog, @@ -135,50 +135,40 @@ case class CreateV2ViewExec( override def owner: Option[String] = Some(CurrentUserContext.getCurrentUser) override protected def run(): Seq[InternalRow] = { - // Probe before preparing the view body so `IF NOT EXISTS` and the type-collision check can - // short-circuit before running `aliasPlan` / config capture (matches v1 - // `CreateViewCommand.run`). Cyclic-reference detection runs at analysis time in - // `CheckViewReferences`. - // - // For mixed catalogs (also implementing `TableCatalog`), also probe `tableExists` so: - // * `CREATE VIEW IF NOT EXISTS` over a non-view table is a no-op (v1 parity, see - // `SQLViewSuite` "existing a table with the duplicate name when CREATE VIEW IF NOT - // EXISTS"), and - // * a non-IF-NOT-EXISTS CREATE / OR REPLACE surfaces the dedicated - // `EXPECT_VIEW_NOT_TABLE` / `TABLE_OR_VIEW_ALREADY_EXISTS` error before any view write. - val viewExists = catalog.viewExists(identifier) - val tableExists = catalog match { - case tc: TableCatalog => tc.tableExists(identifier) - case _ => false - } - if (allowExisting && (viewExists || tableExists)) { - return Seq.empty - } - if (tableExists) { - throw QueryCompilationErrors.unsupportedCreateOrReplaceViewOnTableError( - fullNameParts, replace) - } - if (viewExists && !replace) throw viewAlreadyExists() + // CREATE VIEW IF NOT EXISTS: short-circuit before `buildViewInfo` if a view already sits + // at the ident -- avoids `aliasPlan` / config capture for the common no-op case (matches + // v1 `CreateViewCommand.run`). The mixed-catalog "table at ident" no-op is handled in the + // catch block below; that case is rare enough that paying for `buildViewInfo` is fine. + if (allowExisting && catalog.viewExists(identifier)) return Seq.empty val info = buildViewInfo() - if (replace && viewExists) { - // CREATE OR REPLACE on an existing view: replaceView is the single atomic-swap call. 
- CommandUtils.uncacheTableOrView(session, ResolvedIdentifier(catalog, identifier)) - try { - catalog.replaceView(identifier, info) - } catch { - case _: NoSuchViewException => - // Race: the view disappeared between the existence probe and replaceView. Fall back - // to createView to honor REPLACE-or-create semantics. - catalog.createView(identifier, info) - } - } else { - // Plain CREATE (or CREATE OR REPLACE on a non-existent view). - try { + try { + if (replace) { + CommandUtils.uncacheTableOrView(session, ResolvedIdentifier(catalog, identifier)) + catalog.createOrReplaceView(identifier, info) + } else { catalog.createView(identifier, info) - } catch { - case _: ViewAlreadyExistsException => throw viewAlreadyExists() } + } catch { + case _: ViewAlreadyExistsException => + // Catalog refused: something already occupies the ident. Decode whether it's a table + // (cross-type collision) or a view (race for plain CREATE / OR REPLACE), and emit the + // precise error -- or no-op for IF NOT EXISTS. + val isTable = catalog match { + case tc: TableCatalog => tc.tableExists(identifier) + case _ => false + } + if (isTable) { + if (!allowExisting) { + throw QueryCompilationErrors.unsupportedCreateOrReplaceViewOnTableError( + fullNameParts, replace) + } + // CREATE VIEW IF NOT EXISTS over a table is a no-op (v1 parity). + } else if (!allowExisting) { + throw viewAlreadyExists() + } + // else: a view appeared between our viewExists probe and createView; IF NOT EXISTS + // semantics make this a no-op. } Seq.empty } From 62b26138ec6f9ed5610067fcf71020ab7611f85a Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Sun, 26 Apr 2026 10:30:01 +0000 Subject: [PATCH 44/59] address self-review findings: minor Javadoc grammar fix ViewInfo.java:64 -- "remember withTableType(VIEW)" parses as "remember [the noun-form method call]"; add the missing infinitive ("remember to call withTableType(VIEW)") so the sentence reads cleanly. 
Co-authored-by: Isaac --- .../java/org/apache/spark/sql/connector/catalog/ViewInfo.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewInfo.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewInfo.java index 1406255519420..27525adc1f367 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewInfo.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewInfo.java @@ -61,7 +61,7 @@ private ViewInfo(Builder builder) { // Force PROP_TABLE_TYPE = VIEW so that `properties()` reflects the typed ViewInfo // classification. Catalogs and generic viewers reading PROP_TABLE_TYPE from the properties // bag (e.g. TableCatalog.listTableSummaries default impl, DESCRIBE) see "VIEW" without - // requiring authors to remember withTableType(VIEW). + // requiring authors to remember to call withTableType(VIEW). properties().put(TableCatalog.PROP_TABLE_TYPE, TableSummary.VIEW_TABLE_TYPE); } From 834322489ae1347fef60d4f4002ca5531a9deca9 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Sun, 26 Apr 2026 10:46:06 +0000 Subject: [PATCH 45/59] address self-review findings: drop "schema mode" from transient-fields list V2AlterViewPreparation scaladoc listed "schema mode" among the transient fields re-captured from the session, contradicting the same scaladoc two lines earlier ("preserve ... schema binding mode") and the implementation at lines 78-86 (viewSchemaMode reads existingView.schemaMode -- preserved, not re-captured). 
Co-authored-by: Isaac --- .../spark/sql/execution/datasources/v2/AlterV2ViewExec.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala index 35c84b50bad75..cb21e773d86c4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala @@ -36,7 +36,7 @@ import org.apache.spark.sql.execution.command.CommandUtils * non-view table); in that case we surface a regular no-such-view / not-a-view analysis error * rather than propagating a stale analyzer decision. * - * Transient fields (SQL configs, query column names, schema mode) are re-captured from the + * Transient fields (SQL configs, query column names) are re-captured from the * current session by [[V2ViewPreparation.buildViewInfo]], matching v1 * `AlterViewAsCommand.alterPermanentView`. PROP_OWNER and user TBLPROPERTIES flow through * unchanged. 
From 08067be9116a252568bb9d4bed34c4edcfddf416 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Sun, 26 Apr 2026 11:00:49 +0000 Subject: [PATCH 46/59] address self-review findings: minor Scaladoc grammar fix in DropTableExec --- .../spark/sql/execution/datasources/v2/DropTableExec.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala index 18f5566a8f468..831b05149fc5c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala @@ -26,7 +26,7 @@ import org.apache.spark.util.ArrayImplicits._ /** * Physical plan node for dropping a table. * - * Issues `dropTable` (or `purgeTable`) directly and inspects its return; this saves the + * Issues `dropTable` (or `purgeTable`) directly and inspects its return value; this saves the * upfront `tableExists` probe (1 RPC on the happy path). On `false`, falls back to * `viewExists` for catalogs that also implement [[ViewCatalog]] -- distinguishes * "wrong type" from "missing" so a `DROP TABLE someView` on a mixed catalog surfaces the From 903a495e8d709a8d583d51e6c5788feaacf3dd81 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Sun, 26 Apr 2026 14:41:51 +0000 Subject: [PATCH 47/59] add RelationCatalog: dedicated interface for catalogs serving both tables and views Reshapes the table-and-view catalog story around an explicit `RelationCatalog extends TableCatalog, ViewCatalog`. The new interface owns all the cross-cutting rules; `TableCatalog` and `ViewCatalog` revert to strict single-kind APIs. Why: in the previous shape, `TableCatalog.loadTable` carried a perf opt-in (return `MetadataOnlyTable(ViewInfo)` for views). 
That leaked through `tableExists`'s default impl (which calls `loadTable`), silently breaking three independent framework call sites for any mixed catalog that used the opt-in without overriding `tableExists`. The new shape makes the perf opt-in explicit on a dedicated method and removes the leak by construction. API: - New `RelationCatalog` (`@Evolving`, since 4.2.0) with `loadRelation(ident)` and a default `listRelations(namespace)`. Class Javadoc owns the "two principles" (orthogonal interfaces + single identifier namespace) and the per-method cross-type contract tables (active rejection on writes, passive filtering on reads). - `TableCatalog.loadTable` Javadoc tightened: tables only. The "may return MetadataOnlyTable wrapping ViewInfo" clause is gone. `tableExists`'s default impl is now correct under any catalog. - `TableCatalog` and `ViewCatalog` class-level docs read as strict single-kind APIs and point mixed implementers at `RelationCatalog`. Per-method cross-type clauses are stripped from both interfaces. Enforcement: `Catalogs.load` rejects any plugin that implements both `TableCatalog` and `ViewCatalog` directly without `RelationCatalog`, with a clear error message naming the right interface. Resolver: `Analyzer.lookupTableOrView` and `RelationResolution.tryResolvePersistent` check `RelationCatalog` first and call `loadRelation` (one RPC); otherwise fall through to the existing `loadTable` -> `loadView` two-step. Test catalogs: - `TestingViewCatalog` extends `RelationCatalog`; `loadTable` is tightened to tables-only, view fixtures move to `loadRelation`. - `TestingStagingCatalog` extends `StagingTableCatalog with RelationCatalog`. - `TestingTableOnlyCatalog`'s dead view fixture is removed. 
Co-authored-by: Isaac --- .../connector/catalog/MetadataOnlyTable.java | 16 +- .../connector/catalog/RelationCatalog.java | 160 ++++++++++++++++++ .../sql/connector/catalog/TableCatalog.java | 82 +++------ .../sql/connector/catalog/ViewCatalog.java | 73 ++------ .../spark/sql/connector/catalog/ViewInfo.java | 11 +- .../sql/catalyst/analysis/Analyzer.scala | 92 +++++----- .../analysis/RelationResolution.scala | 85 ++++++---- .../sql/connector/catalog/Catalogs.scala | 18 ++ .../DataSourceV2MetadataOnlyViewSuite.scala | 145 +++++++++------- 9 files changed, 416 insertions(+), 266 deletions(-) create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/RelationCatalog.java diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java index a550ce460d1c8..b20a9b566646f 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java @@ -33,9 +33,12 @@ * (for views) at read time. *

* Catalogs build the metadata via {@link TableInfo.Builder} (for data-source tables) or - * {@link ViewInfo.Builder} (for views) and wrap the result in a {@code MetadataOnlyTable} to - * return from {@link TableCatalog#loadTable(Identifier)}. Downstream consumers distinguish - * the two by checking {@code getTableInfo() instanceof ViewInfo}. + * {@link ViewInfo.Builder} (for views). A {@code MetadataOnlyTable} wrapping a + * {@link TableInfo} can be returned from {@link TableCatalog#loadTable(Identifier)} for a + * data-source table; a {@code MetadataOnlyTable} wrapping a {@link ViewInfo} can be returned + * from {@link RelationCatalog#loadRelation(Identifier)} as the single-RPC perf opt-in for a view. + * Downstream consumers distinguish the two by checking + * {@code getTableInfo() instanceof ViewInfo}. * * @since 4.2.0 */ @@ -48,9 +51,10 @@ public class MetadataOnlyTable implements Table { * @param info metadata for the table or view. Pass a {@link ViewInfo} for a view. * @param name human-readable name for this table, used by places that read {@link #name()} * (e.g. the {@code Name} row of {@code DESCRIBE TABLE EXTENDED}). Catalogs - * returning a {@code MetadataOnlyTable} from {@link TableCatalog#loadTable} - * should typically pass {@code ident.toString()}, matching the quoted - * multi-part form used elsewhere for v2 identifiers. + * returning a {@code MetadataOnlyTable} from {@link TableCatalog#loadTable} or + * {@link RelationCatalog#loadRelation} should typically pass + * {@code ident.toString()}, matching the quoted multi-part form used elsewhere + * for v2 identifiers. 
*/ public MetadataOnlyTable(TableInfo info, String name) { this.info = Objects.requireNonNull(info, "info should not be null"); diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/RelationCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/RelationCatalog.java new file mode 100644 index 0000000000000..795deb295f7f8 --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/RelationCatalog.java @@ -0,0 +1,160 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.connector.catalog; + +import java.util.ArrayList; + +import org.apache.spark.annotation.Evolving; +import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException; +import org.apache.spark.sql.catalyst.analysis.NoSuchTableException; + +/** + * Catalog API for connectors that expose both tables and views in a single shared identifier + * namespace. + *

+ * Connectors that expose both tables and views must implement {@code RelationCatalog}; + * implementing {@link TableCatalog} and {@link ViewCatalog} directly without + * {@code RelationCatalog} is rejected at catalog initialization. Connectors that expose only + * tables implement just {@link TableCatalog}; connectors that expose only views implement just + * {@link ViewCatalog}; this interface is not relevant to them. + * + *

Two principles

+ * + * A {@code RelationCatalog} follows two rules that, taken together, define every cross-cutting + * subtlety: + * <ol>
+ * <li>Orthogonal interfaces. Every {@link TableCatalog} method behaves as if views did + * not exist, and every {@link ViewCatalog} method behaves as if tables did not exist. + * From the perspective of a {@code TableCatalog} caller, a view at an identifier is + * indistinguishable from "nothing there"; symmetrically for {@code ViewCatalog} on + * tables. The implementation, of course, knows about both kinds -- it just filters them + * apart at each method boundary.</li>
+ * <li>Single identifier namespace. Tables and views share one keyspace within a + * namespace; the same {@link Identifier} cannot resolve to both at the same time. The + * implementation typically enforces this with a single backing keyspace plus a kind + * discriminator.</li>
+ * </ol>
+ * + *

Per-method cross-type behavior

+ * + * Active rejection (write-side methods that throw on cross-type collision): + * <table>
+ * <caption>Cross-type rejection</caption>
+ * <tr><th>Method</th><th>Rejects when</th><th>Throws</th></tr>
+ * <tr><td>{@link TableCatalog#createTable}</td><td>a view sits at {@code ident}</td><td>{@link org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException}</td></tr>
+ * <tr><td>{@link TableCatalog#renameTable}</td><td>a view sits at {@code newIdent}</td><td>{@link org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException}</td></tr>
+ * <tr><td>{@link ViewCatalog#createView}</td><td>a table sits at {@code ident}</td><td>{@link org.apache.spark.sql.catalyst.analysis.ViewAlreadyExistsException}</td></tr>
+ * <tr><td>{@link ViewCatalog#createOrReplaceView}</td><td>a table sits at {@code ident}</td><td>{@link org.apache.spark.sql.catalyst.analysis.ViewAlreadyExistsException}</td></tr>
+ * <tr><td>{@link ViewCatalog#replaceView}</td><td>a table sits at {@code ident}</td><td>{@link org.apache.spark.sql.catalyst.analysis.NoSuchViewException}</td></tr>
+ * </table>
+ * + * Passive filtering (read / non-collision mutation methods that behave as if the wrong + * kind doesn't exist): + * <table>
+ * <caption>Cross-type filtering</caption>
+ * <tr><th>Method</th><th>On wrong-kind ident</th></tr>
+ * <tr><td>{@link TableCatalog#loadTable(Identifier)}</td><td>throws {@code NoSuchTableException} for a view</td></tr>
+ * <tr><td>{@link TableCatalog#loadTable(Identifier, String)} / + * {@link TableCatalog#loadTable(Identifier, long)}</td><td>throws {@code NoSuchTableException} for a view (no perf opt-in -- time-travel does + * not apply to views)</td></tr>
+ * <tr><td>{@link TableCatalog#tableExists}</td><td>returns {@code false} for a view</td></tr>
+ * <tr><td>{@link TableCatalog#dropTable} / {@link TableCatalog#purgeTable}</td><td>returns {@code false} for a view; does not drop it</td></tr>
+ * <tr><td>{@link TableCatalog#renameTable}</td><td>throws {@code NoSuchTableException} when the source is a view</td></tr>
+ * <tr><td>{@link TableCatalog#listTables}</td><td>tables only</td></tr>
+ * <tr><td>{@link ViewCatalog#loadView}</td><td>throws {@code NoSuchViewException} for a table</td></tr>
+ * <tr><td>{@link ViewCatalog#viewExists}</td><td>returns {@code false} for a table</td></tr>
+ * <tr><td>{@link ViewCatalog#dropView}</td><td>returns {@code false} for a table; does not drop it</td></tr>
+ * <tr><td>{@link ViewCatalog#listViews}</td><td>views only</td></tr>
+ * </table>
+ * + *

Single-RPC perf entry points

+ * + * The orthogonal {@link TableCatalog} and {@link ViewCatalog} answer two cross-cutting + * questions in two round trips each. {@code RelationCatalog} adds dedicated methods so a + * catalog can answer both in one round trip: + * <ul>
+ * <li>{@link #loadRelation(Identifier)} -- the resolver's per-identifier read path. Returns + * a regular {@link Table} for a table, or a {@link MetadataOnlyTable} wrapping a + * {@link ViewInfo} for a view. Saves the {@code loadTable} -&gt; {@code loadView} fallback + * on a cold cache.</li>
+ * <li>{@link #listRelations(String[])} -- a unified listing of tables and views with the + * kind preserved on each {@link TableSummary}. Default impl performs both + * {@link TableCatalog#listTableSummaries} and {@link ViewCatalog#listViews}; override to + * fetch in one round trip.</li>
+ * </ul>
+ * + * @since 4.2.0 + */ +@Evolving +public interface RelationCatalog extends TableCatalog, ViewCatalog { + + /** + * Load metadata for an identifier that may resolve to either a table or a view. + *

+ * For a table, returns the table's {@link Table}. For a view, returns a + * {@link MetadataOnlyTable} wrapping a {@link ViewInfo}; callers discriminate via + * {@code getTableInfo() instanceof ViewInfo}. This lets the resolver answer in a single RPC + * instead of falling back from {@link TableCatalog#loadTable} to {@link ViewCatalog#loadView}. + * + * @param ident the identifier + * @return a {@link Table} for tables, or a {@link MetadataOnlyTable} wrapping a + * {@link ViewInfo} for views + * @throws NoSuchTableException if neither a table nor a view exists at {@code ident} + */ + Table loadRelation(Identifier ident) throws NoSuchTableException; + + /** + * List the tables and views in a namespace, returned as {@link TableSummary} entries with + * the kind preserved on each summary. + *

+ * The default implementation enumerates via {@link TableCatalog#listTableSummaries} for + * tables and {@link ViewCatalog#listViews} for views (two round trips). Catalogs that can + * fetch the unified listing in a single round trip should override. + * + * @param namespace a multi-part namespace + * @return an array of summaries for both tables and views in the namespace + * @throws NoSuchNamespaceException if the namespace does not exist (optional) + * @throws NoSuchTableException if a table listed by the underlying enumeration disappears + * before its summary can be assembled (default impl only) + */ + default TableSummary[] listRelations(String[] namespace) + throws NoSuchNamespaceException, NoSuchTableException { + TableSummary[] tableSummaries = listTableSummaries(namespace); + Identifier[] viewIdentifiers = listViews(namespace); + ArrayList<TableSummary> all = new ArrayList<>( + tableSummaries.length + viewIdentifiers.length); + for (TableSummary s : tableSummaries) { + all.add(s); + } + for (Identifier id : viewIdentifiers) { + all.add(TableSummary.of(id, TableSummary.VIEW_TABLE_TYPE)); + } + return all.toArray(TableSummary[]::new); + } +} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java index e415752914142..55894357f19d1 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java @@ -31,13 +31,18 @@ import java.util.Set; /** - * Catalog methods for working with Tables. + * Catalog API for connectors that expose tables. + *

+ * Connectors that expose only tables implement this interface. Connectors that expose + * both tables and views must implement {@link RelationCatalog} (which extends both this + * interface and {@link ViewCatalog} and adds the cross-cutting contract for the combined + * case); the methods on this interface remain table-only -- they do not interact with views. *

* TableCatalog implementations may be case-sensitive or case-insensitive. Spark will pass * {@link Identifier table identifiers} without modification. Field names passed to - * {@link #alterTable(Identifier, TableChange...)} will be normalized to match the case used in the - * table schema when updating, renaming, or dropping existing columns when catalyst analysis is - * case-insensitive. + * {@link #alterTable(Identifier, TableChange...)} will be normalized to match the case used in + * the table schema when updating, renaming, or dropping existing columns when catalyst + * analysis is case-insensitive. * * @since 3.0.0 */ @@ -99,9 +104,6 @@ public interface TableCatalog extends CatalogPlugin { /** * List the tables in a namespace from the catalog. - *

- * Returns table identifiers only -- views (if the catalog also implements {@link ViewCatalog}) - * are listed separately via {@link ViewCatalog#listViews}. * * @param namespace a multi-part namespace * @return an array of Identifiers for tables @@ -143,35 +145,21 @@ default TableSummary[] listTableSummaries(String[] namespace) /** * Load table metadata by {@link Identifier identifier} from the catalog. - *

- * If {@code ident} resolves to a view in a mixed catalog (one that also implements - * {@link ViewCatalog}), this should throw {@link NoSuchTableException} -- views are loaded - * via {@link ViewCatalog#loadView}. As a perf optimization, a mixed catalog may instead - * return a {@link MetadataOnlyTable} wrapping a {@link ViewInfo} from this method; Spark's - * resolver detects the wrapper and routes through view resolution without a follow-up - * {@code loadView} call. The optimization is opt-in -- correctly throwing - * {@code NoSuchTableException} for a view identifier and letting Spark fall back to - * {@code loadView} is also valid. * * @param ident a table identifier - * @return the table's metadata, or a {@link MetadataOnlyTable} wrapping a {@link ViewInfo} - * (perf opt-in for mixed catalogs) - * @throws NoSuchTableException If the table doesn't exist (or is a view in a mixed catalog - * that does not use the perf opt-in) + * @return the table's metadata + * @throws NoSuchTableException If the table doesn't exist */ Table loadTable(Identifier ident) throws NoSuchTableException; /** * Load table metadata by {@link Identifier identifier} from the catalog. Spark will write data * into this table later. - *

- * Contract for views matches {@link #loadTable(Identifier)}. * * @param ident a table identifier * @param writePrivileges * @return the table's metadata - * @throws NoSuchTableException If the table doesn't exist or is a view (see - * {@link #loadTable(Identifier)} for the view contract). + * @throws NoSuchTableException If the table doesn't exist * * @since 3.5.3 */ @@ -183,14 +171,11 @@ default Table loadTable( /** * Load table metadata of a specific version by {@link Identifier identifier} from the catalog. - *

- * Time-travel targets a versioned table, not a view. This must throw - * {@link NoSuchTableException} for a view identifier. * * @param ident a table identifier * @param version version of the table * @return the table's metadata - * @throws NoSuchTableException If the table doesn't exist or is a view + * @throws NoSuchTableException If the table doesn't exist */ default Table loadTable(Identifier ident, String version) throws NoSuchTableException { throw QueryCompilationErrors.noSuchTableError(name(), ident); @@ -198,14 +183,11 @@ default Table loadTable(Identifier ident, String version) throws NoSuchTableExce /** * Load table metadata at a specific time by {@link Identifier identifier} from the catalog. - *

- * Time-travel targets a versioned table, not a view. This must throw - * {@link NoSuchTableException} for a view identifier. * * @param ident a table identifier * @param timestamp timestamp of the table, which is microseconds since 1970-01-01 00:00:00 UTC * @return the table's metadata - * @throws NoSuchTableException If the table doesn't exist or is a view + * @throws NoSuchTableException If the table doesn't exist */ default Table loadTable(Identifier ident, long timestamp) throws NoSuchTableException { throw QueryCompilationErrors.noSuchTableError(name(), ident); @@ -244,9 +226,6 @@ default void invalidateTable(Identifier ident) { /** * Test whether a table exists using an {@link Identifier identifier} from the catalog. - *

- * Returns {@code false} for a view identifier in a mixed catalog (also implementing - * {@link ViewCatalog}); view existence is checked via {@link ViewCatalog#viewExists}. * * @param ident a table identifier * @return true if a table exists at {@code ident}, false otherwise @@ -291,18 +270,13 @@ default Table createTable( /** * Create a table in the catalog. - *

- * In mixed catalogs (also implementing {@link ViewCatalog}) tables and views share an - * identifier namespace; this method must throw {@link TableAlreadyExistsException} if - * {@code ident} already names a view. Views themselves are created via - * {@link ViewCatalog#createView}. * * @param ident a table identifier * @param tableInfo information about the table * @return metadata for the new table. This can be null if getting the metadata for the new table * is expensive. Spark will call {@link #loadTable(Identifier)} if needed (e.g. CTAS). * - * @throws TableAlreadyExistsException If a table or view already exists for the identifier + * @throws TableAlreadyExistsException If a table already exists for the identifier * @throws UnsupportedOperationException If a requested partition transform is not supported * @throws NoSuchNamespaceException If the identifier namespace does not exist (optional) * @since 4.1.0 @@ -334,7 +308,7 @@ default Table createTable(Identifier ident, TableInfo tableInfo) * or other custom state from this object to clone additional metadata * @return metadata for the new table * - * @throws TableAlreadyExistsException If a table or view already exists for the identifier + * @throws TableAlreadyExistsException If a table already exists for the identifier * @throws NoSuchNamespaceException If the identifier namespace does not exist (optional) * @throws UnsupportedOperationException If the catalog does not support CREATE TABLE LIKE * @since 4.2.0 @@ -360,18 +334,13 @@ default boolean useNullableQuerySchema() { * changes should be applied to the table. *

* The requested changes must be applied in the order given. - *

- * {@code alterTable} targets tables only. In a mixed catalog (also implementing - * {@link ViewCatalog}) this must throw {@link NoSuchTableException} when {@code ident} - * resolves to a view; view DDL is handled by {@link ViewCatalog} (e.g. - * {@link ViewCatalog#replaceView} for {@code ALTER VIEW ... AS}). * * @param ident a table identifier * @param changes changes to apply to the table * @return updated metadata for the table. This can be null if getting the metadata for the * updated table is expensive. Spark always discard the returned table here. * - * @throws NoSuchTableException If the table doesn't exist or is a view + * @throws NoSuchTableException If the table doesn't exist * @throws IllegalArgumentException If any change is rejected by the implementation. */ Table alterTable( @@ -380,10 +349,6 @@ Table alterTable( /** * Drop a table in the catalog. - *

- * In a mixed catalog (also implementing {@link ViewCatalog}) this must not drop a view and - * must return {@code false} when {@code ident} resolves to a view; views are dropped via - * {@link ViewCatalog#dropView}. * * @param ident a table identifier * @return true if a table was deleted, false if no table exists for the identifier @@ -394,10 +359,6 @@ Table alterTable( * Drop a table in the catalog and completely remove its data by skipping a trash even if it is * supported. *

- * {@code purgeTable} targets tables only. In a mixed catalog (also implementing - * {@link ViewCatalog}) this must not drop a view and must return {@code false} for a view - * identifier -- purge semantics (data removal) do not apply to views. - *

* If the catalog supports to purge a table, this method should be overridden. * The default implementation throws {@link UnsupportedOperationException}. * @@ -414,18 +375,13 @@ default boolean purgeTable(Identifier ident) throws UnsupportedOperationExceptio /** * Renames a table in the catalog. *

- * {@code renameTable} targets tables only. In a mixed catalog (also implementing - * {@link ViewCatalog}) this must throw {@link NoSuchTableException} when {@code oldIdent} - * resolves to a view, and must throw {@link TableAlreadyExistsException} if {@code newIdent} - * collides with an existing table or view. - *

* If the catalog does not support table renames between namespaces, it throws * {@link UnsupportedOperationException}. * * @param oldIdent the table identifier of the existing table to rename * @param newIdent the new table identifier of the table - * @throws NoSuchTableException If the table to rename doesn't exist or is a view - * @throws TableAlreadyExistsException If the new table name already exists or is a view + * @throws NoSuchTableException If the table to rename doesn't exist + * @throws TableAlreadyExistsException If the new table name already exists * @throws UnsupportedOperationException If the namespaces of old and new identifiers do not * match (optional) */ diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewCatalog.java index 14bf43c3499d3..184676023d7c4 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewCatalog.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewCatalog.java @@ -22,40 +22,15 @@ import org.apache.spark.sql.catalyst.analysis.ViewAlreadyExistsException; /** - * Catalog API for read and write access to views. + * Catalog API for connectors that expose views. *

- * A connector that wants to expose views implements this interface. The interface is independent - * from {@link TableCatalog}: a connector can implement just {@code ViewCatalog} (a view-only - * catalog), just {@code TableCatalog} (a table-only catalog), or both. There is no capability - * flag to declare; the presence of {@code ViewCatalog} on the catalog plugin is the - * signal that it supports views. - * - *

Mixed catalogs (implementing both {@code TableCatalog} and {@code ViewCatalog})

- * - * The two interfaces are independent: every {@code TableCatalog} method behaves as if views did - * not exist, and every {@code ViewCatalog} method behaves as if tables did not exist. The only - * cross-cutting invariant is that tables and views share a single identifier namespace in - * the catalog: the same identifier cannot resolve to both a table and a view at the same time. - * That invariant manifests in two places: - *
    - *
  • {@link TableCatalog#createTable} must reject (with - * {@link org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException}) if the - * identifier already names a view.
  • - *
  • {@link #createView} must reject (with {@link ViewAlreadyExistsException}) if the - * identifier already names a table.
  • - *
- * - *

Resolution and the optional perf opt-in for mixed catalogs

- * - * Spark resolves an identifier by calling {@link TableCatalog#loadTable} first; on - * {@link org.apache.spark.sql.catalyst.analysis.NoSuchTableException} it falls back to - * {@link #loadView} when the catalog also implements {@code ViewCatalog}. That fallback costs an - * extra RPC per cold-cache view lookup. To skip it, a perf-conscious mixed catalog may return a - * {@link MetadataOnlyTable} wrapping the {@link ViewInfo} from - * {@link TableCatalog#loadTable} when the identifier resolves to a view; Spark recognizes the - * {@code ViewInfo} payload and routes through view resolution without a follow-up - * {@code loadView} call. {@code loadView} is still used directly for view DDL paths - * (DROP VIEW, ALTER VIEW, SHOW CREATE TABLE, etc.). + * Connectors that expose only views implement this interface. Connectors that expose + * both tables and views must implement {@link RelationCatalog} (which extends both this + * interface and {@link TableCatalog} and adds the cross-cutting contract for the combined + * case); the methods on this interface remain view-only -- they do not interact with tables. + *

+ * The presence of {@code ViewCatalog} on the catalog plugin is the signal that it + * supports views; there is no capability flag to declare. * * @since 4.2.0 */ @@ -64,9 +39,6 @@ public interface ViewCatalog extends CatalogPlugin { /** * List the views in a namespace from the catalog. - *

- * For mixed catalogs, this must return identifiers for views only (tables are listed via - * {@link TableCatalog#listTables}). * * @param namespace a multi-part namespace * @return an array of identifiers for views @@ -76,14 +48,10 @@ public interface ViewCatalog extends CatalogPlugin { /** * Load view metadata by identifier. - *

- * For mixed catalogs, throws {@link NoSuchViewException} when {@code ident} resolves to a - * table rather than a view. * * @param ident a view identifier * @return the view metadata - * @throws NoSuchViewException if the view does not exist (or {@code ident} is a table in a - * mixed catalog) + * @throws NoSuchViewException if the view does not exist */ ViewInfo loadView(Identifier ident) throws NoSuchViewException; @@ -118,14 +86,11 @@ default void invalidateView(Identifier ident) { /** * Create a view. - *

- * In mixed catalogs, must throw {@link ViewAlreadyExistsException} if {@code ident} already - * names a table or a view. * * @param ident the view identifier * @param info the view metadata * @return the metadata of the newly created view; may equal {@code info} - * @throws ViewAlreadyExistsException if a view or table already exists at {@code ident} + * @throws ViewAlreadyExistsException if a view already exists at {@code ident} * @throws NoSuchNamespaceException if the identifier's namespace does not exist (optional) */ ViewInfo createView(Identifier ident, ViewInfo info) @@ -141,8 +106,7 @@ ViewInfo createView(Identifier ident, ViewInfo info) * @param ident the view identifier * @param info the new view metadata * @return the metadata of the replaced view; may equal {@code info} - * @throws NoSuchViewException if no view exists at {@code ident} (or {@code ident} is a table - * in a mixed catalog) + * @throws NoSuchViewException if no view exists at {@code ident} */ ViewInfo replaceView(Identifier ident, ViewInfo info) throws NoSuchViewException; @@ -154,14 +118,15 @@ ViewInfo createView(Identifier ident, ViewInfo info) * {@link NoSuchViewException}. The fallback is non-atomic across the two calls (a concurrent * drop or create can race), so catalogs that can answer the upsert in a single transactional * call should override this method to collapse to one RPC and to make the swap atomic. - *

- * In mixed catalogs, must throw {@link ViewAlreadyExistsException} if {@code ident} resolves - * to a non-view table (cross-type collision is rejected; the table is not touched). * * @param ident the view identifier * @param info the view metadata * @return the metadata of the created or replaced view; may equal {@code info} - * @throws ViewAlreadyExistsException if {@code ident} resolves to a table in a mixed catalog + * @throws ViewAlreadyExistsException if {@code ident} cannot host this view -- either a + * concurrent {@code CREATE VIEW} won the race in the + * default impl's gap between {@link #replaceView} and + * the fallback {@link #createView}, or, in a + * {@link RelationCatalog}, a table sits at {@code ident} * @throws NoSuchNamespaceException if the identifier's namespace does not exist (optional) */ default ViewInfo createOrReplaceView(Identifier ident, ViewInfo info) @@ -175,12 +140,6 @@ default ViewInfo createOrReplaceView(Identifier ident, ViewInfo info) /** * Drop a view. - *

- * Returns {@code true} if a view was dropped at {@code ident}, {@code false} otherwise. In - * mixed catalogs, returns {@code false} if {@code ident} is a table (the table is not - * touched). Spark's resolver guards the call site so that {@code DROP VIEW} on a table or - * {@code DROP TABLE} on a view surfaces the dedicated {@code EXPECT_VIEW_NOT_TABLE} / - * {@code EXPECT_TABLE_NOT_VIEW} error before this method is invoked. * * @param ident a view identifier * @return true if a view was dropped, false otherwise diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewInfo.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewInfo.java index 27525adc1f367..da82de01f8e4d 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewInfo.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewInfo.java @@ -31,12 +31,11 @@ * query output column names. Schema and user TBLPROPERTIES are inherited from {@link TableInfo} * via the typed builder. *

- * {@code ViewInfo} extends {@link TableInfo} so that a mixed catalog (one implementing both - * {@link TableCatalog} and {@link ViewCatalog}) can opt into the perf optimization of returning - * a {@link MetadataOnlyTable} wrapping a {@code ViewInfo} from {@link TableCatalog#loadTable} - * for a view identifier. Pure {@link ViewCatalog} implementations never see {@code TableInfo}; - * the typed setters on {@link Builder} cover everything they need to construct a - * {@code ViewInfo}. + * {@code ViewInfo} extends {@link TableInfo} so that a {@link RelationCatalog} can opt into the + * single-RPC perf path by returning a {@link MetadataOnlyTable} wrapping a {@code ViewInfo} + * from {@link RelationCatalog#loadRelation} for a view identifier. Pure {@link ViewCatalog} + * implementations never see {@code TableInfo}; the typed setters on {@link Builder} cover + * everything they need to construct a {@code ViewInfo}. * * @since 4.2.0 */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index b62acadcdc173..060f479e5b5e6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1109,11 +1109,11 @@ class Analyzer( * so surfacing a downstream "view not found" would hide the real reason. * * Lookup order against a non-session catalog: - * 1. [[TableCatalog.loadTable]] if implemented. A returned [[MetadataOnlyTable]] wrapping - * a [[ViewInfo]] is interpreted as a view (perf opt-in for mixed catalogs that prefer - * to answer in a single RPC); other results are tables. - * 2. If `loadTable` did not produce a result and the catalog is a [[ViewCatalog]], - * [[ViewCatalog.loadView]] is called as the fallback view-resolution path. + * 1. 
If the catalog is a [[RelationCatalog]], [[RelationCatalog.loadRelation]] is called once. + * A returned [[MetadataOnlyTable]] wrapping a [[ViewInfo]] is interpreted as a view; + * other results are tables. + * 2. Otherwise, [[TableCatalog.loadTable]] is tried (when implemented), then + * [[ViewCatalog.loadView]] as the fallback view-resolution path (when implemented). */ private def lookupTableOrView( identifier: Seq[String], @@ -1127,40 +1127,56 @@ class Analyzer( !catalog.isInstanceOf[ViewCatalog]) { throw QueryCompilationErrors.missingCatalogViewsAbilityError(catalog) } - // Skip the table-side lookup entirely for view-only catalogs (no `TableCatalog` mixin): - // `CatalogV2Util.loadTable` would call `asTableCatalog` and throw - // MISSING_CATALOG_ABILITY.TABLES, masking the legitimate view-resolution path. - val tableResolved: Option[LogicalPlan] = if ( - CatalogV2Util.isSessionCatalog(catalog) || catalog.isInstanceOf[TableCatalog] - ) { - CatalogV2Util.loadTable(catalog, ident).map { - case v1Table: V1Table if CatalogV2Util.isSessionCatalog(catalog) && - v1Table.v1Table.tableType == CatalogTableType.VIEW => - val v1Ident = v1Table.catalogTable.identifier - val v2Ident = Identifier.of(v1Ident.database.toArray, v1Ident.identifier) - ResolvedPersistentView( - catalog, v2Ident, v1Table.catalogTable) - case t: MetadataOnlyTable if t.getTableInfo.isInstanceOf[ViewInfo] => - val catalogTable = V1Table.toCatalogTable(catalog, ident, t) - ResolvedPersistentView(catalog, ident, catalogTable) - case table => - ResolvedTable.create(catalog.asTableCatalog, ident, table) - } - } else { - None - } - tableResolved.orElse { - catalog match { - case vc: ViewCatalog => - try { - val viewInfo = vc.loadView(ident) - val catalogTable = V1Table.toCatalogTable(catalog, ident, viewInfo) - Some(ResolvedPersistentView(catalog, ident, catalogTable)) - } catch { - case _: NoSuchViewException => None + catalog match { + case mc: RelationCatalog => + // Single-RPC perf path: loadRelation 
returns a Table for a table or a + // MetadataOnlyTable wrapping a ViewInfo for a view. NoSuchTable means + // neither exists. + try { + Some(mc.loadRelation(ident) match { + case t: MetadataOnlyTable if t.getTableInfo.isInstanceOf[ViewInfo] => + ResolvedPersistentView( + catalog, ident, V1Table.toCatalogTable(catalog, ident, t)) + case table => + ResolvedTable.create(catalog.asTableCatalog, ident, table) + }) + } catch { + case _: NoSuchTableException => None + } + case _ => + // Skip the table-side lookup entirely for view-only catalogs (no + // `TableCatalog` mixin): `CatalogV2Util.loadTable` would call `asTableCatalog` + // and throw MISSING_CATALOG_ABILITY.TABLES, masking the legitimate view- + // resolution path. + val tableResolved: Option[LogicalPlan] = if ( + CatalogV2Util.isSessionCatalog(catalog) || catalog.isInstanceOf[TableCatalog] + ) { + CatalogV2Util.loadTable(catalog, ident).map { + case v1Table: V1Table if CatalogV2Util.isSessionCatalog(catalog) && + v1Table.v1Table.tableType == CatalogTableType.VIEW => + val v1Ident = v1Table.catalogTable.identifier + val v2Ident = Identifier.of(v1Ident.database.toArray, v1Ident.identifier) + ResolvedPersistentView( + catalog, v2Ident, v1Table.catalogTable) + case table => + ResolvedTable.create(catalog.asTableCatalog, ident, table) } - case _ => None - } + } else { + None + } + tableResolved.orElse { + catalog match { + case vc: ViewCatalog => + try { + val viewInfo = vc.loadView(ident) + val catalogTable = V1Table.toCatalogTable(catalog, ident, viewInfo) + Some(ResolvedPersistentView(catalog, ident, catalogTable)) + } catch { + case _: NoSuchViewException => None + } + case _ => None + } + } } case _ => None } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala index 78101317566da..7e7baad7055e5 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala @@ -38,11 +38,13 @@ import org.apache.spark.sql.connector.catalog.{ Identifier, LookupCatalog, MetadataOnlyTable, + RelationCatalog, Table, TableCatalog, V1Table, V2TableWithV1Fallback, - ViewCatalog + ViewCatalog, + ViewInfo } import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.errors.{DataTypeErrorsBase, QueryCompilationErrors} @@ -231,38 +233,59 @@ class RelationResolution( .orElse { val writePrivileges = u.options.get(UnresolvedRelation.REQUIRED_WRITE_PRIVILEGES) val finalOptions = u.clearWritePrivileges.options - // Skip the table-side lookup entirely for view-only catalogs (no `TableCatalog` mixin): - // `CatalogV2Util.loadTable` would call `asTableCatalog` and throw - // MISSING_CATALOG_ABILITY.TABLES, masking the legitimate view-resolution path. A pure - // `ViewCatalog`'s view is resolved below via the `loadView` fallback. - val table: Option[Table] = if ( - CatalogV2Util.isSessionCatalog(catalog) || catalog.isInstanceOf[TableCatalog] - ) { - CatalogV2Util.loadTable( - catalog, - ident, - finalTimeTravelSpec, - Option(writePrivileges)) - } else { - None - } - // Fallback to ViewCatalog for catalogs that host views but where loadTable returned - // None (or was skipped because there's no TableCatalog mixin). Time-travel / write - // privileges only apply to tables, not views, so the fallback is gated on neither. - val tableOrView: Option[Table] = table.orElse { - if (finalTimeTravelSpec.isEmpty && writePrivileges == null) { - catalog match { - case vc: ViewCatalog => - try { - Some(new MetadataOnlyTable(vc.loadView(ident), ident.toString)) - } catch { - case _: NoSuchViewException => None + // For a `RelationCatalog` with no time-travel / write privileges, the single-RPC + // `loadRelation` answers both "is there a table?" 
and "is there a view?" in one + // call. Time-travel and write privileges apply to tables only, so for those the + // lookup falls through to the table-only `loadTable` path below; views are not + // reachable via the v2 fallback in those cases. + // + // Skip the table-side lookup entirely for view-only catalogs (no `TableCatalog` + // mixin): `CatalogV2Util.loadTable` would call `asTableCatalog` and throw + // MISSING_CATALOG_ABILITY.TABLES, masking the legitimate view-resolution path. + val tableOrView: Option[Table] = catalog match { + case mc: RelationCatalog if finalTimeTravelSpec.isEmpty && writePrivileges == null => + try { + Some(mc.loadRelation(ident)) + } catch { + case _: NoSuchTableException => None + } + case _ => + val tableSide: Option[Table] = if ( + CatalogV2Util.isSessionCatalog(catalog) || catalog.isInstanceOf[TableCatalog] + ) { + CatalogV2Util.loadTable( + catalog, + ident, + finalTimeTravelSpec, + Option(writePrivileges)) + } else { + None + } + // Fallback to ViewCatalog for catalogs that host views but where loadTable + // returned None (or was skipped because there's no TableCatalog mixin). + // Time-travel / write privileges only apply to tables, not views, so the + // fallback is gated on neither. + tableSide.orElse { + if (finalTimeTravelSpec.isEmpty && writePrivileges == null) { + catalog match { + case vc: ViewCatalog => + try { + Some(new MetadataOnlyTable(vc.loadView(ident), ident.toString)) + } catch { + case _: NoSuchViewException => None + } + case _ => None } - case _ => None + } else { + None + } } - } else { - None - } + } + // `table` is `tableOrView` filtered to tables only -- used for cache lookup since + // we don't share-cache views. 
+ val table: Option[Table] = tableOrView.filter { + case t: MetadataOnlyTable if t.getTableInfo.isInstanceOf[ViewInfo] => false + case _ => true } val sharedRelationCacheMatch = for { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/Catalogs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/Catalogs.scala index e6c70fdabb159..c9a87c6e88c2a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/Catalogs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/Catalogs.scala @@ -64,6 +64,7 @@ private[sql] object Catalogs { } val plugin = pluginClass.getDeclaredConstructor().newInstance().asInstanceOf[CatalogPlugin] plugin.initialize(name, catalogOptions(name, conf)) + validateRelationCatalog(name, plugin) plugin } catch { case e: ClassNotFoundException => @@ -106,4 +107,21 @@ private[sql] object Catalogs { } new CaseInsensitiveStringMap(options) } + + /** + * Reject catalogs that implement both [[TableCatalog]] and [[ViewCatalog]] without + * extending [[RelationCatalog]]. The combined case has cross-cutting rules (single namespace, + * cross-type collision rejection, perf opt-ins) that live on [[RelationCatalog]]; implementing + * the two interfaces directly would skip that contract. + */ + private def validateRelationCatalog(name: String, plugin: CatalogPlugin): Unit = { + if (plugin.isInstanceOf[TableCatalog] && plugin.isInstanceOf[ViewCatalog] && + !plugin.isInstanceOf[RelationCatalog]) { + throw new IllegalArgumentException( + s"Catalog '$name' (${plugin.getClass.getName}) implements both TableCatalog and " + + s"ViewCatalog directly. 
Catalogs that expose both tables and views must implement " + + s"RelationCatalog instead, which centralizes the cross-cutting rules (shared identifier " + + s"namespace, cross-type collision rejection, single-RPC perf entry points).") + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala index e864a349cfe49..9374070d5dd79 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.connector import org.apache.spark.SparkConf import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, NoSuchViewException, TableAlreadyExistsException, ViewAlreadyExistsException} -import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, StagedTable, StagingTableCatalog, Table, TableCatalog, TableChange, TableInfo, TableSummary, V1Table, ViewCatalog, ViewInfo} +import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, RelationCatalog, StagedTable, StagingTableCatalog, Table, TableCatalog, TableChange, TableInfo, TableSummary, V1Table, ViewCatalog, ViewInfo} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.StructType @@ -988,66 +988,83 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio } /** - * A [[TableCatalog]] implementing ViewCatalog: round-trips [[MetadataOnlyTable]] for created - * views and tables (via `createTable` / `dropTable` / `tableExists` / `listTables`) and exposes - * two canned read-only fixtures (`test_view`, `test_unqualified_view`) used by the view-read - * tests. 
Entries created via `createTable` can be either tables or views -- their - * [[TableCatalog#PROP_TABLE_TYPE]] property is what distinguishes them. + * A [[RelationCatalog]]: round-trips [[MetadataOnlyTable]] for created views and tables and + * exposes a few canned read-only view fixtures (`test_view`, `test_unqualified_view`, + * `test_unqualified_multi`, plus an unqualified-target view at `ns1.ns2.t`) used by the + * view-read tests. Entries created via `createTable` / `createView` are distinguished by the + * stored value's runtime type (ViewInfo vs TableInfo). The single-RPC perf entry point + * [[loadRelation]] returns either kind; [[loadTable]] is tables-only per the + * [[TableCatalog#loadTable]] contract. */ -class TestingViewCatalog extends TableCatalog with ViewCatalog { +class TestingViewCatalog extends RelationCatalog { // Holds entries (views and tables) created via createTable / createView within the session. // Keyed by (namespace, name); the stored value's runtime type (ViewInfo vs TableInfo) // distinguishes views from tables. Mixed-catalog: shared identifier namespace per the - // ViewCatalog contract. + // RelationCatalog contract. private val createdViews = new java.util.concurrent.ConcurrentHashMap[(Seq[String], String), TableInfo]() + // Canned read-only view fixtures, exposed only via the perf path (loadRelation). loadView + // does not need to expose them because the resolver routes RelationCatalog reads through + // loadRelation. 
+ private def fixtureView(ident: Identifier): Option[ViewInfo] = ident.name() match { + case "test_view" => + Some(new ViewInfo.Builder() + .withSchema(new StructType().add("col", "string").add("i", "int")) + .withQueryText( + "SELECT col, col::int AS i FROM spark_catalog.default.t WHERE col = 'b'") + .withSqlConfigs(java.util.Collections.singletonMap( + SQLConf.ANSI_ENABLED.key, (ident.namespace().head == "ansi").toString)) + .build()) + case "test_unqualified_view" => + Some(new ViewInfo.Builder() + .withSchema(new StructType().add("col", "string")) + .withQueryText("SELECT col FROM t WHERE col = 'b'") + .withCurrentCatalog("spark_catalog") + .withCurrentNamespace(Array("default")) + .build()) + case "test_unqualified_multi" => + // View whose captured catalog+namespace is view_catalog.ns1.ns2 (two-part). The + // unqualified `t` in the body must resolve via that captured context to + // view_catalog.ns1.ns2.t, which this catalog also serves (see `t` case below). + Some(new ViewInfo.Builder() + .withSchema(new StructType().add("col", "string")) + .withQueryText("SELECT col FROM t") + .withCurrentCatalog("view_catalog") + .withCurrentNamespace(Array("ns1", "ns2")) + .build()) + case "t" if ident.namespace().toSeq == Seq("ns1", "ns2") => + // Target of test_unqualified_multi's unqualified reference. Self-contained view so + // the test doesn't need external data. + Some(new ViewInfo.Builder() + .withSchema(new StructType().add("col", "string")) + .withQueryText("SELECT 'multi' AS col") + .build()) + case _ => None + } + override def loadTable(ident: Identifier): Table = { + // Tables only -- views must be loaded via loadView / loadRelation per the new contract. 
val key = (ident.namespace().toSeq, ident.name()) - Option(createdViews.get(key)).map(new MetadataOnlyTable(_, ident.toString)).getOrElse { - ident.name() match { - case "test_view" => - val info = new ViewInfo.Builder() - .withSchema(new StructType().add("col", "string").add("i", "int")) - .withQueryText( - "SELECT col, col::int AS i FROM spark_catalog.default.t WHERE col = 'b'") - .withSqlConfigs(java.util.Collections.singletonMap( - SQLConf.ANSI_ENABLED.key, (ident.namespace().head == "ansi").toString)) - .build() - new MetadataOnlyTable(info, ident.toString) - case "test_unqualified_view" => - val info = new ViewInfo.Builder() - .withSchema(new StructType().add("col", "string")) - .withQueryText("SELECT col FROM t WHERE col = 'b'") - .withCurrentCatalog("spark_catalog") - .withCurrentNamespace(Array("default")) - .build() - new MetadataOnlyTable(info, ident.toString) - case "test_unqualified_multi" => - // View whose captured catalog+namespace is view_catalog.ns1.ns2 (two-part). The - // unqualified `t` in the body must resolve via that captured context to - // view_catalog.ns1.ns2.t, which this catalog also serves (see `t` case below). - val info = new ViewInfo.Builder() - .withSchema(new StructType().add("col", "string")) - .withQueryText("SELECT col FROM t") - .withCurrentCatalog("view_catalog") - .withCurrentNamespace(Array("ns1", "ns2")) - .build() - new MetadataOnlyTable(info, ident.toString) - case "t" if ident.namespace().toSeq == Seq("ns1", "ns2") => - // Target of test_unqualified_multi's unqualified reference. Self-contained view so - // the test doesn't need external data. 
- val info = new ViewInfo.Builder() - .withSchema(new StructType().add("col", "string")) - .withQueryText("SELECT 'multi' AS col") - .build() - new MetadataOnlyTable(info, ident.toString) - case _ => throw new NoSuchTableException(ident) - } + Option(createdViews.get(key)) match { + case Some(info) if !info.isInstanceOf[ViewInfo] => + new MetadataOnlyTable(info, ident.toString) + case _ => throw new NoSuchTableException(ident) } } + override def loadRelation(ident: Identifier): Table = { + // Single-RPC perf path: returns tables AND views (as MetadataOnlyTable). Stored entries + // win over fixture views (the fixture name space is read-only and disjoint from + // createdViews in practice). + val key = (ident.namespace().toSeq, ident.name()) + Option(createdViews.get(key)) + .orElse(fixtureView(ident)) + .map(new MetadataOnlyTable(_, ident.toString)) + .getOrElse(throw new NoSuchTableException(ident)) + } + override def tableExists(ident: Identifier): Boolean = { val key = (ident.namespace().toSeq, ident.name()) val existing = createdViews.get(key) @@ -1161,11 +1178,11 @@ class TestingViewCatalog extends TableCatalog with ViewCatalog { } /** - * A minimal mixed [[StagingTableCatalog]] + [[ViewCatalog]]. View DDL routes through the + * A minimal mixed [[StagingTableCatalog]] + [[RelationCatalog]]. View DDL routes through the * ViewCatalog API (no separate staging variant for views in the new design). The staging * methods cover table CTAS / RTAS only. 
*/ -class TestingStagingCatalog extends StagingTableCatalog with ViewCatalog { +class TestingStagingCatalog extends StagingTableCatalog with RelationCatalog { private val views = new java.util.concurrent.ConcurrentHashMap[(Seq[String], String), TableInfo]() @@ -1174,6 +1191,15 @@ class TestingStagingCatalog extends StagingTableCatalog with ViewCatalog { (ident.namespace().toSeq, ident.name()) override def loadTable(ident: Identifier): Table = { + // Tables only -- per the new contract, views must be loaded via loadView / loadRelation. + Option(views.get(keyOf(ident))) match { + case Some(info) if !info.isInstanceOf[ViewInfo] => + new MetadataOnlyTable(info, ident.toString) + case _ => throw new NoSuchTableException(ident) + } + } + + override def loadRelation(ident: Identifier): Table = { Option(views.get(keyOf(ident))).map(new MetadataOnlyTable(_, ident.toString)) .getOrElse(throw new NoSuchTableException(ident)) } @@ -1263,24 +1289,13 @@ class TestingStagingCatalog extends StagingTableCatalog with ViewCatalog { } /** - * A v2 catalog that does not implement ViewCatalog. Used by capability-gate tests. The - * gate actually fires in `Analyzer.lookupTableOrView(viewOnly=true)` for ALTER VIEW and in + * A v2 catalog that does not implement ViewCatalog. Used by capability-gate tests: the gate + * fires in `Analyzer.lookupTableOrView(viewOnly=true)` for ALTER VIEW and in * [[CheckViewReferences]] for CREATE VIEW -- in both cases before `loadTable` is called -- - * so the pre-seeded view fixture is effectively unused on the happy-path-error flow. It's - * kept to make future tests that deliberately bypass the upstream gate easy to write. + * so this catalog's content is intentionally empty. 
*/ class TestingTableOnlyCatalog extends TableCatalog { - private val fixtureView: ViewInfo = new ViewInfo.Builder() - .withSchema(new StructType().add("x", "int")) - .withQueryText("SELECT 1 AS x") - .build() - - override def loadTable(ident: Identifier): Table = - if (ident.namespace().toSeq == Seq("default") && ident.name() == "v") { - new MetadataOnlyTable(fixtureView, ident.toString) - } else { - throw new NoSuchTableException(ident) - } + override def loadTable(ident: Identifier): Table = throw new NoSuchTableException(ident) override def alterTable(ident: Identifier, changes: TableChange*): Table = throw new RuntimeException("shouldn't be called") From 38ffa07b828ae0f61f53185cc2e890a30ac99731 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Sun, 26 Apr 2026 15:07:59 +0000 Subject: [PATCH 48/59] RelationCatalog: derive loadTable/loadView/tableExists/viewExists from loadRelation; rename listRelations -> listRelationSummaries Two API refinements on top of the previous commit, both targeted at the implementer's experience: 1. Default impls on `RelationCatalog` for `loadTable`, `loadView`, `tableExists`, `viewExists` -- each derives from `loadRelation` by matching `MetadataOnlyTable + ViewInfo` and routing the result to the right kind. A `RelationCatalog` author writes the read-side lookup once (in `loadRelation`); the four kind-specific accessors come for free. Implementers can still override any of them for a cheaper kind-specific path; otherwise the defaults are correct by construction. 2. Rename `RelationCatalog.listRelations` to `RelationCatalog.listRelationSummaries`, mirroring the existing `TableCatalog.listTableSummaries` / `ViewCatalog.listViews` convention -- the method returns `TableSummary[]`, so "summaries" is the accurate noun. The unified entry point reads as the summary-of-relations counterpart to the kind-specific summary methods. 
Test catalogs trimmed: `TestingViewCatalog` and `TestingStagingCatalog` now override only `loadRelation` on the read side; `loadTable`, `loadView`, `tableExists`, and (for the latter) `viewExists` use the new `RelationCatalog` defaults. Co-authored-by: Isaac --- .../connector/catalog/RelationCatalog.java | 75 ++++++++++++++++++- .../DataSourceV2MetadataOnlyViewSuite.scala | 47 +----------- 2 files changed, 76 insertions(+), 46 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/RelationCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/RelationCatalog.java index 795deb295f7f8..4f4b4e9b3955f 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/RelationCatalog.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/RelationCatalog.java @@ -21,6 +21,7 @@ import org.apache.spark.annotation.Evolving; import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException; import org.apache.spark.sql.catalyst.analysis.NoSuchTableException; +import org.apache.spark.sql.catalyst.analysis.NoSuchViewException; /** * Catalog API for connectors that expose both tables and views in a single shared identifier @@ -103,7 +104,7 @@ * a regular {@link Table} for a table, or a {@link MetadataOnlyTable} wrapping a * {@link ViewInfo} for a view. Saves the {@code loadTable} -> {@code loadView} fallback * on a cold cache. - *

  • {@link #listRelations(String[])} -- a unified listing of tables and views with the + *
  • {@link #listRelationSummaries(String[])} -- a unified listing of tables and views with the * kind preserved on each {@link TableSummary}. Default impl performs both * {@link TableCatalog#listTableSummaries} and {@link ViewCatalog#listViews}; override to * fetch in one round trip.
  • @@ -143,7 +144,7 @@ public interface RelationCatalog extends TableCatalog, ViewCatalog { * @throws NoSuchTableException if a table listed by the underlying enumeration disappears * before its summary can be assembled (default impl only) */ - default TableSummary[] listRelations(String[] namespace) + default TableSummary[] listRelationSummaries(String[] namespace) throws NoSuchNamespaceException, NoSuchTableException { TableSummary[] tableSummaries = listTableSummaries(namespace); Identifier[] viewIdentifiers = listViews(namespace); @@ -157,4 +158,74 @@ default TableSummary[] listRelations(String[] namespace) } return all.toArray(TableSummary[]::new); } + + /** + * @inheritDoc + *

    + * The default implementation derives from {@link #loadRelation}: a {@link MetadataOnlyTable} + * wrapping a {@link ViewInfo} is rejected as not-a-table; anything else is returned. Override + * only if a tables-only path is materially cheaper than the unified one. + */ + @Override + default Table loadTable(Identifier ident) throws NoSuchTableException { + Table t = loadRelation(ident); + if (t instanceof MetadataOnlyTable mot && mot.getTableInfo() instanceof ViewInfo) { + throw new NoSuchTableException(ident); + } + return t; + } + + /** + * @inheritDoc + *

    + * The default implementation derives from {@link #loadRelation}: a {@link MetadataOnlyTable} + * wrapping a {@link ViewInfo} is unwrapped and returned; anything else (table or absent) is + * surfaced as {@link NoSuchViewException}. Override only if a views-only path is materially + * cheaper than the unified one. + */ + @Override + default ViewInfo loadView(Identifier ident) throws NoSuchViewException { + Table t; + try { + t = loadRelation(ident); + } catch (NoSuchTableException e) { + throw new NoSuchViewException(ident); + } + if (t instanceof MetadataOnlyTable mot && mot.getTableInfo() instanceof ViewInfo vi) { + return vi; + } + throw new NoSuchViewException(ident); + } + + /** + * @inheritDoc + *

    + * The default implementation derives from {@link #loadRelation}: returns {@code true} only if + * the entry exists and is not a view. Override only if a cheaper existence-check path exists. + */ + @Override + default boolean tableExists(Identifier ident) { + try { + Table t = loadRelation(ident); + return !(t instanceof MetadataOnlyTable mot && mot.getTableInfo() instanceof ViewInfo); + } catch (NoSuchTableException e) { + return false; + } + } + + /** + * @inheritDoc + *

    + * The default implementation derives from {@link #loadRelation}: returns {@code true} only if + * the entry exists and is a view. Override only if a cheaper existence-check path exists. + */ + @Override + default boolean viewExists(Identifier ident) { + try { + Table t = loadRelation(ident); + return t instanceof MetadataOnlyTable mot && mot.getTableInfo() instanceof ViewInfo; + } catch (NoSuchTableException e) { + return false; + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala index 9374070d5dd79..55eee1ee20d96 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala @@ -1044,20 +1044,11 @@ class TestingViewCatalog extends RelationCatalog { case _ => None } - override def loadTable(ident: Identifier): Table = { - // Tables only -- views must be loaded via loadView / loadRelation per the new contract. - val key = (ident.namespace().toSeq, ident.name()) - Option(createdViews.get(key)) match { - case Some(info) if !info.isInstanceOf[ViewInfo] => - new MetadataOnlyTable(info, ident.toString) - case _ => throw new NoSuchTableException(ident) - } - } - override def loadRelation(ident: Identifier): Table = { // Single-RPC perf path: returns tables AND views (as MetadataOnlyTable). Stored entries // win over fixture views (the fixture name space is read-only and disjoint from - // createdViews in practice). + // createdViews in practice). loadTable, loadView, tableExists, viewExists all derive + // from this via the RelationCatalog default impls. 
val key = (ident.namespace().toSeq, ident.name()) Option(createdViews.get(key)) .orElse(fixtureView(ident)) @@ -1065,12 +1056,6 @@ class TestingViewCatalog extends RelationCatalog { .getOrElse(throw new NoSuchTableException(ident)) } - override def tableExists(ident: Identifier): Boolean = { - val key = (ident.namespace().toSeq, ident.name()) - val existing = createdViews.get(key) - existing != null && !existing.isInstanceOf[ViewInfo] - } - override def createTable(ident: Identifier, info: TableInfo): Table = { // Per the mixed-catalog contract: createTable must reject if the ident is already a view. if (info.isInstanceOf[ViewInfo]) { @@ -1137,14 +1122,6 @@ class TestingViewCatalog extends RelationCatalog { ids.toArray(new Array[Identifier](0)) } - override def loadView(ident: Identifier): ViewInfo = { - val key = (ident.namespace().toSeq, ident.name()) - Option(createdViews.get(key)) match { - case Some(v: ViewInfo) => v - case _ => throw new NoSuchViewException(ident) - } - } - override def createView(ident: Identifier, info: ViewInfo): ViewInfo = { val key = (ident.namespace().toSeq, ident.name()) if (createdViews.putIfAbsent(key, info) != null) { @@ -1190,25 +1167,12 @@ class TestingStagingCatalog extends StagingTableCatalog with RelationCatalog { private def keyOf(ident: Identifier): (Seq[String], String) = (ident.namespace().toSeq, ident.name()) - override def loadTable(ident: Identifier): Table = { - // Tables only -- per the new contract, views must be loaded via loadView / loadRelation. - Option(views.get(keyOf(ident))) match { - case Some(info) if !info.isInstanceOf[ViewInfo] => - new MetadataOnlyTable(info, ident.toString) - case _ => throw new NoSuchTableException(ident) - } - } - override def loadRelation(ident: Identifier): Table = { + // loadTable, loadView, tableExists, viewExists derive from this via RelationCatalog defaults. 
Option(views.get(keyOf(ident))).map(new MetadataOnlyTable(_, ident.toString)) .getOrElse(throw new NoSuchTableException(ident)) } - override def tableExists(ident: Identifier): Boolean = { - val v = views.get(keyOf(ident)) - v != null && !v.isInstanceOf[ViewInfo] - } - override def createTable(ident: Identifier, info: TableInfo): Table = { if (info.isInstanceOf[ViewInfo]) { throw new IllegalStateException( @@ -1254,11 +1218,6 @@ class TestingStagingCatalog extends StagingTableCatalog with RelationCatalog { ids.toArray(new Array[Identifier](0)) } - override def loadView(ident: Identifier): ViewInfo = views.get(keyOf(ident)) match { - case v: ViewInfo => v - case _ => throw new NoSuchViewException(ident) - } - override def createView(ident: Identifier, info: ViewInfo): ViewInfo = { if (views.putIfAbsent(keyOf(ident), info) != null) { throw new ViewAlreadyExistsException(ident) From 417e8faa3308dd9969ae161a7437e16195196af4 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Sun, 26 Apr 2026 15:28:36 +0000 Subject: [PATCH 49/59] test fixtures: rename TestingViewCatalog -> TestingRelationCatalog; drop TestingStagingCatalog Three cleanups from a self-review pass: 1. Rename `TestingViewCatalog` to `TestingRelationCatalog`. The class extends `RelationCatalog` and stores both tables and views in the same backing map; the old name (a leftover from when the interface was called `ViewCatalog`) suggested view-only and was misleading. 2. Drop `TestingStagingCatalog` and its 4 associated tests. The fixture's only real assertion was that `stageCreate` / `stageReplace` / `stageCreateOrReplace` throw "must not be invoked by view DDL" -- a defense against a regression that the type system already prevents (view DDL execs call `createView` / `replaceView` / `dropView` directly on the `ViewCatalog` half of the interface; the staging methods take `TableInfo`, not `ViewInfo`, so they're unreachable from view DDL by construction). 
The four CREATE / ALTER / DROP VIEW tests on the staging catalog were duplicates of the corresponding tests on `TestingRelationCatalog` -- same SQL, same assertions, the `StagingTableCatalog` mixin contributes nothing observable. 3. Clarify three contract-claim inline comments that didn't accurately describe the code below them: - `TestingRelationCatalog.createTable` claimed the method rejected when the ident was already a view, but the `instanceof ViewInfo` check inspected the input parameter, not catalog state. The actual rejection comes from the shared-keyspace `putIfAbsent` collision below. - `RelationResolution.tryResolvePersistent` said "the fallback is gated on neither" -- ambiguous; clarified to "fires only when both are absent". - `DataSourceV2Strategy.ShowViews` said "this case only sees non-session ViewCatalog catalogs"; a session-catalog override that mixes in `ViewCatalog` would also reach this case. View suite test count: 60 -> 56. Co-authored-by: Isaac --- .../analysis/RelationResolution.scala | 2 +- .../datasources/v2/DataSourceV2Strategy.scala | 9 +- .../DataSourceV2MetadataOnlyViewSuite.scala | 255 ++---------------- 3 files changed, 27 insertions(+), 239 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala index 7e7baad7055e5..58f832ea6cbdf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala @@ -264,7 +264,7 @@ class RelationResolution( // Fallback to ViewCatalog for catalogs that host views but where loadTable // returned None (or was skipped because there's no TableCatalog mixin). // Time-travel / write privileges only apply to tables, not views, so the - // fallback is gated on neither. + // fallback only fires when both are absent. 
tableSide.orElse { if (finalTimeTravelSpec.isEmpty && writePrivileges == null) { catalog match { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 1fd13bb35d223..d677ff1c4be2b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -578,9 +578,12 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case ShowTables(ResolvedNamespace(catalog, ns, _), pattern, output) => ShowTablesExec(output, catalog.asTableCatalog, ns, pattern) :: Nil - // SHOW VIEWS on a non-session v2 ViewCatalog. Session-catalog targets are rewritten to v1 - // `ShowViewsCommand` by `ResolveSessionCatalog`; non-ViewCatalog catalogs are rejected - // there too. This case only sees non-session ViewCatalog catalogs. + // SHOW VIEWS on a v2 ViewCatalog. `ResolveSessionCatalog` rewrites the SHOW VIEWS plan to + // v1 `ShowViewsCommand` only when the catalog is NOT a `ViewCatalog`; non-`ViewCatalog` + // catalogs (session or not) are rejected with `MISSING_CATALOG_ABILITY.VIEWS` there. So + // this case sees `ViewCatalog` catalogs (typically non-session, since the default + // `V2SessionCatalog` is not a `ViewCatalog`; a session-catalog override that mixes in + // `ViewCatalog` would also reach here). 
case ShowViews(ResolvedNamespace(catalog: ViewCatalog, ns, _), pattern, output) => ShowViewsExec(output, catalog, ns, pattern) :: Nil diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala index 55eee1ee20d96..a54169dc2135b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.connector import org.apache.spark.SparkConf import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, NoSuchViewException, TableAlreadyExistsException, ViewAlreadyExistsException} -import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, RelationCatalog, StagedTable, StagingTableCatalog, Table, TableCatalog, TableChange, TableInfo, TableSummary, V1Table, ViewCatalog, ViewInfo} +import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, RelationCatalog, Table, TableCatalog, TableChange, TableInfo, TableSummary, V1Table, ViewCatalog, ViewInfo} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.StructType @@ -30,8 +30,7 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap * Tests for the view side of [[MetadataOnlyTable]]: view-text expansion on read, and * CREATE VIEW / ALTER VIEW ... AS going through the v2 write path * (`CreateV2ViewExec` / `AlterV2ViewExec`). View writes route through - * [[ViewCatalog#createView]] / [[ViewCatalog#replaceView]]; there is no separate staging - * variant for views (the `StagingTableCatalog` `stage*` API is table-only). + * [[ViewCatalog#createView]] / [[ViewCatalog#replaceView]]. 
* Data-source-table read paths live in * [[org.apache.spark.sql.connector.DataSourceV2MetadataOnlyTableSuite]]. * @@ -45,7 +44,7 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio import testImplicits._ override def sparkConf: SparkConf = super.sparkConf - .set("spark.sql.catalog.view_catalog", classOf[TestingViewCatalog].getName) + .set("spark.sql.catalog.view_catalog", classOf[TestingRelationCatalog].getName) // --- View read path ----------------------------------------------------- @@ -73,7 +72,7 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio // End-to-end coverage of the v2 encoder -> parser round-trip: test_unqualified_multi is a // view whose captured catalog+namespace is view_catalog.ns1.ns2 (two-part namespace) and // whose body references `t` unqualified. At read time the unqualified `t` must expand to - // view_catalog.ns1.ns2.t via the captured context -- which TestingViewCatalog resolves to + // view_catalog.ns1.ns2.t via the captured context -- which TestingRelationCatalog resolves to // its own `t` fixture at that namespace. checkAnswer( spark.table("view_catalog.outer_ns.test_unqualified_multi"), @@ -233,10 +232,10 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio Seq("a", "b").toDF("col").write.saveAsTable("spark_catalog.default.t") sql("CREATE VIEW view_catalog.default.v_coll DEFAULT COLLATION UTF8_BINARY AS " + "SELECT col FROM spark_catalog.default.t") - // TestingViewCatalog stores the TableInfo verbatim, so the collation property is + // TestingRelationCatalog stores the TableInfo verbatim, so the collation property is // observable via the catalog-stored builder output. 
val catalog = spark.sessionState.catalogManager.catalog("view_catalog") - .asInstanceOf[TestingViewCatalog] + .asInstanceOf[TestingRelationCatalog] val info = catalog.getStoredView(Array("default"), "v_coll") assert(info.properties().get(TableCatalog.PROP_COLLATION) == "UTF8_BINARY") } @@ -259,7 +258,7 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio test("CREATE VIEW over a non-view table entry is rejected (plain TableCatalog)") { val catalog = spark.sessionState.catalogManager.catalog("view_catalog") - .asInstanceOf[TestingViewCatalog] + .asInstanceOf[TestingRelationCatalog] val tableIdent = Identifier.of(Array("default"), "v_existing_table") val tableInfo = new TableInfo.Builder() .withSchema(new StructType().add("col", "string")) @@ -297,90 +296,6 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio } } - // --- CREATE VIEW on a StagingTableCatalog ------------------------------- - - // The view exec routes everything through `ViewCatalog.createView` / `replaceView` regardless - // of whether the catalog also implements `StagingTableCatalog` -- views have no separate - // staging variant. These tests just confirm the view CRUD still works on a catalog that - // happens to mix in `StagingTableCatalog`; they do NOT exercise `stageCreate` / - // `stageCreateOrReplace` (which are table-only paths). 
- test("CREATE VIEW on a mixed StagingTableCatalog + ViewCatalog routes through createView") { - withSQLConf( - "spark.sql.catalog.staging_catalog" -> classOf[TestingStagingCatalog].getName) { - withTable("spark_catalog.default.t") { - Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") - - sql("CREATE VIEW staging_catalog.default.v_atomic AS " + - "SELECT x FROM spark_catalog.default.t WHERE x > 1") - checkAnswer( - spark.table("staging_catalog.default.v_atomic"), - Seq(Row(2), Row(3))) - - // Second CREATE without IF NOT EXISTS surfaces the viewAlreadyExists error from - // ViewCatalog.createView. - val ex = intercept[AnalysisException] { - sql("CREATE VIEW staging_catalog.default.v_atomic AS " + - "SELECT x FROM spark_catalog.default.t WHERE x > 1") - } - assert(ex.getMessage.toLowerCase(java.util.Locale.ROOT).contains("already exists")) - - // CREATE OR REPLACE routes through ViewCatalog.replaceView. - sql("CREATE OR REPLACE VIEW staging_catalog.default.v_atomic AS " + - "SELECT x FROM spark_catalog.default.t WHERE x > 2") - checkAnswer(spark.table("staging_catalog.default.v_atomic"), Row(3)) - - // CREATE IF NOT EXISTS on an existing view -- no-op; the exec short-circuits on - // viewExists before buildViewInfo. - sql("CREATE VIEW IF NOT EXISTS staging_catalog.default.v_atomic AS " + - "SELECT x + 100 AS x FROM spark_catalog.default.t") - // Value unchanged -- IF NOT EXISTS was a no-op. 
- checkAnswer(spark.table("staging_catalog.default.v_atomic"), Row(3)) - } - } - } - - test("CREATE VIEW over a non-view table entry is rejected (mixed StagingTableCatalog)") { - withSQLConf( - "spark.sql.catalog.staging_catalog" -> classOf[TestingStagingCatalog].getName) { - val stagingCatalog = spark.sessionState.catalogManager.catalog("staging_catalog") - .asInstanceOf[TestingStagingCatalog] - val tableIdent = Identifier.of(Array("default"), "v_existing_table") - val tableInfo = new TableInfo.Builder() - .withSchema(new StructType().add("col", "string")) - .withTableType(TableSummary.EXTERNAL_TABLE_TYPE) - .build() - stagingCatalog.createTable(tableIdent, tableInfo) - try { - withTable("spark_catalog.default.t") { - Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") - - // CREATE OR REPLACE VIEW must not silently destroy a non-view table. The exec's - // `rejectIfTable` short-circuits before any view-write call (no `stage*` involved -- - // views are written via `ViewCatalog.replaceView`, not the staging API). 
- val replaceEx = intercept[AnalysisException] { - sql("CREATE OR REPLACE VIEW staging_catalog.default.v_existing_table AS " + - "SELECT x FROM spark_catalog.default.t") - } - assert(replaceEx.getCondition == "EXPECT_VIEW_NOT_TABLE.NO_ALTERNATIVE") - - val createEx = intercept[AnalysisException] { - sql("CREATE VIEW staging_catalog.default.v_existing_table AS " + - "SELECT x FROM spark_catalog.default.t") - } - assert(createEx.getCondition == "TABLE_OR_VIEW_ALREADY_EXISTS") - - sql("CREATE VIEW IF NOT EXISTS staging_catalog.default.v_existing_table AS " + - "SELECT x FROM spark_catalog.default.t") - val loaded = stagingCatalog.loadTable(tableIdent).asInstanceOf[MetadataOnlyTable] - assert(loaded.getTableInfo.properties.get(TableCatalog.PROP_TABLE_TYPE) == - TableSummary.EXTERNAL_TABLE_TYPE) - } - } finally { - stagingCatalog.dropTable(tableIdent) - } - } - } - // --- ALTER VIEW --------------------------------------------------------- test("ALTER VIEW ... AS updates the view body on a v2 catalog") { @@ -459,7 +374,7 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio "SELECT x + 1 AS x FROM spark_catalog.default.t") val catalog = spark.sessionState.catalogManager.catalog("view_catalog") - .asInstanceOf[TestingViewCatalog] + .asInstanceOf[TestingRelationCatalog] val info = catalog.getStoredView(Array("default"), "v_preserve") assert(info.properties().get("mykey") == "myvalue") } @@ -472,7 +387,7 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio "SELECT x FROM spark_catalog.default.t") val catalog = spark.sessionState.catalogManager.catalog("view_catalog") - .asInstanceOf[TestingViewCatalog] + .asInstanceOf[TestingRelationCatalog] val info = catalog.getStoredView(Array("default"), "v_owner_create") // v2 CREATE VIEW stamps the current user into PROP_OWNER, matching v2 CREATE TABLE // (via CatalogV2Util.withDefaultOwnership) and v1 CREATE VIEW (via CatalogTable.owner's @@ -485,7 +400,7 @@ class 
DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio test("ALTER VIEW preserves PROP_OWNER (v1-parity)") { val catalog = spark.sessionState.catalogManager.catalog("view_catalog") - .asInstanceOf[TestingViewCatalog] + .asInstanceOf[TestingRelationCatalog] val viewIdent = Identifier.of(Array("default"), "v_owner") // Pre-seed a view whose stored ViewInfo carries an explicit owner. val initialInfo = new ViewInfo.Builder() @@ -520,7 +435,7 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio "SELECT x + 1 AS x FROM spark_catalog.default.t") val catalog = spark.sessionState.catalogManager.catalog("view_catalog") - .asInstanceOf[TestingViewCatalog] + .asInstanceOf[TestingRelationCatalog] assert(catalog.getStoredView(Array("default"), "v_evo").schemaMode() == "EVOLUTION") } } @@ -533,7 +448,7 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio "SELECT col FROM spark_catalog.default.t") } val catalog = spark.sessionState.catalogManager.catalog("view_catalog") - .asInstanceOf[TestingViewCatalog] + .asInstanceOf[TestingRelationCatalog] assert(catalog.getStoredView(Array("default"), "v_configs") .sqlConfigs().get(SQLConf.ANSI_ENABLED.key) == "true") @@ -561,24 +476,6 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio } } - test("ALTER VIEW on a mixed StagingTableCatalog + ViewCatalog routes through replaceView") { - withSQLConf( - "spark.sql.catalog.staging_catalog" -> classOf[TestingStagingCatalog].getName) { - withTable("spark_catalog.default.t") { - Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") - sql("CREATE VIEW staging_catalog.default.v_atomic_alter AS " + - "SELECT x FROM spark_catalog.default.t WHERE x > 10") - checkAnswer(spark.table("staging_catalog.default.v_atomic_alter"), Seq.empty[Row]) - - sql("ALTER VIEW staging_catalog.default.v_atomic_alter AS " + - "SELECT x FROM spark_catalog.default.t WHERE x > 1") - checkAnswer( - 
spark.table("staging_catalog.default.v_atomic_alter"), - Seq(Row(2), Row(3))) - } - } - } - test("ALTER VIEW on a catalog without ViewCatalog fails with MISSING_CATALOG_ABILITY") { // ALTER VIEW's identifier is resolved via `UnresolvedView`, whose `viewOnly=true` path // in `Analyzer.lookupTableOrView` rejects non-ViewCatalog catalogs up front with the @@ -674,7 +571,7 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio // `unsupportedCreateOrReplaceViewOnTableError`. Pre-seed a non-view entry at a // multi-level-namespace identifier to exercise the rendering. val catalog = spark.sessionState.catalogManager.catalog("view_catalog") - .asInstanceOf[TestingViewCatalog] + .asInstanceOf[TestingRelationCatalog] val tblIdent = Identifier.of(Array("ns1", "inner"), "t_err") catalog.createTable( tblIdent, @@ -845,7 +742,7 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio test("DROP VIEW on a ViewCatalog drops the view") { val catalog = spark.sessionState.catalogManager.catalog("view_catalog") - .asInstanceOf[TestingViewCatalog] + .asInstanceOf[TestingRelationCatalog] withTable("spark_catalog.default.t") { Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") sql("CREATE VIEW view_catalog.default.v_drop AS " + @@ -867,7 +764,7 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio // `wrongCommandForObjectTypeError`. The v2 path must also refuse -- otherwise // `DROP VIEW view_catalog.default.<table>` would silently destroy the table's entry.
val catalog = spark.sessionState.catalogManager.catalog("view_catalog") - .asInstanceOf[TestingViewCatalog] + .asInstanceOf[TestingRelationCatalog] val tableIdent = Identifier.of(Array("default"), "t_not_a_view") catalog.createTable( tableIdent, @@ -887,22 +784,6 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio } } - test("DROP VIEW on a StagingTableCatalog drops the view") { - withSQLConf( - "spark.sql.catalog.staging_catalog" -> classOf[TestingStagingCatalog].getName) { - val catalog = spark.sessionState.catalogManager.catalog("staging_catalog") - .asInstanceOf[TestingStagingCatalog] - withTable("spark_catalog.default.t") { - Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") - sql("CREATE VIEW staging_catalog.default.v_drop_atomic AS " + - "SELECT x FROM spark_catalog.default.t") - assert(catalog.viewExists(Identifier.of(Array("default"), "v_drop_atomic"))) - sql("DROP VIEW staging_catalog.default.v_drop_atomic") - assert(!catalog.viewExists(Identifier.of(Array("default"), "v_drop_atomic"))) - } - } - } - test("DROP VIEW on a catalog without ViewCatalog is rejected") { withSQLConf( "spark.sql.catalog.no_view_catalog" -> classOf[TestingTableOnlyCatalog].getName) { @@ -918,7 +799,7 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio private def seedV2Table(name: String): Unit = { val catalog = spark.sessionState.catalogManager.catalog("view_catalog") - .asInstanceOf[TestingViewCatalog] + .asInstanceOf[TestingRelationCatalog] catalog.createTable( Identifier.of(Array("default"), name), new TableInfo.Builder() @@ -996,7 +877,7 @@ class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSessio * [[loadRelation]] returns either kind; [[loadTable]] is tables-only per the * [[TableCatalog#loadTable]] contract. 
*/ -class TestingViewCatalog extends RelationCatalog { +class TestingRelationCatalog extends RelationCatalog { // Holds entries (views and tables) created via createTable / createView within the session. // Keyed by (namespace, name); the stored value's runtime type (ViewInfo vs TableInfo) @@ -1057,12 +938,9 @@ class TestingViewCatalog extends RelationCatalog { } override def createTable(ident: Identifier, info: TableInfo): Table = { - // Per the mixed-catalog contract: createTable must reject if the ident is already a view. - if (info.isInstanceOf[ViewInfo]) { - throw new IllegalStateException( - "TestingViewCatalog.createTable should not be called with a ViewInfo; views go through " + - "ViewCatalog.createView") - } + // Mixed-catalog contract: createTable rejects when a view sits at ident with + // TableAlreadyExistsException. The shared `createdViews` keyspace makes `putIfAbsent` + // throw uniformly for both table-at-ident and view-at-ident collisions. val key = (ident.namespace().toSeq, ident.name()) if (createdViews.putIfAbsent(key, info) != null) { throw new TableAlreadyExistsException(ident) @@ -1154,99 +1032,6 @@ class TestingViewCatalog extends RelationCatalog { override def name(): String = catalogName } -/** - * A minimal mixed [[StagingTableCatalog]] + [[RelationCatalog]]. View DDL routes through the - * ViewCatalog API (no separate staging variant for views in the new design). The staging - * methods cover table CTAS / RTAS only. - */ -class TestingStagingCatalog extends StagingTableCatalog with RelationCatalog { - - private val views = - new java.util.concurrent.ConcurrentHashMap[(Seq[String], String), TableInfo]() - - private def keyOf(ident: Identifier): (Seq[String], String) = - (ident.namespace().toSeq, ident.name()) - - override def loadRelation(ident: Identifier): Table = { - // loadTable, loadView, tableExists, viewExists derive from this via RelationCatalog defaults. 
- Option(views.get(keyOf(ident))).map(new MetadataOnlyTable(_, ident.toString)) - .getOrElse(throw new NoSuchTableException(ident)) - } - - override def createTable(ident: Identifier, info: TableInfo): Table = { - if (info.isInstanceOf[ViewInfo]) { - throw new IllegalStateException( - "TestingStagingCatalog.createTable should not be called with a ViewInfo") - } - if (views.putIfAbsent(keyOf(ident), info) != null) { - throw new TableAlreadyExistsException(ident) - } - new MetadataOnlyTable(info, ident.toString) - } - - // Staging methods are required by `StagingTableCatalog` but should never be invoked by view - // DDL (views write through `ViewCatalog.createView` / `replaceView`, not the staging API). - // Throwing here turns any accidental routing into a clear test failure. - override def stageCreate(ident: Identifier, info: TableInfo): StagedTable = - throw new RuntimeException("stageCreate must not be invoked by view DDL") - override def stageReplace(ident: Identifier, info: TableInfo): StagedTable = - throw new RuntimeException("stageReplace must not be invoked by view DDL") - override def stageCreateOrReplace(ident: Identifier, info: TableInfo): StagedTable = - throw new RuntimeException("stageCreateOrReplace must not be invoked by view DDL") - - override def alterTable(ident: Identifier, changes: TableChange*): Table = - throw new RuntimeException("shouldn't be called") - override def dropTable(ident: Identifier): Boolean = { - val v = views.get(keyOf(ident)) - if (v == null || v.isInstanceOf[ViewInfo]) return false - views.remove(keyOf(ident)) != null - } - override def renameTable(oldIdent: Identifier, newIdent: Identifier): Unit = - throw new RuntimeException("shouldn't be called") - override def listTables(namespace: Array[String]): Array[Identifier] = Array.empty - - // ViewCatalog methods -- shared storage with the table side. 
- - override def listViews(namespace: Array[String]): Array[Identifier] = { - val targetNs = namespace.toSeq - val ids = new java.util.ArrayList[Identifier]() - views.forEach { (key, info) => - if (key._1 == targetNs && info.isInstanceOf[ViewInfo]) { - ids.add(Identifier.of(key._1.toArray, key._2)) - } - } - ids.toArray(new Array[Identifier](0)) - } - - override def createView(ident: Identifier, info: ViewInfo): ViewInfo = { - if (views.putIfAbsent(keyOf(ident), info) != null) { - throw new ViewAlreadyExistsException(ident) - } - info - } - - override def replaceView(ident: Identifier, info: ViewInfo): ViewInfo = { - val existing = views.get(keyOf(ident)) - if (existing == null || !existing.isInstanceOf[ViewInfo]) { - throw new NoSuchViewException(ident) - } - views.put(keyOf(ident), info) - info - } - - override def dropView(ident: Identifier): Boolean = { - val existing = views.get(keyOf(ident)) - if (existing == null || !existing.isInstanceOf[ViewInfo]) return false - views.remove(keyOf(ident)) != null - } - - private var catalogName = "" - override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = { - catalogName = name - } - override def name(): String = catalogName -} - /** * A v2 catalog that does not implement ViewCatalog. Used by capability-gate tests: the gate * fires in `Analyzer.lookupTableOrView(viewOnly=true)` for ALTER VIEW and in From 9579c36a0a9afa262cd360625271774c46c869dd Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Sun, 26 Apr 2026 16:50:08 +0000 Subject: [PATCH 50/59] DropTableExec: guard purgeTable behind tableExists for IF EXISTS no-op; minor typo Self-review findings: - DropTableExec.run: removing the upfront tableExists() probe regressed `DROP TABLE IF EXISTS X PURGE` on missing tables when the catalog does not override TableCatalog.purgeTable (the default impl throws unconditionally). Pre-PR was a clean no-op; post-PR threw UNSUPPORTED_FEATURE.PURGE_TABLE. 
Restore the existence guard for the purge path; the perf gain on plain DROP TABLE is preserved. Pin the no-op with a v2 DropTableSuite test. - DataSourceV2MetadataOnlyViewSuite: typo "name space" -> "namespace". Co-authored-by: Isaac --- .../datasources/v2/DropTableExec.scala | 26 +++++++++++++------ .../DataSourceV2MetadataOnlyViewSuite.scala | 2 +- .../execution/command/v2/DropTableSuite.scala | 7 +++++ 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala index 831b05149fc5c..1af2103fe6e23 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala @@ -26,12 +26,16 @@ import org.apache.spark.util.ArrayImplicits._ /** * Physical plan node for dropping a table. * - * Issues `dropTable` (or `purgeTable`) directly and inspects its return value; this saves the - * upfront `tableExists` probe (1 RPC on the happy path). On `false`, falls back to - * `viewExists` for catalogs that also implement [[ViewCatalog]] -- distinguishes - * "wrong type" from "missing" so a `DROP TABLE someView` on a mixed catalog surfaces the - * dedicated `EXPECT_TABLE_NOT_VIEW` error rather than a generic "table not found", - * matching the v1 `DropTableCommand(isView = false)` behavior. + * For plain DROP TABLE, calls `dropTable` directly and inspects its return value; this saves + * the upfront `tableExists` probe (1 RPC on the happy path). For DROP TABLE ... PURGE, keeps + * the upfront `tableExists` probe so `IF EXISTS` over a missing table is a clean no-op even + * on catalogs whose `purgeTable` is the default impl that throws unconditionally. 
+ * + * On a `dropTable` returning false, falls back to `viewExists` for catalogs that also + * implement [[ViewCatalog]] -- distinguishes "wrong type" from "missing" so a + * `DROP TABLE someView` on a mixed catalog surfaces the dedicated `EXPECT_TABLE_NOT_VIEW` + * error rather than a generic "table not found", matching the v1 + * `DropTableCommand(isView = false)` behavior. */ case class DropTableExec( catalog: TableCatalog, @@ -41,8 +45,14 @@ case class DropTableExec( invalidateCache: () => Unit) extends LeafV2CommandExec { override def run(): Seq[InternalRow] = { - val dropped = - if (purge) catalog.purgeTable(ident) else catalog.dropTable(ident) + val dropped = if (purge) { + // Guard `purgeTable` behind `tableExists` so the default impl (which throws + // UNSUPPORTED_FEATURE.PURGE_TABLE unconditionally) doesn't fire for `IF EXISTS` over a + // missing table; the IF EXISTS contract should suppress "missing" cleanly here. + if (catalog.tableExists(ident)) catalog.purgeTable(ident) else false + } else { + catalog.dropTable(ident) + } if (dropped) { invalidateCache() } else { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala index a54169dc2135b..0851e6d2df765 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala @@ -927,7 +927,7 @@ class TestingRelationCatalog extends RelationCatalog { override def loadRelation(ident: Identifier): Table = { // Single-RPC perf path: returns tables AND views (as MetadataOnlyTable). Stored entries - // win over fixture views (the fixture name space is read-only and disjoint from + // win over fixture views (the fixture namespace is read-only and disjoint from // createdViews in practice). 
loadTable, loadView, tableExists, viewExists all derive // from this via the RelationCatalog default impls. val key = (ident.namespace().toSeq, ident.name()) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropTableSuite.scala index ffc2c6c679a8b..0e5cbb861d05d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropTableSuite.scala @@ -41,6 +41,13 @@ class DropTableSuite extends command.DropTableSuiteBase with CommandSuiteBase { } } + test("DROP TABLE IF EXISTS ... PURGE on a missing table is a no-op") { + // The default TableCatalog.purgeTable throws unconditionally, so without an upfront + // existence guard `IF EXISTS` would surface UNSUPPORTED_FEATURE.PURGE_TABLE for missing + // tables -- defeating the IF EXISTS contract on catalogs that do not support purge. + sql(s"DROP TABLE IF EXISTS $catalog.ns.never_existed PURGE") + } + test("table qualified with the session catalog name") { withSQLConf( V2_SESSION_CATALOG_IMPLEMENTATION.key -> classOf[InMemoryTableSessionCatalog].getName) { From d3bd0382b5afca235312ee49b90a9df111409b13 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Mon, 27 Apr 2026 02:35:27 +0000 Subject: [PATCH 51/59] DropTableExec: restore upfront tableExists for both purge and dropTable paths CI failures revealed two regressions caused by removing the upfront `tableExists` probe in DropTableExec: 1. `JDBCTableCatalog.dropTable` does not honor the v2 contract of returning false for missing tables -- e.g. Derby raises a SQLSyntaxErrorException, surfaced as `FAILED_JDBC.DROP_TABLE` instead of a clean no-op. (DerbyTableCatalogSuite cleanup after RENAME hits this.) 2. The hive `DropTableSuite.hive client calls` test counts the upfront `tableExists` call. 
Restore the original guard structure -- `tableExists` -> drop/purge if present, else fall through to the IF-EXISTS / view-fallback / not-found branches. This subsumes the previous purge-only fix and matches pre-PR behavior for `dropTable` while still adding the new `viewExists` fallback for the `EXPECT_TABLE_NOT_VIEW` error on mixed catalogs. Co-authored-by: Isaac --- .../datasources/v2/DropTableExec.scala | 28 +++++++------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala index 1af2103fe6e23..18e6a5eb86ac8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala @@ -26,16 +26,15 @@ import org.apache.spark.util.ArrayImplicits._ /** * Physical plan node for dropping a table. * - * For plain DROP TABLE, calls `dropTable` directly and inspects its return value; this saves - * the upfront `tableExists` probe (1 RPC on the happy path). For DROP TABLE ... PURGE, keeps - * the upfront `tableExists` probe so `IF EXISTS` over a missing table is a clean no-op even - * on catalogs whose `purgeTable` is the default impl that throws unconditionally. + * Probes `tableExists` upfront so `IF EXISTS` over a missing table is a clean no-op even + * on catalogs whose `dropTable` / `purgeTable` does not honor the "return false on missing" + * contract (e.g. JDBC catalogs that throw a SQL syntax error, or the default `purgeTable` + * that throws `UNSUPPORTED_FEATURE.PURGE_TABLE` unconditionally). 
* - * On a `dropTable` returning false, falls back to `viewExists` for catalogs that also - * implement [[ViewCatalog]] -- distinguishes "wrong type" from "missing" so a - * `DROP TABLE someView` on a mixed catalog surfaces the dedicated `EXPECT_TABLE_NOT_VIEW` - * error rather than a generic "table not found", matching the v1 - * `DropTableCommand(isView = false)` behavior. + * When the table is absent, falls back to `viewExists` for catalogs that also implement + * [[ViewCatalog]] -- distinguishes "wrong type" from "missing" so a `DROP TABLE someView` + * on a mixed catalog surfaces the dedicated `EXPECT_TABLE_NOT_VIEW` error rather than a + * generic "table not found", matching the v1 `DropTableCommand(isView = false)` behavior. */ case class DropTableExec( catalog: TableCatalog, @@ -45,16 +44,9 @@ case class DropTableExec( invalidateCache: () => Unit) extends LeafV2CommandExec { override def run(): Seq[InternalRow] = { - val dropped = if (purge) { - // Guard `purgeTable` behind `tableExists` so the default impl (which throws - // UNSUPPORTED_FEATURE.PURGE_TABLE unconditionally) doesn't fire for `IF EXISTS` over a - // missing table; the IF EXISTS contract should suppress "missing" cleanly here. - if (catalog.tableExists(ident)) catalog.purgeTable(ident) else false - } else { - catalog.dropTable(ident) - } - if (dropped) { + if (catalog.tableExists(ident)) { invalidateCache() + if (purge) catalog.purgeTable(ident) else catalog.dropTable(ident) } else { val nameParts = (catalog.name() +: ident.namespace() :+ ident.name()).toImmutableArraySeq From 45783b942e2e53d299eaf550850797068f17899a Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Mon, 27 Apr 2026 08:08:38 +0000 Subject: [PATCH 52/59] Fix scalastyle: wrap long Scaladoc/string lines in Analyzer and Catalogs Two lines exceeded the 100-char limit: - Analyzer.scala:1112 in the lookupTableOrView Scaladoc. - Catalogs.scala:123 in the validateRelationCatalog error message. 
These cascaded into 7 CI failures (Maven Java 17/25, linters, docs, sparkr, Docker integration, TPC-DS) all rooted in `(catalyst / scalaStyleOnCompile) Failing because of negative scalastyle result`. Co-authored-by: Isaac --- .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 6 +++--- .../org/apache/spark/sql/connector/catalog/Catalogs.scala | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 060f479e5b5e6..850b34a2743d3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1109,9 +1109,9 @@ class Analyzer( * so surfacing a downstream "view not found" would hide the real reason. * * Lookup order against a non-session catalog: - * 1. If the catalog is a [[RelationCatalog]], [[RelationCatalog.loadRelation]] is called once. - * A returned [[MetadataOnlyTable]] wrapping a [[ViewInfo]] is interpreted as a view; - * other results are tables. + * 1. If the catalog is a [[RelationCatalog]], [[RelationCatalog.loadRelation]] is called + * once. A returned [[MetadataOnlyTable]] wrapping a [[ViewInfo]] is interpreted as a + * view; other results are tables. * 2. Otherwise, [[TableCatalog.loadTable]] is tried (when implemented), then * [[ViewCatalog.loadView]] as the fallback view-resolution path (when implemented). 
*/ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/Catalogs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/Catalogs.scala index c9a87c6e88c2a..03addeb170697 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/Catalogs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/Catalogs.scala @@ -120,8 +120,9 @@ private[sql] object Catalogs { throw new IllegalArgumentException( s"Catalog '$name' (${plugin.getClass.getName}) implements both TableCatalog and " + s"ViewCatalog directly. Catalogs that expose both tables and views must implement " + - s"RelationCatalog instead, which centralizes the cross-cutting rules (shared identifier " + - s"namespace, cross-type collision rejection, single-RPC perf entry points).") + s"RelationCatalog instead, which centralizes the cross-cutting rules (shared " + + s"identifier namespace, cross-type collision rejection, single-RPC perf entry " + + s"points).") } } } From f680aa2b8259e7d5223bd6cc11778d0f3f662722 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Mon, 27 Apr 2026 12:54:15 +0000 Subject: [PATCH 53/59] RelationCatalog: fix @inheritDoc Javadoc tag (block -> inline form) `@inheritDoc` is only valid as an inline tag (`{@inheritDoc}`); using it as a block tag is undefined. RelationCatalog had it written as the block form on the four `default` overrides (loadTable, loadView, tableExists, viewExists), which is the only such occurrence in the entire Spark Java tree -- every other file uses `{@inheritDoc}`. Suspected trigger of the recent unidoc failures: javadoc 17 dies in "Building tree" with no per-class diagnostic when this pattern combines with default-method overrides on an interface that inherits from two parents. 
Surface symptom: `javadoc exited with exit code 1` after "Building tree for all the packages and classes...", before any per-class HTML generation begins (so SPARK-56630's diagnostic helper correctly reports "no class HTML generation was in progress"). Co-authored-by: Isaac --- .../spark/sql/connector/catalog/RelationCatalog.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/RelationCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/RelationCatalog.java index 4f4b4e9b3955f..8a0d6a1d8807d 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/RelationCatalog.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/RelationCatalog.java @@ -160,7 +160,7 @@ default TableSummary[] listRelationSummaries(String[] namespace) } /** - * @inheritDoc + * {@inheritDoc} *

    * The default implementation derives from {@link #loadRelation}: a {@link MetadataOnlyTable} * wrapping a {@link ViewInfo} is rejected as not-a-table; anything else is returned. Override @@ -176,7 +176,7 @@ default Table loadTable(Identifier ident) throws NoSuchTableException { } /** - * @inheritDoc + * {@inheritDoc} *

    * The default implementation derives from {@link #loadRelation}: a {@link MetadataOnlyTable} * wrapping a {@link ViewInfo} is unwrapped and returned; anything else (table or absent) is @@ -198,7 +198,7 @@ default ViewInfo loadView(Identifier ident) throws NoSuchViewException { } /** - * @inheritDoc + * {@inheritDoc} *

    * The default implementation derives from {@link #loadRelation}: returns {@code true} only if * the entry exists and is not a view. Override only if a cheaper existence-check path exists. @@ -214,7 +214,7 @@ default boolean tableExists(Identifier ident) { } /** - * @inheritDoc + * {@inheritDoc} *

    * The default implementation derives from {@link #loadRelation}: returns {@code true} only if * the entry exists and is a view. Override only if a cheaper existence-check path exists. From 7d833a0b35cec7a25c726bd2bd159ab44a08b9bf Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Mon, 27 Apr 2026 16:55:39 +0000 Subject: [PATCH 54/59] RelationCatalog: replace {@inheritDoc} on default-method overrides with explicit prose The {@inheritDoc} -> block-form fix unblocked the obvious invalid-tag case, but doc gen still fails in the same "Building tree" phase. Hypothesis: {@inheritDoc} on a default-method override of a default method (the parent override is itself default in TableCatalog/ViewCatalog) on a multi-parent interface still trips javadoc 17's tree builder, even with the inline form. Replace each of the four overrides' {@inheritDoc} with explicit @param / @return / @throws so the doclet has no inheritance link to traverse. The descriptive text below {@inheritDoc} is preserved verbatim. Behavioral equivalent to inheriting + augmenting; just no tag chain. If this push turns the docs job green, the {@inheritDoc} chain on multi- parent default-method overrides is the trigger and we should keep this shape; if it stays red, the trigger is elsewhere and we'll bisect further. Co-authored-by: Isaac --- .../connector/catalog/RelationCatalog.java | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/RelationCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/RelationCatalog.java index 8a0d6a1d8807d..ec0664d0cf37c 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/RelationCatalog.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/RelationCatalog.java @@ -160,11 +160,15 @@ default TableSummary[] listRelationSummaries(String[] namespace) } /** - * {@inheritDoc} + * Load a table by identifier. *

    * The default implementation derives from {@link #loadRelation}: a {@link MetadataOnlyTable} * wrapping a {@link ViewInfo} is rejected as not-a-table; anything else is returned. Override * only if a tables-only path is materially cheaper than the unified one. + * + * @param ident a table identifier + * @return the table's metadata + * @throws NoSuchTableException if no table exists at {@code ident} */ @Override default Table loadTable(Identifier ident) throws NoSuchTableException { @@ -176,12 +180,16 @@ default Table loadTable(Identifier ident) throws NoSuchTableException { } /** - * {@inheritDoc} + * Load a view by identifier. *

    * The default implementation derives from {@link #loadRelation}: a {@link MetadataOnlyTable} * wrapping a {@link ViewInfo} is unwrapped and returned; anything else (table or absent) is * surfaced as {@link NoSuchViewException}. Override only if a views-only path is materially * cheaper than the unified one. + * + * @param ident a view identifier + * @return the view's metadata + * @throws NoSuchViewException if no view exists at {@code ident} */ @Override default ViewInfo loadView(Identifier ident) throws NoSuchViewException { @@ -198,10 +206,13 @@ default ViewInfo loadView(Identifier ident) throws NoSuchViewException { } /** - * {@inheritDoc} + * Test whether a table exists at the given identifier. *

    * The default implementation derives from {@link #loadRelation}: returns {@code true} only if * the entry exists and is not a view. Override only if a cheaper existence-check path exists. + * + * @param ident a table identifier + * @return {@code true} if a table exists at {@code ident}, {@code false} otherwise */ @Override default boolean tableExists(Identifier ident) { @@ -214,10 +225,13 @@ default boolean tableExists(Identifier ident) { } /** - * {@inheritDoc} + * Test whether a view exists at the given identifier. *

    * The default implementation derives from {@link #loadRelation}: returns {@code true} only if * the entry exists and is a view. Override only if a cheaper existence-check path exists. + * + * @param ident a view identifier + * @return {@code true} if a view exists at {@code ident}, {@code false} otherwise */ @Override default boolean viewExists(Identifier ident) { From 8adcf25c4ec1d4b9438f66bf60534cb07eabfbe8 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Mon, 27 Apr 2026 18:42:46 +0000 Subject: [PATCH 55/59] DEBUG: force JVM exception logging in javadoc to surface silent tree-build crash The unidoc failure on this PR has the same shape across every commit: Building tree for all the packages and classes... javadoc exited with exit code 1 No stack trace, no NPE keyword, no class name. Both the block-form and inline {@inheritDoc} hypotheses turned out wrong (the block-fix and the explicit-prose replacement both kept the failure). Master + this PR have identical sets of source errors before "Building tree", so the difference must be a runtime failure inside the doclet itself when traversing the new public types. Add `-J-Xlog:exceptions=info` (java 11+) to the unidoc javadoc options so any uncaught Throwable from the standard doclet's tree builder is dumped to stderr. On the next CI run, this will show whether the crash is an NPE, an IllegalStateException, or something else, and where it came from. Will revert once we have a class/line to fix. 
Co-authored-by: Isaac --- project/SparkBuild.scala | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 866a535c6d951..b4be85e14a73f 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -1699,7 +1699,12 @@ object Unidoc { "-tag", "todo:X", "-tag", "groupname:X", "-tag", "inheritdoc", - "--ignore-source-errors", "-notree" + "--ignore-source-errors", "-notree", + // DEBUG (revert before merge): force JVM-level exception logging so + // any NPE / RuntimeException in the doclet's tree builder shows a + // stack trace; today the failure is silent ("Building tree -> exit 1" + // with no further output), which leaves no actionable signal. + "-J-Xlog:exceptions=info" ) }, From 1af03b09ac337b6bb201e6cedb3cf9501d519008 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Tue, 28 Apr 2026 00:25:50 +0000 Subject: [PATCH 56/59] DEBUG: also log class loads to identify the symbol failing javac completion Prior debug run (-J-Xlog:exceptions=info) revealed the unidoc failure is NOT a silent doclet crash. Sequence: [6.604s][info][exceptions] CompletionFailure thrown in ClassFinder.fillIn(ClassSymbol) at bci 320 ... same Throwable address (0x0000000706748b50) re-thrown ~938k times across ClassFinder.complete / Resolve.findIdentInPackageInternal / ClassFinder.loadClass for ~46 seconds ... [52.401s] MissingResourceException: Can't find resource for bundle com.sun.tools.javac.resources.compiler, key main.error javadoc exited with exit code 1 So a single class symbol our PR's source set references cannot be completed by javac, sending it into a 46-second resolution loop until final error formatting fails on a missing bundle key. To find WHICH symbol, also enable `-Xlog:class+load=info` -- the last class load before the CompletionFailure storm at 6.6s should pinpoint the trigger. Will revert once the trigger is identified. 
Co-authored-by: Isaac --- project/SparkBuild.scala | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index b4be85e14a73f..5f4001b271bde 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -1700,11 +1700,14 @@ object Unidoc { "-tag", "groupname:X", "-tag", "inheritdoc", "--ignore-source-errors", "-notree", - // DEBUG (revert before merge): force JVM-level exception logging so - // any NPE / RuntimeException in the doclet's tree builder shows a - // stack trace; today the failure is silent ("Building tree -> exit 1" - // with no further output), which leaves no actionable signal. - "-J-Xlog:exceptions=info" + // DEBUG (revert before merge): the prior `exceptions=info` run revealed + // that javadoc isn't dying on an uncaught Throwable -- it's javac's + // ClassFinder spinning on a CompletionFailure (same Throwable address + // re-thrown ~938k times in 46s) before MissingResourceException kills + // the process. To identify WHICH class can't be completed, also log + // every class load. The last class loaded before the + // CompletionFailure storm at ~6.6s is almost certainly the trigger. + "-J-Xlog:exceptions=info,class+load=info" ) }, From 7ce564b148a9eda0aa68ddf0274f0d9e3df60f78 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Tue, 28 Apr 2026 03:17:25 +0000 Subject: [PATCH 57/59] DEBUG: switch to javadoc -verbose to see user class file reads The previous `-Xlog:class+load=info` only captured JVM-level class loads, which are all JDK internals. javadoc reads user .java/.class files via its own `JavacFileManager`, bypassing the JVM class loader entirely -- so no user class shows up in `class+load`. javadoc's own `-verbose` flag emits "[parsing X.java]" / "[loading Y.class]" lines for those reads. 
Combined with `-Xlog:exceptions=info`, we should see the last `[loading ...]` or `[parsing ...]` immediately before the CompletionFailure storm starts at ~6.5s -- that's the file/class javac couldn't complete. Will revert once the trigger is identified. Co-authored-by: Isaac --- project/SparkBuild.scala | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 5f4001b271bde..c9944be859f01 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -1700,14 +1700,16 @@ object Unidoc { "-tag", "groupname:X", "-tag", "inheritdoc", "--ignore-source-errors", "-notree", - // DEBUG (revert before merge): the prior `exceptions=info` run revealed - // that javadoc isn't dying on an uncaught Throwable -- it's javac's - // ClassFinder spinning on a CompletionFailure (same Throwable address - // re-thrown ~938k times in 46s) before MissingResourceException kills - // the process. To identify WHICH class can't be completed, also log - // every class load. The last class loaded before the - // CompletionFailure storm at ~6.6s is almost certainly the trigger. - "-J-Xlog:exceptions=info,class+load=info" + // DEBUG (revert before merge): javadoc's own `-verbose` mode emits + // "[parsing X.java]" and "[loading Y.class]" messages, which the + // prior `-J-Xlog:class+load=info` did NOT (the JVM logger only sees + // bytecode loads via the class loader, not javadoc's own .java/.class + // reads via JavacFileManager). Combined with `-Xlog:exceptions=info`, + // the last `[loading ...]` or `[parsing ...]` line before the + // CompletionFailure storm at ~6.5s should pinpoint the user class + // that javac cannot resolve. 
+ "-verbose", + "-J-Xlog:exceptions=info" ) }, From df73ebaf83f351729befefa1196cf395dda2f4b0 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Tue, 28 Apr 2026 04:34:47 +0000 Subject: [PATCH 58/59] RelationCatalog: fix javadoc heading sequence (h3 -> h2); drop debug instrumentation The unidoc failure on this PR turned out to be a single doclint error: RelationCatalog.java:36: error: heading used out of sequence:

    <H3>, compared to implicit preceding heading: <H1>

    javadoc 17 enforces sequential HTML headings starting from <h1> (the implicit class title). Our class-level Javadoc jumped straight to <h3> for the three top-level sections ("Two principles", "Per-method cross-type behavior", "Single-RPC perf entry points"), which doclint rejects with `--ignore-source-errors` having no effect on doclint checks. The doclet generated all per-class HTML and index pages successfully, but javadoc still exited 1 because of this single doclint error -- which is what sbt-unidoc reported as "javadoc exited with exit code 1". The CompletionFailure storm we chased through several debug commits is unrelated noise emitted by a separate JVM that runs after the main javadoc completes; nothing in our PR's source set was actually un-completable. Promote the three <h3> tags to <h2> (the correct level beneath the class <h1>

    ) and revert the three debug-instrumentation pushes to javacOptions (`-J-Xlog:exceptions=info`, then `class+load=info`, then `-verbose`). Co-authored-by: Isaac --- project/SparkBuild.scala | 12 +----------- .../spark/sql/connector/catalog/RelationCatalog.java | 6 +++--- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index c9944be859f01..866a535c6d951 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -1699,17 +1699,7 @@ object Unidoc { "-tag", "todo:X", "-tag", "groupname:X", "-tag", "inheritdoc", - "--ignore-source-errors", "-notree", - // DEBUG (revert before merge): javadoc's own `-verbose` mode emits - // "[parsing X.java]" and "[loading Y.class]" messages, which the - // prior `-J-Xlog:class+load=info` did NOT (the JVM logger only sees - // bytecode loads via the class loader, not javadoc's own .java/.class - // reads via JavacFileManager). Combined with `-Xlog:exceptions=info`, - // the last `[loading ...]` or `[parsing ...]` line before the - // CompletionFailure storm at ~6.5s should pinpoint the user class - // that javac cannot resolve. - "-verbose", - "-J-Xlog:exceptions=info" + "--ignore-source-errors", "-notree" ) }, diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/RelationCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/RelationCatalog.java index ec0664d0cf37c..a0e2ddee6408e 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/RelationCatalog.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/RelationCatalog.java @@ -33,7 +33,7 @@ * tables implement just {@link TableCatalog}; connectors that expose only views implement just * {@link ViewCatalog}; this interface is not relevant to them. * - *

    <h3>Two principles</h3> + * <h2>Two principles</h2>

    * * A {@code RelationCatalog} follows two rules that, taken together, define every cross-cutting * subtlety: @@ -50,7 +50,7 @@ * discriminator. * * - *

    <h3>Per-method cross-type behavior</h3> + * <h2>Per-method cross-type behavior</h2>

    * * Active rejection (write-side methods that throw on cross-type collision): *
    @@ -94,7 +94,7 @@ * *
    {@link ViewCatalog#listViews}views only
    * - *

    <h3>Single-RPC perf entry points</h3> + * <h2>Single-RPC perf entry points</h2>

    * * The orthogonal {@link TableCatalog} and {@link ViewCatalog} answer two cross-cutting * questions in two round trips each. {@code RelationCatalog} adds dedicated methods so a From 57af4e1ff155ff2be8d10037e1da094375a7e8d9 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Tue, 28 Apr 2026 04:57:10 +0000 Subject: [PATCH 59/59] RelationCatalog: restore {@inheritDoc} on default-method overrides The earlier `7d833a0b35c` commit replaced inline `{@inheritDoc}` with explicit @param/@return/@throws prose on the four default-method overrides (loadTable, loadView, tableExists, viewExists), based on the hypothesis that `{@inheritDoc}` on multi-parent default-method overrides was crashing javadoc 17's tree builder. That hypothesis turned out to be wrong -- the actual unidoc-failure trigger was an unrelated `

    ` heading-sequence violation in the same file (fixed in `df73ebaf83f`). With the real cause identified, the verbose prose is unnecessary; restore the concise inline `{@inheritDoc}` form so the parent javadoc is properly inherited and the override docs only state what the override adds. The earlier block-form `@inheritDoc` -> inline `{@inheritDoc}` correctness fix in `f680aa2b8259` stays -- that one was a real syntax fix (block-form is invalid Javadoc and produces no inherited content); only the second hypothesis-driven change is being reverted. Co-authored-by: Isaac --- .../connector/catalog/RelationCatalog.java | 22 ++++--------------- 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/RelationCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/RelationCatalog.java index a0e2ddee6408e..bb674faa10ac5 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/RelationCatalog.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/RelationCatalog.java @@ -160,15 +160,11 @@ default TableSummary[] listRelationSummaries(String[] namespace) } /** - * Load a table by identifier. + * {@inheritDoc} *

    * The default implementation derives from {@link #loadRelation}: a {@link MetadataOnlyTable} * wrapping a {@link ViewInfo} is rejected as not-a-table; anything else is returned. Override * only if a tables-only path is materially cheaper than the unified one. - * - * @param ident a table identifier - * @return the table's metadata - * @throws NoSuchTableException if no table exists at {@code ident} */ @Override default Table loadTable(Identifier ident) throws NoSuchTableException { @@ -180,16 +176,12 @@ default Table loadTable(Identifier ident) throws NoSuchTableException { } /** - * Load a view by identifier. + * {@inheritDoc} *

    * The default implementation derives from {@link #loadRelation}: a {@link MetadataOnlyTable} * wrapping a {@link ViewInfo} is unwrapped and returned; anything else (table or absent) is * surfaced as {@link NoSuchViewException}. Override only if a views-only path is materially * cheaper than the unified one. - * - * @param ident a view identifier - * @return the view's metadata - * @throws NoSuchViewException if no view exists at {@code ident} */ @Override default ViewInfo loadView(Identifier ident) throws NoSuchViewException { @@ -206,13 +198,10 @@ default ViewInfo loadView(Identifier ident) throws NoSuchViewException { } /** - * Test whether a table exists at the given identifier. + * {@inheritDoc} *

    * The default implementation derives from {@link #loadRelation}: returns {@code true} only if * the entry exists and is not a view. Override only if a cheaper existence-check path exists. - * - * @param ident a table identifier - * @return {@code true} if a table exists at {@code ident}, {@code false} otherwise */ @Override default boolean tableExists(Identifier ident) { @@ -225,13 +214,10 @@ default boolean tableExists(Identifier ident) { } /** - * Test whether a view exists at the given identifier. + * {@inheritDoc} *

    * The default implementation derives from {@link #loadRelation}: returns {@code true} only if * the entry exists and is a view. Override only if a cheaper existence-check path exists. - * - * @param ident a view identifier - * @return {@code true} if a view exists at {@code ident}, {@code false} otherwise */ @Override default boolean viewExists(Identifier ident) {