diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java new file mode 100644 index 0000000000000..b20a9b566646f --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataOnlyTable.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.catalog; + +import java.util.Collections; +import java.util.Map; +import java.util.Objects; +import java.util.Set; + +import org.apache.spark.annotation.Evolving; +import org.apache.spark.sql.connector.catalog.constraints.Constraint; +import org.apache.spark.sql.connector.expressions.Transform; + +/** + * A concrete {@code Table} implementation that contains only table metadata, deferring + * read/write to Spark. It represents a general Spark data source table or a Spark view; + * Spark resolves the table provider into a data source (for tables) or expands the view text + * (for views) at read time. + *

+ * Catalogs build the metadata via {@link TableInfo.Builder} (for data-source tables) or
+ * {@link ViewInfo.Builder} (for views). A {@code MetadataOnlyTable} wrapping a
+ * {@link TableInfo} can be returned from {@link TableCatalog#loadTable(Identifier)} for a
+ * data-source table; a {@code MetadataOnlyTable} wrapping a {@link ViewInfo} can be returned
+ * from {@link RelationCatalog#loadRelation(Identifier)} as the single-RPC perf opt-in for a view.
+ * Downstream consumers distinguish the two by checking
+ * {@code getTableInfo() instanceof ViewInfo}.
+ *
+ * @since 4.2.0
+ */
+@Evolving
+public class MetadataOnlyTable implements Table {
+  private final TableInfo info;
+  private final String name;
+
+  /**
+   * @param info metadata for the table or view. Pass a {@link ViewInfo} for a view.
+   * @param name human-readable name for this table, used by places that read {@link #name()}
+   *             (e.g. the {@code Name} row of {@code DESCRIBE TABLE EXTENDED}). Catalogs
+   *             returning a {@code MetadataOnlyTable} from {@link TableCatalog#loadTable} or
+   *             {@link RelationCatalog#loadRelation} should typically pass
+   *             {@code ident.toString()}, matching the quoted multi-part form used elsewhere
+   *             for v2 identifiers.
+   */
+  public MetadataOnlyTable(TableInfo info, String name) {
+    this.info = Objects.requireNonNull(info, "info should not be null");
+    this.name = Objects.requireNonNull(name, "name should not be null");
+  }
+
+  public TableInfo getTableInfo() {
+    return info; // a ViewInfo instance here marks this object as a view
+  }
+
+  @Override
+  public Column[] columns() {
+    return info.columns();
+  }
+
+  @Override
+  public Map<String, String> properties() {
+    return Collections.unmodifiableMap(info.properties()); // read-only view, not a copy
+  }
+
+  @Override
+  public Transform[] partitioning() {
+    return info.partitions();
+  }
+
+  @Override
+  public Constraint[] constraints() {
+    return info.constraints();
+  }
+
+  @Override
+  public String name() {
+    return name;
+  }
+
+  @Override
+  public Set<TableCapability> capabilities() {
+    return Set.of(); // metadata only: no read/write capability is advertised by this object
+  }
+}
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/RelationCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/RelationCatalog.java new file mode 100644 index 0000000000000..bb674faa10ac5 --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/RelationCatalog.java @@ -0,0 +1,231 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql.connector.catalog; + +import java.util.ArrayList; + +import org.apache.spark.annotation.Evolving; +import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException; +import org.apache.spark.sql.catalyst.analysis.NoSuchTableException; +import org.apache.spark.sql.catalyst.analysis.NoSuchViewException; + +/** + * Catalog API for connectors that expose both tables and views in a single shared identifier + * namespace. + *

+ * Connectors that expose both tables and views must implement {@code RelationCatalog}; + * implementing {@link TableCatalog} and {@link ViewCatalog} directly without + * {@code RelationCatalog} is rejected at catalog initialization. Connectors that expose only + * tables implement just {@link TableCatalog}; connectors that expose only views implement just + * {@link ViewCatalog}; this interface is not relevant to them. + * + *

<h2>Two principles</h2>
+ *
+ * A {@code RelationCatalog} follows two rules that, taken together, define every cross-cutting
+ * subtlety:
+ * <ol>
+ *   <li><b>Orthogonal interfaces.</b> Every {@link TableCatalog} method behaves as if views did
+ *       not exist, and every {@link ViewCatalog} method behaves as if tables did not exist.
+ *       From the perspective of a {@code TableCatalog} caller, a view at an identifier is
+ *       indistinguishable from "nothing there"; symmetrically for {@code ViewCatalog} on
+ *       tables. The implementation, of course, knows about both kinds -- it just filters them
+ *       apart at each method boundary.</li>
+ *   <li><b>Single identifier namespace.</b> Tables and views share one keyspace within a
+ *       namespace; the same {@link Identifier} cannot resolve to both at the same time. The
+ *       implementation typically enforces this with a single backing keyspace plus a kind
+ *       discriminator.</li>
+ * </ol>
+ *
+ *

<h2>Per-method cross-type behavior</h2>
+ *
+ * <b>Active rejection</b> (write-side methods that throw on cross-type collision):
+ * <table>
+ *   <caption>Cross-type rejection</caption>
+ *   <tr><th>Method</th><th>Rejects when</th><th>Throws</th></tr>
+ *   <tr><td>{@link TableCatalog#createTable}</td><td>a view sits at {@code ident}</td>
+ *       <td>{@link org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException}</td></tr>
+ *   <tr><td>{@link TableCatalog#renameTable}</td><td>a view sits at {@code newIdent}</td>
+ *       <td>{@link org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException}</td></tr>
+ *   <tr><td>{@link ViewCatalog#createView}</td><td>a table sits at {@code ident}</td>
+ *       <td>{@link org.apache.spark.sql.catalyst.analysis.ViewAlreadyExistsException}</td></tr>
+ *   <tr><td>{@link ViewCatalog#createOrReplaceView}</td><td>a table sits at {@code ident}</td>
+ *       <td>{@link org.apache.spark.sql.catalyst.analysis.ViewAlreadyExistsException}</td></tr>
+ *   <tr><td>{@link ViewCatalog#replaceView}</td><td>a table sits at {@code ident}</td>
+ *       <td>{@link org.apache.spark.sql.catalyst.analysis.NoSuchViewException}</td></tr>
+ * </table>
+ *
+ * <b>Passive filtering</b> (read / non-collision mutation methods that behave as if the wrong
+ * kind doesn't exist):
+ * <table>
+ *   <caption>Cross-type filtering</caption>
+ *   <tr><th>Method</th><th>On wrong-kind ident</th></tr>
+ *   <tr><td>{@link TableCatalog#loadTable(Identifier)}</td>
+ *       <td>throws {@code NoSuchTableException} for a view</td></tr>
+ *   <tr><td>{@link TableCatalog#loadTable(Identifier, String)} /
+ *       {@link TableCatalog#loadTable(Identifier, long)}</td>
+ *       <td>throws {@code NoSuchTableException} for a view (no perf opt-in -- time-travel does
+ *       not apply to views)</td></tr>
+ *   <tr><td>{@link TableCatalog#tableExists}</td><td>returns {@code false} for a view</td></tr>
+ *   <tr><td>{@link TableCatalog#dropTable} / {@link TableCatalog#purgeTable}</td>
+ *       <td>returns {@code false} for a view; does not drop it</td></tr>
+ *   <tr><td>{@link TableCatalog#renameTable}</td>
+ *       <td>throws {@code NoSuchTableException} when the source is a view</td></tr>
+ *   <tr><td>{@link TableCatalog#listTables}</td><td>tables only</td></tr>
+ *   <tr><td>{@link ViewCatalog#loadView}</td>
+ *       <td>throws {@code NoSuchViewException} for a table</td></tr>
+ *   <tr><td>{@link ViewCatalog#viewExists}</td><td>returns {@code false} for a table</td></tr>
+ *   <tr><td>{@link ViewCatalog#dropView}</td>
+ *       <td>returns {@code false} for a table; does not drop it</td></tr>
+ *   <tr><td>{@link ViewCatalog#listViews}</td><td>views only</td></tr>
+ * </table>
+ *
+ *

<h2>Single-RPC perf entry points</h2>

+ *
+ * The orthogonal {@link TableCatalog} and {@link ViewCatalog} answer two cross-cutting
+ * questions in two round trips each. {@code RelationCatalog} adds dedicated methods so a
+ * catalog can answer both in one round trip:
+ * <ul>
+ *   <li>{@link #loadRelation} resolves an identifier that may be either a table or a view.</li>
+ *   <li>{@link #listRelationSummaries} lists the tables and views of a namespace together.</li>
+ * </ul>
+ *
+ * @since 4.2.0
+ */
+@Evolving
+public interface RelationCatalog extends TableCatalog, ViewCatalog {
+
+  /**
+   * Load metadata for an identifier that may resolve to either a table or a view.
+   * <p>
+   * For a table, returns the table's {@link Table}. For a view, returns a
+   * {@link MetadataOnlyTable} wrapping a {@link ViewInfo}; callers discriminate via
+   * {@code getTableInfo() instanceof ViewInfo}. This lets the resolver answer in a single RPC
+   * instead of falling back from {@link TableCatalog#loadTable} to {@link ViewCatalog#loadView}.
+   *
+   * @param ident the identifier
+   * @return a {@link Table} for tables, or a {@link MetadataOnlyTable} wrapping a
+   *         {@link ViewInfo} for views
+   * @throws NoSuchTableException if neither a table nor a view exists at {@code ident}
+   */
+  Table loadRelation(Identifier ident) throws NoSuchTableException;
+
+  /**
+   * List the tables and views in a namespace, returned as {@link TableSummary} entries with
+   * the kind preserved on each summary.
+   * <p>
+   * The default implementation enumerates via {@link TableCatalog#listTableSummaries} for
+   * tables and {@link ViewCatalog#listViews} for views (two round trips). Catalogs that can
+   * fetch the unified listing in a single round trip should override.
+   *
+   * @param namespace a multi-part namespace
+   * @return an array of summaries for both tables and views in the namespace
+   * @throws NoSuchNamespaceException if the namespace does not exist (optional)
+   * @throws NoSuchTableException if a table listed by the underlying enumeration disappears
+   *                              before its summary can be assembled (default impl only)
+   */
+  default TableSummary[] listRelationSummaries(String[] namespace)
+      throws NoSuchNamespaceException, NoSuchTableException {
+    TableSummary[] tableSummaries = listTableSummaries(namespace);
+    Identifier[] viewIdentifiers = listViews(namespace);
+    // Typed list: a raw ArrayList would make toArray(TableSummary[]::new) yield Object[].
+    ArrayList<TableSummary> all = new ArrayList<>(
+        tableSummaries.length + viewIdentifiers.length);
+    for (TableSummary s : tableSummaries) {
+      all.add(s);
+    }
+    // listViews returns identifiers only, so each view summary is built with the view type.
+    for (Identifier id : viewIdentifiers) {
+      all.add(TableSummary.of(id, TableSummary.VIEW_TABLE_TYPE));
+    }
+    return all.toArray(TableSummary[]::new);
+  }
+
+  /**
+   * {@inheritDoc}
+   * <p>
+   * The default implementation derives from {@link #loadRelation}: a {@link MetadataOnlyTable}
+   * wrapping a {@link ViewInfo} is rejected as not-a-table; anything else is returned. Override
+   * only if a tables-only path is materially cheaper than the unified one.
+   */
+  @Override
+  default Table loadTable(Identifier ident) throws NoSuchTableException {
+    Table t = loadRelation(ident);
+    if (t instanceof MetadataOnlyTable mot && mot.getTableInfo() instanceof ViewInfo) {
+      throw new NoSuchTableException(ident);
+    }
+    return t;
+  }
+
+  /**
+   * {@inheritDoc}
+   * <p>
+   * The default implementation derives from {@link #loadRelation}: a {@link MetadataOnlyTable}
+   * wrapping a {@link ViewInfo} is unwrapped and returned; anything else (table or absent) is
+   * surfaced as {@link NoSuchViewException}. Override only if a views-only path is materially
+   * cheaper than the unified one.
+   */
+  @Override
+  default ViewInfo loadView(Identifier ident) throws NoSuchViewException {
+    Table t;
+    try {
+      t = loadRelation(ident);
+    } catch (NoSuchTableException e) {
+      // Nothing of either kind exists at ident; report it in view terms for this API.
+      throw new NoSuchViewException(ident);
+    }
+    if (t instanceof MetadataOnlyTable mot && mot.getTableInfo() instanceof ViewInfo vi) {
+      return vi;
+    }
+    throw new NoSuchViewException(ident);
+  }
+
+  /**
+   * {@inheritDoc}
+   * <p>
+   * The default implementation derives from {@link #loadRelation}: returns {@code true} only if
+   * the entry exists and is not a view. Override only if a cheaper existence-check path exists.
+   */
+  @Override
+  default boolean tableExists(Identifier ident) {
+    try {
+      Table t = loadRelation(ident);
+      return !(t instanceof MetadataOnlyTable mot && mot.getTableInfo() instanceof ViewInfo);
+    } catch (NoSuchTableException e) {
+      return false;
+    }
+  }
+
+  /**
+   * {@inheritDoc}
+   * <p>
+   * The default implementation derives from {@link #loadRelation}: returns {@code true} only if
+   * the entry exists and is a view. Override only if a cheaper existence-check path exists.
+   */
+  @Override
+  default boolean viewExists(Identifier ident) {
+    try {
+      Table t = loadRelation(ident);
+      return t instanceof MetadataOnlyTable mot && mot.getTableInfo() instanceof ViewInfo;
+    } catch (NoSuchTableException e) {
+      return false;
+    }
+  }
+}
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java index d5a36cd8bfb86..55894357f19d1 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java @@ -31,13 +31,18 @@ import java.util.Set; /** - * Catalog methods for working with Tables. + * Catalog API for connectors that expose tables. + *

+ * Connectors that expose only tables implement this interface. Connectors that expose + * both tables and views must implement {@link RelationCatalog} (which extends both this + * interface and {@link ViewCatalog} and adds the cross-cutting contract for the combined + * case); the methods on this interface remain table-only -- they do not interact with views. *

* TableCatalog implementations may be case-sensitive or case-insensitive. Spark will pass * {@link Identifier table identifiers} without modification. Field names passed to - * {@link #alterTable(Identifier, TableChange...)} will be normalized to match the case used in the - * table schema when updating, renaming, or dropping existing columns when catalyst analysis is - * case-insensitive. + * {@link #alterTable(Identifier, TableChange...)} will be normalized to match the case used in + * the table schema when updating, renaming, or dropping existing columns when catalyst + * analysis is case-insensitive. * * @since 3.0.0 */ @@ -99,8 +104,6 @@ public interface TableCatalog extends CatalogPlugin { /** * List the tables in a namespace from the catalog. - *

- * If the catalog supports views, this must return identifiers for only tables and not views. * * @param namespace a multi-part namespace * @return an array of Identifiers for tables @@ -111,11 +114,14 @@ public interface TableCatalog extends CatalogPlugin { /** * List the table summaries in a namespace from the catalog. *

- * This method should return all tables entities from a catalog regardless of type (i.e. views - * should be listed as well). + * Returns one summary per entry returned by {@link #listTables}. Each {@link TableSummary} + * carries the entry's {@code tableType}. + *

+ * The default implementation enumerates via {@link #listTables} + {@link #loadTable}. + * Catalogs that can fetch summaries in a single round-trip should override. * * @param namespace a multi-part namespace - * @return an array of Identifiers for tables + * @return an array of summaries for tables in the namespace * @throws NoSuchNamespaceException If the namespace does not exist (optional). * @throws NoSuchTableException If certain table listed by listTables API does not exist. */ @@ -139,27 +145,21 @@ default TableSummary[] listTableSummaries(String[] namespace) /** * Load table metadata by {@link Identifier identifier} from the catalog. - *

- * If the catalog supports views and contains a view for the identifier and not a table, this - * must throw {@link NoSuchTableException}. * * @param ident a table identifier * @return the table's metadata - * @throws NoSuchTableException If the table doesn't exist or is a view + * @throws NoSuchTableException If the table doesn't exist */ Table loadTable(Identifier ident) throws NoSuchTableException; /** * Load table metadata by {@link Identifier identifier} from the catalog. Spark will write data * into this table later. - *

- * If the catalog supports views and contains a view for the identifier and not a table, this - * must throw {@link NoSuchTableException}. * * @param ident a table identifier * @param writePrivileges * @return the table's metadata - * @throws NoSuchTableException If the table doesn't exist or is a view + * @throws NoSuchTableException If the table doesn't exist * * @since 3.5.3 */ @@ -171,14 +171,11 @@ default Table loadTable( /** * Load table metadata of a specific version by {@link Identifier identifier} from the catalog. - *

- * If the catalog supports views and contains a view for the identifier and not a table, this - * must throw {@link NoSuchTableException}. * * @param ident a table identifier * @param version version of the table * @return the table's metadata - * @throws NoSuchTableException If the table doesn't exist or is a view + * @throws NoSuchTableException If the table doesn't exist */ default Table loadTable(Identifier ident, String version) throws NoSuchTableException { throw QueryCompilationErrors.noSuchTableError(name(), ident); @@ -186,14 +183,11 @@ default Table loadTable(Identifier ident, String version) throws NoSuchTableExce /** * Load table metadata at a specific time by {@link Identifier identifier} from the catalog. - *

- * If the catalog supports views and contains a view for the identifier and not a table, this - * must throw {@link NoSuchTableException}. * * @param ident a table identifier * @param timestamp timestamp of the table, which is microseconds since 1970-01-01 00:00:00 UTC * @return the table's metadata - * @throws NoSuchTableException If the table doesn't exist or is a view + * @throws NoSuchTableException If the table doesn't exist */ default Table loadTable(Identifier ident, long timestamp) throws NoSuchTableException { throw QueryCompilationErrors.noSuchTableError(name(), ident); @@ -232,12 +226,9 @@ default void invalidateTable(Identifier ident) { /** * Test whether a table exists using an {@link Identifier identifier} from the catalog. - *

- * If the catalog supports views and contains a view for the identifier and not a table, this - * must return false. * * @param ident a table identifier - * @return true if the table exists, false otherwise + * @return true if a table exists at {@code ident}, false otherwise */ default boolean tableExists(Identifier ident) { try { @@ -281,11 +272,11 @@ default Table createTable( * Create a table in the catalog. * * @param ident a table identifier - * @param tableInfo information about the table. + * @param tableInfo information about the table * @return metadata for the new table. This can be null if getting the metadata for the new table * is expensive. Spark will call {@link #loadTable(Identifier)} if needed (e.g. CTAS). * - * @throws TableAlreadyExistsException If a table or view already exists for the identifier + * @throws TableAlreadyExistsException If a table already exists for the identifier * @throws UnsupportedOperationException If a requested partition transform is not supported * @throws NoSuchNamespaceException If the identifier namespace does not exist (optional) * @since 4.1.0 @@ -317,7 +308,7 @@ default Table createTable(Identifier ident, TableInfo tableInfo) * or other custom state from this object to clone additional metadata * @return metadata for the new table * - * @throws TableAlreadyExistsException If a table or view already exists for the identifier + * @throws TableAlreadyExistsException If a table already exists for the identifier * @throws NoSuchNamespaceException If the identifier namespace does not exist (optional) * @throws UnsupportedOperationException If the catalog does not support CREATE TABLE LIKE * @since 4.2.0 @@ -343,16 +334,13 @@ default boolean useNullableQuerySchema() { * changes should be applied to the table. *

* The requested changes must be applied in the order given. - *

- * If the catalog supports views and contains a view for the identifier and not a table, this - * must throw {@link NoSuchTableException}. * * @param ident a table identifier * @param changes changes to apply to the table * @return updated metadata for the table. This can be null if getting the metadata for the * updated table is expensive. Spark always discard the returned table here. * - * @throws NoSuchTableException If the table doesn't exist or is a view + * @throws NoSuchTableException If the table doesn't exist * @throws IllegalArgumentException If any change is rejected by the implementation. */ Table alterTable( @@ -361,9 +349,6 @@ Table alterTable( /** * Drop a table in the catalog. - *

- * If the catalog supports views and contains a view for the identifier and not a table, this - * must not drop the view and must return false. * * @param ident a table identifier * @return true if a table was deleted, false if no table exists for the identifier @@ -374,9 +359,6 @@ Table alterTable( * Drop a table in the catalog and completely remove its data by skipping a trash even if it is * supported. *

- * If the catalog supports views and contains a view for the identifier and not a table, this - * must not drop the view and must return false. - *

* If the catalog supports to purge a table, this method should be overridden. * The default implementation throws {@link UnsupportedOperationException}. * @@ -393,17 +375,13 @@ default boolean purgeTable(Identifier ident) throws UnsupportedOperationExceptio /** * Renames a table in the catalog. *

- * If the catalog supports views and contains a view for the old identifier and not a table, this - * throws {@link NoSuchTableException}. Additionally, if the new identifier is a table or a view, - * this throws {@link TableAlreadyExistsException}. - *

* If the catalog does not support table renames between namespaces, it throws * {@link UnsupportedOperationException}. * * @param oldIdent the table identifier of the existing table to rename * @param newIdent the new table identifier of the table - * @throws NoSuchTableException If the table to rename doesn't exist or is a view - * @throws TableAlreadyExistsException If the new table name already exists or is a view + * @throws NoSuchTableException If the table to rename doesn't exist + * @throws TableAlreadyExistsException If the new table name already exists * @throws UnsupportedOperationException If the namespaces of old and new identifiers do not * match (optional) */ diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableInfo.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableInfo.java index 9870a3b0fa45d..89709c9f1c2f0 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableInfo.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableInfo.java @@ -33,9 +33,8 @@ public class TableInfo { /** * Constructor for TableInfo used by the builder. - * @param builder Builder. 
*/ - private TableInfo(Builder builder) { + protected TableInfo(BaseBuilder<?> builder) { this.columns = builder.columns; this.properties = builder.properties; this.partitions = builder.partitions; @@ -60,35 +59,96 @@ public Transform[] partitions() { public Constraint[] constraints() { return constraints; } - public static class Builder { - private Column[] columns = new Column[0]; - private Map<String, String> properties = new HashMap<>(); - private Transform[] partitions = new Transform[0]; - private Constraint[] constraints = new Constraint[0]; + public static class Builder extends BaseBuilder<Builder> { + @Override + protected Builder self() { return this; } - public Builder withColumns(Column[] columns) { + @Override + public TableInfo build() { + Objects.requireNonNull(columns, "columns should not be null"); + return new TableInfo(this); + } + } + + /** + * Shared builder state for {@link TableInfo} and its subclasses. Setters return {@code B} so + * subclass builders (e.g. {@link ViewInfo.Builder}) chain through their own type without + * a covariant override on each inherited setter. + */ + protected abstract static class BaseBuilder<B extends BaseBuilder<B>> { + protected Column[] columns = new Column[0]; + protected Map<String, String> properties = new HashMap<>(); + protected Transform[] partitions = new Transform[0]; + protected Constraint[] constraints = new Constraint[0]; + + protected abstract B self(); + + public B withColumns(Column[] columns) { this.columns = columns; - return this; + return self(); } - public Builder withProperties(Map<String, String> properties) { - this.properties = properties; - return this; + public B withSchema(StructType schema) { + this.columns = CatalogV2Util.structTypeToV2Columns(schema); + return self(); } - public Builder withPartitions(Transform[] partitions) { + /** + * Replaces the current properties map with a defensive copy of the given map. Any reserved + * keys set earlier via convenience setters (e.g. {@link #withProvider}) are discarded -- + * call those setters after this method, not before. 
+ */ + public B withProperties(Map<String, String> properties) { + this.properties = new HashMap<>(properties); + return self(); + } + + public B withPartitions(Transform[] partitions) { this.partitions = partitions; - return this; + return self(); } - public Builder withConstraints(Constraint[] constraints) { + public B withConstraints(Constraint[] constraints) { this.constraints = constraints; - return this; + return self(); } - public TableInfo build() { - Objects.requireNonNull(columns, "columns should not be null"); - return new TableInfo(this); + // Convenience setters below write reserved keys into the current `properties` map. Pair + // each with a preceding `withProperties(...)` call if you want to start from a user map; + // calling `withProperties` after a convenience setter discards the value the convenience + // setter wrote. + + /** Writes {@link TableCatalog#PROP_PROVIDER} into the current properties map. */ + public B withProvider(String provider) { + properties.put(TableCatalog.PROP_PROVIDER, provider); + return self(); + } + + public B withLocation(String location) { + properties.put(TableCatalog.PROP_LOCATION, location); + return self(); } + + public B withComment(String comment) { + properties.put(TableCatalog.PROP_COMMENT, comment); + return self(); + } + + public B withCollation(String collation) { + properties.put(TableCatalog.PROP_COLLATION, collation); + return self(); + } + + public B withOwner(String owner) { + properties.put(TableCatalog.PROP_OWNER, owner); + return self(); + } + + public B withTableType(String tableType) { + properties.put(TableCatalog.PROP_TABLE_TYPE, tableType); + return self(); + } + + public abstract TableInfo build(); } } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/View.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/View.java deleted file mode 100644 index a4dc5f2f2d20f..0000000000000 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/View.java +++ 
/dev/null @@ -1,74 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.connector.catalog; - -import java.util.Map; - -import org.apache.spark.annotation.DeveloperApi; -import org.apache.spark.sql.types.StructType; - -/** - * An interface representing a persisted view. - */ -@DeveloperApi -public interface View { - /** - * A name to identify this view. - */ - String name(); - - /** - * The view query SQL text. - */ - String query(); - - /** - * The current catalog when the view is created. - */ - String currentCatalog(); - - /** - * The current namespace when the view is created. - */ - String[] currentNamespace(); - - /** - * The schema for the view when the view is created after applying column aliases. - */ - StructType schema(); - - /** - * The output column names of the query that creates this view. - */ - String[] queryColumnNames(); - - /** - * The view column aliases. - */ - String[] columnAliases(); - - /** - * The view column comments. - */ - String[] columnComments(); - - /** - * The view properties. 
- */ - Map properties(); -} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewCatalog.java index abe5fb3148d08..184676023d7c4 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewCatalog.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewCatalog.java @@ -14,186 +14,135 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.spark.sql.connector.catalog; -import java.util.Arrays; -import java.util.List; - -import org.apache.spark.annotation.DeveloperApi; +import org.apache.spark.annotation.Evolving; import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException; import org.apache.spark.sql.catalyst.analysis.NoSuchViewException; import org.apache.spark.sql.catalyst.analysis.ViewAlreadyExistsException; /** - * Catalog methods for working with views. + * Catalog API for connectors that expose views. + *

+ * Connectors that expose only views implement this interface. Connectors that expose + * both tables and views must implement {@link RelationCatalog} (which extends both this + * interface and {@link TableCatalog} and adds the cross-cutting contract for the combined + * case); the methods on this interface remain view-only -- they do not interact with tables. + *

+ * The presence of {@code ViewCatalog} on the catalog plugin is the signal that it + * supports views; there is no capability flag to declare. + * + * @since 4.2.0 */ -@DeveloperApi +@Evolving public interface ViewCatalog extends CatalogPlugin { - /** - * A reserved property to specify the description of the view. - */ - String PROP_COMMENT = "comment"; - - /** - * A reserved property to specify the owner of the view. - */ - String PROP_OWNER = "owner"; - - /** - * A reserved property to specify the software version used to create the view. - */ - String PROP_CREATE_ENGINE_VERSION = "create_engine_version"; - - /** - * A reserved property to specify the software version used to change the view. - */ - String PROP_ENGINE_VERSION = "engine_version"; - - /** - * All reserved properties of the view. - */ - List RESERVED_PROPERTIES = Arrays.asList( - PROP_COMMENT, - PROP_OWNER, - PROP_CREATE_ENGINE_VERSION, - PROP_ENGINE_VERSION); - /** * List the views in a namespace from the catalog. - *

- * If the catalog supports tables, this must return identifiers for only views and not tables. * * @param namespace a multi-part namespace - * @return an array of Identifiers for views - * @throws NoSuchNamespaceException If the namespace does not exist (optional). + * @return an array of identifiers for views + * @throws NoSuchNamespaceException if the namespace does not exist (optional) */ - Identifier[] listViews(String... namespace) throws NoSuchNamespaceException; + Identifier[] listViews(String[] namespace) throws NoSuchNamespaceException; /** - * Load view metadata by {@link Identifier ident} from the catalog. - *

- * If the catalog supports tables and contains a table for the identifier and not a view, - * this must throw {@link NoSuchViewException}. + * Load view metadata by identifier. * * @param ident a view identifier - * @return the view description - * @throws NoSuchViewException If the view doesn't exist or is a table + * @return the view metadata + * @throws NoSuchViewException if the view does not exist */ - View loadView(Identifier ident) throws NoSuchViewException; + ViewInfo loadView(Identifier ident) throws NoSuchViewException; /** - * Invalidate cached view metadata for an {@link Identifier identifier}. + * Test whether a view exists. *

- * If the view is already loaded or cached, drop cached data. If the view does not exist or is - * not cached, do nothing. Calling this method should not query remote services. + * The default implementation calls {@link #loadView} and catches {@link NoSuchViewException}. + * Catalogs that can answer existence cheaply should override. * * @param ident a view identifier - */ - default void invalidateView(Identifier ident) { - } - - /** - * Test whether a view exists using an {@link Identifier identifier} from the catalog. - *

- * If the catalog supports views and contains a view for the identifier and not a table, - * this must return false. - * - * @param ident a view identifier - * @return true if the view exists, false otherwise + * @return true if a view exists at {@code ident}, false otherwise */ default boolean viewExists(Identifier ident) { try { - return loadView(ident) != null; + loadView(ident); + return true; } catch (NoSuchViewException e) { return false; } } /** - * Create a view in the catalog. + * Invalidate cached metadata for a view. + *

+ * If the view is currently cached, drop the cached entry; otherwise do nothing. This must not + * issue remote calls. * - * @param viewInfo the info class holding all view information - * @return the created view. This can be null if getting the metadata for the view is expensive - * @throws ViewAlreadyExistsException If a view or table already exists for the identifier - * @throws NoSuchNamespaceException If the identifier namespace does not exist (optional) + * @param ident a view identifier */ - View createView(ViewInfo viewInfo) throws ViewAlreadyExistsException, NoSuchNamespaceException; + default void invalidateView(Identifier ident) { + } /** - * Replace a view in the catalog. - *

- * The default implementation has a race condition. - * Catalogs are encouraged to implement this operation atomically. + * Create a view. * - * @param viewInfo the info class holding all view information - * @param orCreate create the view if it doesn't exist - * @return the created/replaced view. This can be null if getting the metadata - * for the view is expensive - * @throws NoSuchViewException If the view doesn't exist or is a table - * @throws NoSuchNamespaceException If the identifier namespace does not exist (optional) + * @param ident the view identifier + * @param info the view metadata + * @return the metadata of the newly created view; may equal {@code info} + * @throws ViewAlreadyExistsException if a view already exists at {@code ident} + * @throws NoSuchNamespaceException if the identifier's namespace does not exist (optional) */ - default View replaceView( - ViewInfo viewInfo, - boolean orCreate) - throws NoSuchViewException, NoSuchNamespaceException { - if (viewExists(viewInfo.ident())) { - dropView(viewInfo.ident()); - } else if (!orCreate) { - throw new NoSuchViewException(viewInfo.ident()); - } - - try { - return createView(viewInfo); - } catch (ViewAlreadyExistsException e) { - throw new RuntimeException("Race condition when creating/replacing view", e); - } - } + ViewInfo createView(Identifier ident, ViewInfo info) + throws ViewAlreadyExistsException, NoSuchNamespaceException; /** - * Apply {@link ViewChange changes} to a view in the catalog. + * Atomically replace an existing view's metadata. *

- * Implementations may reject the requested changes. If any change is rejected, none of the - * changes should be applied to the view. + * Used by {@code ALTER VIEW ... AS}. Implementations should commit the new metadata + * atomically; views carry no data, so a single transactional metastore call (or equivalent) + * is sufficient -- there is no separate staging API. * - * @param ident a view identifier - * @param changes an array of changes to apply to the view - * @return the view altered - * @throws NoSuchViewException If the view doesn't exist or is a table. - * @throws IllegalArgumentException If any change is rejected by the implementation. + * @param ident the view identifier + * @param info the new view metadata + * @return the metadata of the replaced view; may equal {@code info} + * @throws NoSuchViewException if no view exists at {@code ident} */ - View alterView(Identifier ident, ViewChange... changes) - throws NoSuchViewException, IllegalArgumentException; + ViewInfo replaceView(Identifier ident, ViewInfo info) throws NoSuchViewException; /** - * Drop a view in the catalog. + * Create a view if one does not exist at {@code ident}, or atomically replace it if one does. *

- * If the catalog supports tables and contains a table for the identifier and not a view, this - * must not drop the table and must return false. + * Used by {@code CREATE OR REPLACE VIEW}. The default implementation calls + * {@link #replaceView}, falling back to {@link #createView} on + * {@link NoSuchViewException}. The fallback is non-atomic across the two calls (a concurrent + * drop or create can race), so catalogs that can answer the upsert in a single transactional + * call should override this method to collapse to one RPC and to make the swap atomic. * - * @param ident a view identifier - * @return true if a view was deleted, false if no view exists for the identifier + * @param ident the view identifier + * @param info the view metadata + * @return the metadata of the created or replaced view; may equal {@code info} + * @throws ViewAlreadyExistsException if {@code ident} cannot host this view -- either a + * concurrent {@code CREATE VIEW} won the race in the + * default impl's gap between {@link #replaceView} and + * the fallback {@link #createView}, or, in a + * {@link RelationCatalog}, a table sits at {@code ident} + * @throws NoSuchNamespaceException if the identifier's namespace does not exist (optional) */ - boolean dropView(Identifier ident); + default ViewInfo createOrReplaceView(Identifier ident, ViewInfo info) + throws ViewAlreadyExistsException, NoSuchNamespaceException { + try { + return replaceView(ident, info); + } catch (NoSuchViewException e) { + return createView(ident, info); + } + } /** - * Rename a view in the catalog. - *

- * If the catalog supports tables and contains a table with the old identifier, this throws - * {@link NoSuchViewException}. Additionally, if it contains a table with the new identifier, - * this throws {@link ViewAlreadyExistsException}. - *

- * If the catalog does not support view renames between namespaces, it throws - * {@link UnsupportedOperationException}. + * Drop a view. * - * @param oldIdent the view identifier of the existing view to rename - * @param newIdent the new view identifier of the view - * @throws NoSuchViewException If the view to rename doesn't exist or is a table - * @throws ViewAlreadyExistsException If the new view name already exists or is a table - * @throws UnsupportedOperationException If the namespaces of old and new identifiers do not - * match (optional) + * @param ident a view identifier + * @return true if a view was dropped, false otherwise */ - void renameView(Identifier oldIdent, Identifier newIdent) - throws NoSuchViewException, ViewAlreadyExistsException; + boolean dropView(Identifier ident); } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewChange.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewChange.java deleted file mode 100644 index c94933beed7f6..0000000000000 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewChange.java +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.connector.catalog; - -import org.apache.spark.annotation.DeveloperApi; - -/** - * ViewChange subclasses represent requested changes to a view. - * These are passed to {@link ViewCatalog#alterView}. - */ -@DeveloperApi -public interface ViewChange { - - /** - * Create a ViewChange for setting a table property. - * - * @param property the property name - * @param value the new property value - * @return a ViewChange - */ - static ViewChange setProperty(String property, String value) { - return new SetProperty(property, value); - } - - /** - * Create a ViewChange for removing a table property. - * - * @param property the property name - * @return a ViewChange - */ - static ViewChange removeProperty(String property) { - return new RemoveProperty(property); - } - - final class SetProperty implements ViewChange { - private final String property; - private final String value; - - private SetProperty(String property, String value) { - this.property = property; - this.value = value; - } - - public String property() { - return property; - } - - public String value() { - return value; - } - } - - final class RemoveProperty implements ViewChange { - private final String property; - - private RemoveProperty(String property) { - this.property = property; - } - - public String property() { - return property; - } - } -} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewInfo.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewInfo.java index b01e133365661..da82de01f8e4d 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewInfo.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewInfo.java @@ -14,168 +14,139 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.spark.sql.connector.catalog; -import org.apache.spark.annotation.DeveloperApi; -import org.apache.spark.sql.types.StructType; - -import javax.annotation.Nonnull; - -import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; import java.util.Map; import java.util.Objects; -import java.util.StringJoiner; + +import org.apache.spark.annotation.Evolving; /** - * A class that holds view information. + * View metadata DTO -- the typed payload returned by {@link ViewCatalog#loadView} and accepted + * by {@link ViewCatalog#createView} / {@link ViewCatalog#replaceView}. Carries the + * view-specific fields that cannot be represented as string table properties: the query text, + * captured creation-time resolution context, captured SQL configs, schema-binding mode, and + * query output column names. Schema and user TBLPROPERTIES are inherited from {@link TableInfo} + * via the typed builder. + *

+ * {@code ViewInfo} extends {@link TableInfo} so that a {@link RelationCatalog} can opt into the + * single-RPC perf path by returning a {@link MetadataOnlyTable} wrapping a {@code ViewInfo} + * from {@link RelationCatalog#loadRelation} for a view identifier. Pure {@link ViewCatalog} + * implementations never see {@code TableInfo}; the typed setters on {@link Builder} cover + * everything they need to construct a {@code ViewInfo}. + * + * @since 4.2.0 */ -@DeveloperApi -public class ViewInfo { - private final Identifier ident; - private final String sql; +@Evolving +public class ViewInfo extends TableInfo { + + private final String queryText; private final String currentCatalog; private final String[] currentNamespace; - private final StructType schema; + private final Map sqlConfigs; + private final String schemaMode; private final String[] queryColumnNames; - private final String[] columnAliases; - private final String[] columnComments; - private final Map properties; - - public ViewInfo( - Identifier ident, - String sql, - String currentCatalog, - String[] currentNamespace, - StructType schema, - String[] queryColumnNames, - String[] columnAliases, - String[] columnComments, - Map properties) { - this.ident = ident; - this.sql = sql; - this.currentCatalog = currentCatalog; - this.currentNamespace = currentNamespace; - this.schema = schema; - this.queryColumnNames = queryColumnNames; - this.columnAliases = columnAliases; - this.columnComments = columnComments; - this.properties = properties; - } - /** - * @return The view identifier - */ - @Nonnull - public Identifier ident() { - return ident; + private ViewInfo(Builder builder) { + super(builder); + this.queryText = Objects.requireNonNull(builder.queryText, "queryText should not be null"); + this.currentCatalog = builder.currentCatalog; + this.currentNamespace = builder.currentNamespace; + this.sqlConfigs = Collections.unmodifiableMap(builder.sqlConfigs); + this.schemaMode = builder.schemaMode; + 
this.queryColumnNames = builder.queryColumnNames; + // Force PROP_TABLE_TYPE = VIEW so that `properties()` reflects the typed ViewInfo + // classification. Catalogs and generic viewers reading PROP_TABLE_TYPE from the properties + // bag (e.g. TableCatalog.listTableSummaries default impl, DESCRIBE) see "VIEW" without + // requiring authors to remember to call withTableType(VIEW). + properties().put(TableCatalog.PROP_TABLE_TYPE, TableSummary.VIEW_TABLE_TYPE); } - /** - * @return The SQL text that defines the view - */ - @Nonnull - public String sql() { - return sql; - } + /** The SQL text of the view. */ + public String queryText() { return queryText; } /** - * @return The current catalog + * The current catalog at the time the view was created, used to resolve unqualified + * identifiers in {@link #queryText()} at read time. May be {@code null} if the view was + * created with no captured resolution context. */ - @Nonnull - public String currentCatalog() { - return currentCatalog; - } + public String currentCatalog() { return currentCatalog; } /** - * @return The current namespace + * The current namespace at the time the view was created, used alongside + * {@link #currentCatalog()} to resolve unqualified identifiers in {@link #queryText()} at + * read time. Never {@code null}; empty when no namespace was captured. */ - @Nonnull - public String[] currentNamespace() { - return currentNamespace; - } + public String[] currentNamespace() { return currentNamespace; } /** - * @return The view query output schema + * The SQL configs captured at view creation time, applied when parsing and analyzing the + * view body. Keys are unprefixed SQL config names (e.g. {@code spark.sql.ansi.enabled}). */ - @Nonnull - public StructType schema() { - return schema; - } + public Map sqlConfigs() { return sqlConfigs; } /** - * @return The query column names + * The view's schema binding mode. 
Allowed values match the {@code toString} form of + * {@code org.apache.spark.sql.catalyst.analysis.ViewSchemaMode}: + * {@code BINDING}, {@code COMPENSATION}, {@code TYPE EVOLUTION}, {@code EVOLUTION}. + * May be {@code null} when schema binding is not configured. */ - @Nonnull - public String[] queryColumnNames() { - return queryColumnNames; - } + public String schemaMode() { return schemaMode; } /** - * @return The column aliases + * Output column names of the query that created the view, used to map the query output to + * the view's declared columns during view resolution. Empty for views in {@code EVOLUTION} + * mode, which always use the view's current schema. */ - @Nonnull - public String[] columnAliases() { - return columnAliases; - } + public String[] queryColumnNames() { return queryColumnNames; } + + public static class Builder extends BaseBuilder { + private String queryText; + private String currentCatalog; + private String[] currentNamespace = new String[0]; + private Map sqlConfigs = new HashMap<>(); + private String schemaMode; + private String[] queryColumnNames = new String[0]; + + @Override + protected Builder self() { return this; } + + public Builder withQueryText(String queryText) { + this.queryText = queryText; + return this; + } - /** - * @return The column comments - */ - @Nonnull - public String[] columnComments() { - return columnComments; - } + public Builder withCurrentCatalog(String currentCatalog) { + this.currentCatalog = currentCatalog; + return this; + } - /** - * @return The view properties - */ - @Nonnull - public Map properties() { - return properties; - } + public Builder withCurrentNamespace(String[] currentNamespace) { + this.currentNamespace = currentNamespace == null ? 
new String[0] : currentNamespace; + return this; + } - @Override - public boolean equals(Object o) { - if (this == o) { - return true; + public Builder withSqlConfigs(Map sqlConfigs) { + this.sqlConfigs = new HashMap<>(sqlConfigs); + return this; } - if (o == null || getClass() != o.getClass()) { - return false; + + public Builder withSchemaMode(String schemaMode) { + this.schemaMode = schemaMode; + return this; } - ViewInfo viewInfo = (ViewInfo) o; - return ident.equals(viewInfo.ident) && sql.equals(viewInfo.sql) && - currentCatalog.equals(viewInfo.currentCatalog) && - Arrays.equals(currentNamespace, viewInfo.currentNamespace) && - schema.equals(viewInfo.schema) && - Arrays.equals(queryColumnNames, viewInfo.queryColumnNames) && - Arrays.equals(columnAliases, viewInfo.columnAliases) && - Arrays.equals(columnComments, viewInfo.columnComments) && - properties.equals(viewInfo.properties); - } - @Override - public int hashCode() { - int result = Objects.hash(ident, sql, currentCatalog, schema, properties); - result = 31 * result + Arrays.hashCode(currentNamespace); - result = 31 * result + Arrays.hashCode(queryColumnNames); - result = 31 * result + Arrays.hashCode(columnAliases); - result = 31 * result + Arrays.hashCode(columnComments); - return result; - } + public Builder withQueryColumnNames(String[] queryColumnNames) { + this.queryColumnNames = queryColumnNames == null ? 
new String[0] : queryColumnNames; + return this; + } - @Override - public String toString() { - return new StringJoiner(", ", ViewInfo.class.getSimpleName() + "[", "]") - .add("ident=" + ident) - .add("sql='" + sql + "'") - .add("currentCatalog='" + currentCatalog + "'") - .add("currentNamespace=" + Arrays.toString(currentNamespace)) - .add("schema=" + schema) - .add("queryColumnNames=" + Arrays.toString(queryColumnNames)) - .add("columnAliases=" + Arrays.toString(columnAliases)) - .add("columnComments=" + Arrays.toString(columnComments)) - .add("properties=" + properties) - .toString(); + @Override + public ViewInfo build() { + Objects.requireNonNull(columns, "columns should not be null"); + return new ViewInfo(this); + } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index c09361969a9e4..850b34a2743d3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -50,7 +50,7 @@ import org.apache.spark.sql.catalyst.trees.TreePattern._ import org.apache.spark.sql.catalyst.types.DataTypeUtils import org.apache.spark.sql.catalyst.util.{toPrettySQL, trimTempResolvedColumn, CharVarcharUtils} import org.apache.spark.sql.catalyst.util.ResolveDefaultColumns._ -import org.apache.spark.sql.connector.catalog.{View => _, _} +import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.connector.catalog.TableChange.{After, ColumnPosition} import org.apache.spark.sql.connector.catalog.functions.UnboundFunction @@ -1101,7 +1101,19 @@ class Analyzer( /** * Resolves relations to `ResolvedTable` or `Resolved[Temp/Persistent]View`. This is - * for resolving DDL and misc commands. + * for resolving DDL and misc commands. 
UnresolvedView callers reject non-view results + * downstream via `expectViewNotTableError`. + * + * When `viewOnly=true`, non-session catalogs that do not implement [[ViewCatalog]] are + * rejected up front with MISSING_CATALOG_ABILITY.VIEWS -- they cannot host views at all, + * so surfacing a downstream "view not found" would hide the real reason. + * + * Lookup order against a non-session catalog: + * 1. If the catalog is a [[RelationCatalog]], [[RelationCatalog.loadRelation]] is called + * once. A returned [[MetadataOnlyTable]] wrapping a [[ViewInfo]] is interpreted as a + * view; other results are tables. + * 2. Otherwise, [[TableCatalog.loadTable]] is tried (when implemented), then + * [[ViewCatalog.loadView]] as the fallback view-resolution path (when implemented). */ private def lookupTableOrView( identifier: Seq[String], @@ -1111,18 +1123,60 @@ class Analyzer( }.orElse { relationResolution.expandIdentifier(identifier) match { case CatalogAndIdentifier(catalog, ident) => - if (viewOnly && !CatalogV2Util.isSessionCatalog(catalog)) { - throw QueryCompilationErrors.catalogOperationNotSupported(catalog, "views") + if (viewOnly && !CatalogV2Util.isSessionCatalog(catalog) && + !catalog.isInstanceOf[ViewCatalog]) { + throw QueryCompilationErrors.missingCatalogViewsAbilityError(catalog) } - CatalogV2Util.loadTable(catalog, ident).map { - case v1Table: V1Table if CatalogV2Util.isSessionCatalog(catalog) && - v1Table.v1Table.tableType == CatalogTableType.VIEW => - val v1Ident = v1Table.catalogTable.identifier - val v2Ident = Identifier.of(v1Ident.database.toArray, v1Ident.identifier) - ResolvedPersistentView( - catalog, v2Ident, v1Table.catalogTable) - case table => - ResolvedTable.create(catalog.asTableCatalog, ident, table) + catalog match { + case mc: RelationCatalog => + // Single-RPC perf path: loadRelation returns a Table for a table or a + // MetadataOnlyTable wrapping a ViewInfo for a view. NoSuchTable means + // neither exists. 
+ try { + Some(mc.loadRelation(ident) match { + case t: MetadataOnlyTable if t.getTableInfo.isInstanceOf[ViewInfo] => + ResolvedPersistentView( + catalog, ident, V1Table.toCatalogTable(catalog, ident, t)) + case table => + ResolvedTable.create(catalog.asTableCatalog, ident, table) + }) + } catch { + case _: NoSuchTableException => None + } + case _ => + // Skip the table-side lookup entirely for view-only catalogs (no + // `TableCatalog` mixin): `CatalogV2Util.loadTable` would call `asTableCatalog` + // and throw MISSING_CATALOG_ABILITY.TABLES, masking the legitimate view- + // resolution path. + val tableResolved: Option[LogicalPlan] = if ( + CatalogV2Util.isSessionCatalog(catalog) || catalog.isInstanceOf[TableCatalog] + ) { + CatalogV2Util.loadTable(catalog, ident).map { + case v1Table: V1Table if CatalogV2Util.isSessionCatalog(catalog) && + v1Table.v1Table.tableType == CatalogTableType.VIEW => + val v1Ident = v1Table.catalogTable.identifier + val v2Ident = Identifier.of(v1Ident.database.toArray, v1Ident.identifier) + ResolvedPersistentView( + catalog, v2Ident, v1Table.catalogTable) + case table => + ResolvedTable.create(catalog.asTableCatalog, ident, table) + } + } else { + None + } + tableResolved.orElse { + catalog match { + case vc: ViewCatalog => + try { + val viewInfo = vc.loadView(ident) + val catalogTable = V1Table.toCatalogTable(catalog, ident, viewInfo) + Some(ResolvedPersistentView(catalog, ident, catalogTable)) + } catch { + case _: NoSuchViewException => None + } + case _ => None + } + } } case _ => None } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollation.scala index 67d5b70b30a33..3e8b507e4f6c0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollation.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollation.scala @@ 
-197,7 +197,7 @@ object ApplyDefaultCollation extends Rule[LogicalPlan] { collation = getCollationFromSchemaMetadata(catalog, identifier.namespace()))) case createView@CreateView(ResolvedIdentifier( - catalog: SupportsNamespaces, identifier), _, _, _, _, _, _, _, _, _) + catalog: SupportsNamespaces, identifier), _, _, _, _, _, _, _, _, _, _, _) if createView.collation.isEmpty => val newCreateView = CurrentOrigin.withOrigin(createView.origin) { createView.copy( @@ -209,7 +209,7 @@ object ApplyDefaultCollation extends Rule[LogicalPlan] { // We match against ResolvedPersistentView because temporary views don't have a // schema/catalog. case alterViewAs@AlterViewAs(resolvedPersistentView@ResolvedPersistentView( - catalog: SupportsNamespaces, identifier, _), _, _) + catalog: SupportsNamespaces, identifier, _), _, _, _, _) if resolvedPersistentView.metadata.collation.isEmpty => val newResolvedPersistentView = resolvedPersistentView.copy( metadata = resolvedPersistentView.metadata.copy( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala index e86248febd2eb..58f832ea6cbdf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala @@ -23,6 +23,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.SQLConfHelper import org.apache.spark.sql.catalyst.catalog.{ + CatalogTable, CatalogTableType, TemporaryViewRelation, UnresolvedCatalogRelation @@ -36,9 +37,14 @@ import org.apache.spark.sql.connector.catalog.{ ChangelogInfo, Identifier, LookupCatalog, + MetadataOnlyTable, + RelationCatalog, Table, + TableCatalog, V1Table, - V2TableWithV1Fallback + V2TableWithV1Fallback, + ViewCatalog, + ViewInfo } import 
org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.errors.{DataTypeErrorsBase, QueryCompilationErrors} @@ -227,11 +233,60 @@ class RelationResolution( .orElse { val writePrivileges = u.options.get(UnresolvedRelation.REQUIRED_WRITE_PRIVILEGES) val finalOptions = u.clearWritePrivileges.options - val table = CatalogV2Util.loadTable( - catalog, - ident, - finalTimeTravelSpec, - Option(writePrivileges)) + // For a `RelationCatalog` with no time-travel / write privileges, the single-RPC + // `loadRelation` answers both "is there a table?" and "is there a view?" in one + // call. Time-travel and write privileges apply to tables only, so for those the + // lookup falls through to the table-only `loadTable` path below; views are not + // reachable via the v2 fallback in those cases. + // + // Skip the table-side lookup entirely for view-only catalogs (no `TableCatalog` + // mixin): `CatalogV2Util.loadTable` would call `asTableCatalog` and throw + // MISSING_CATALOG_ABILITY.TABLES, masking the legitimate view-resolution path. + val tableOrView: Option[Table] = catalog match { + case mc: RelationCatalog if finalTimeTravelSpec.isEmpty && writePrivileges == null => + try { + Some(mc.loadRelation(ident)) + } catch { + case _: NoSuchTableException => None + } + case _ => + val tableSide: Option[Table] = if ( + CatalogV2Util.isSessionCatalog(catalog) || catalog.isInstanceOf[TableCatalog] + ) { + CatalogV2Util.loadTable( + catalog, + ident, + finalTimeTravelSpec, + Option(writePrivileges)) + } else { + None + } + // Fallback to ViewCatalog for catalogs that host views but where loadTable + // returned None (or was skipped because there's no TableCatalog mixin). + // Time-travel / write privileges only apply to tables, not views, so the + // fallback only fires when both are absent. 
+ tableSide.orElse { + if (finalTimeTravelSpec.isEmpty && writePrivileges == null) { + catalog match { + case vc: ViewCatalog => + try { + Some(new MetadataOnlyTable(vc.loadView(ident), ident.toString)) + } catch { + case _: NoSuchViewException => None + } + case _ => None + } + } else { + None + } + } + } + // `table` is `tableOrView` filtered to tables only -- used for cache lookup since + // we don't share-cache views. + val table: Option[Table] = tableOrView.filter { + case t: MetadataOnlyTable if t.getTableInfo.isInstanceOf[ViewInfo] => false + case _ => true + } val sharedRelationCacheMatch = for { t <- table @@ -249,7 +304,7 @@ class RelationResolution( val loaded = createRelation( catalog, ident, - table, + tableOrView, finalOptions, u.isStreaming, finalTimeTravelSpec) @@ -314,6 +369,22 @@ class RelationResolution( options: CaseInsensitiveStringMap, isStreaming: Boolean, timeTravelSpec: Option[TimeTravelSpec]): Option[LogicalPlan] = { + def createDataSourceV1Scan(v1Table: CatalogTable): LogicalPlan = { + if (isStreaming) { + if (v1Table.tableType == CatalogTableType.VIEW) { + throw QueryCompilationErrors.permanentViewNotSupportedByStreamingReadingAPIError( + ident.quoted + ) + } + SubqueryAlias( + v1Table.fullIdent, + UnresolvedCatalogRelation(v1Table, options, isStreaming = true) + ) + } else { + v1SessionCatalog.getRelation(v1Table, options) + } + } + table.map { // To utilize this code path to execute V1 commands, e.g. 
INSERT, // either it must be session catalog, or tracksPartitionsInCatalog @@ -324,19 +395,13 @@ class RelationResolution( case v1Table: V1Table if CatalogV2Util.isSessionCatalog(catalog) || !v1Table.catalogTable.tracksPartitionsInCatalog => - if (isStreaming) { - if (v1Table.v1Table.tableType == CatalogTableType.VIEW) { - throw QueryCompilationErrors.permanentViewNotSupportedByStreamingReadingAPIError( - ident.quoted - ) - } - SubqueryAlias( - catalog.name +: ident.asMultipartIdentifier, - UnresolvedCatalogRelation(v1Table.v1Table, options, isStreaming = true) - ) - } else { - v1SessionCatalog.getRelation(v1Table.v1Table, options) - } + createDataSourceV1Scan(v1Table.v1Table) + + // MetadataOnlyTable is a sentinel meaning "interpret via v1", so unlike the V1Table + // case above we apply no session-catalog / tracksPartitionsInCatalog guard -- any catalog + // returning MetadataOnlyTable has opted into v1 read semantics. + case t: MetadataOnlyTable => + createDataSourceV1Scan(V1Table.toCatalogTable(catalog, ident, t)) case table => if (isStreaming) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ViewResolution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ViewResolution.scala index faa3b9081cbfd..b0f0ef3b092c1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ViewResolution.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ViewResolution.scala @@ -38,7 +38,7 @@ object ViewResolution { val maxNestedViewDepth = AnalysisContext.get.maxNestedViewDepth if (nestedViewDepth > maxNestedViewDepth) { throw QueryCompilationErrors.viewDepthExceedsMaxResolutionDepthError( - view.desc.identifier, + view.desc.fullIdent, maxNestedViewDepth, view ) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/ViewResolver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/ViewResolver.scala index 
992f065ef3aa2..a224e521b548b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/ViewResolver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/ViewResolver.scala @@ -193,7 +193,7 @@ case class ViewResolutionContext( def validate(unresolvedView: View): Unit = { if (nestedViewDepth > maxNestedViewDepth) { throw QueryCompilationErrors.viewDepthExceedsMaxResolutionDepthError( - unresolvedView.desc.identifier, + unresolvedView.desc.fullIdent, maxNestedViewDepth, unresolvedView ) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index ff4a135b7d044..af398eb8527e9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -1054,10 +1054,15 @@ class SessionCatalog( def getRelation( metadata: CatalogTable, options: CaseInsensitiveStringMap = CaseInsensitiveStringMap.empty()): LogicalPlan = { - val qualifiedIdent = qualifyIdentifier(metadata.identifier) - val db = qualifiedIdent.database.get - val table = qualifiedIdent.table - val multiParts = Seq(CatalogManager.SESSION_CATALOG_NAME, db, table) + // Prefer `multipartIdentifier` (set by non-session v2 catalogs via `V1Table.toCatalogTable`) + // so the SubqueryAlias qualifier reflects the real catalog + multi-part namespace. + // Fall back to the historical 3-part form for v1 session-catalog tables -- we intentionally + // always include `SESSION_CATALOG_NAME` here and ignore + // `LEGACY_NON_IDENTIFIER_OUTPUT_CATALOG_NAME` to preserve pre-v2-MetadataOnlyTable behavior. 
+ val multiParts = metadata.multipartIdentifier.getOrElse { + val qualifiedIdent = qualifyIdentifier(metadata.identifier) + Seq(CatalogManager.SESSION_CATALOG_NAME, qualifiedIdent.database.get, qualifiedIdent.table) + } if (CatalogTable.isMetricView(metadata)) { parseMetricViewDefinition(metadata) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index 1cc4f7bcc3d29..981b2ac96a37a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -445,11 +445,22 @@ case class CatalogTable( tracksPartitionsInCatalog: Boolean = false, schemaPreservesCase: Boolean = true, ignoredProperties: Map[String, String] = Map.empty, - viewOriginalText: Option[String] = None) + viewOriginalText: Option[String] = None, + // Multi-part identifier [catalog, namespace..., name]. Set for every table or view + // synthesized from a v2 `MetadataOnlyTable`, because the v1 `identifier: TableIdentifier` + // (single-string database) cannot carry a multi-level namespace losslessly. `None` for + // v1-native tables; callers should use `fullIdent` which falls back to `identifier.nameParts`. + multipartIdentifier: Option[Seq[String]] = None) extends MetadataMapSupport { import CatalogTable._ + /** + * The fully-qualified multi-part identifier. Prefers `multipartIdentifier` when set (tables and + * views built from a v2 `MetadataOnlyTable`); otherwise falls back to `identifier.nameParts`. + */ + def fullIdent: Seq[String] = multipartIdentifier.getOrElse(identifier.nameParts) + /** * schema of this table's partition columns */ @@ -544,20 +555,7 @@ case class CatalogTable( * Return the schema binding mode. 
Defaults to SchemaBinding if not a view or an older * version, unless the viewSchemaBindingMode config is set to false */ - def viewSchemaMode: ViewSchemaMode = { - if (!SQLConf.get.viewSchemaBindingEnabled) { - SchemaUnsupported - } else { - val schemaMode = properties.getOrElse(VIEW_SCHEMA_MODE, SchemaBinding.toString) - schemaMode match { - case SchemaBinding.toString => SchemaBinding - case SchemaEvolution.toString => SchemaEvolution - case SchemaTypeEvolution.toString => SchemaTypeEvolution - case SchemaCompensation.toString => SchemaCompensation - case other => throw SparkException.internalError("Unexpected ViewSchemaMode") - } - } - } + def viewSchemaMode: ViewSchemaMode = CatalogTable.viewSchemaModeFromProperties(properties) /** * Return temporary view names the current view was referred. should be empty if the @@ -789,6 +787,26 @@ object CatalogTable { val PROP_CLUSTERING_COLUMNS: String = "clusteringColumns" + /** + * Decode the view schema binding mode from a properties map. Shared between + * [[CatalogTable.viewSchemaMode]] and the v2 ALTER VIEW path which reads the mode directly + * from the existing view's [[TableInfo]] properties without materializing a full CatalogTable. 
+ */ + def viewSchemaModeFromProperties(properties: Map[String, String]): ViewSchemaMode = { + if (!SQLConf.get.viewSchemaBindingEnabled) { + SchemaUnsupported + } else { + val schemaMode = properties.getOrElse(VIEW_SCHEMA_MODE, SchemaBinding.toString) + schemaMode match { + case SchemaBinding.toString => SchemaBinding + case SchemaEvolution.toString => SchemaEvolution + case SchemaTypeEvolution.toString => SchemaTypeEvolution + case SchemaCompensation.toString => SchemaCompensation + case _ => throw SparkException.internalError("Unexpected ViewSchemaMode") + } + } + } + def splitLargeTableProp( key: String, value: String, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index f63de8d1e4656..0eded2d9dbdf9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -1363,8 +1363,12 @@ case class ShowTablePartition( /** * The logical plan of the SHOW VIEWS command. * - * Notes: v2 catalogs do not support views API yet, the command will fallback to - * v1 ShowViewsCommand during ResolveSessionCatalog. + * Session-catalog targets fall back to v1 `ShowViewsCommand` via `ResolveSessionCatalog`. + * v2 [[org.apache.spark.sql.connector.catalog.ViewCatalog]] catalogs are handled in + * `DataSourceV2Strategy` (enumerates via + * [[org.apache.spark.sql.connector.catalog.ViewCatalog#listViews]]). Non-ViewCatalog v2 + * catalogs are rejected up front in `ResolveSessionCatalog` with + * `MISSING_CATALOG_ABILITY.VIEWS`. */ case class ShowViews( namespace: LogicalPlan, @@ -1714,19 +1718,42 @@ case class RepairTable( /** * The logical plan of the ALTER VIEW ... AS command. 
+ * + * Extends [[AnalysisOnlyCommand]] so [[Analyzer.HandleSpecialCommand]] captures + * `referredTempFunctions` from [[AnalysisContext]]; this list is needed by + * [[CheckViewReferences]] and by the v2 execs when the target is a non-session catalog. + * Session-catalog targets are still rewritten to [[AlterViewAsCommand]] by + * `ResolveSessionCatalog` and the captured value is dropped there (the v1 command re-captures). */ case class AlterViewAs( child: LogicalPlan, originalText: String, - query: LogicalPlan) extends BinaryCommand with CTEInChildren { - override def left: LogicalPlan = child - override def right: LogicalPlan = query + query: LogicalPlan, + isAnalyzed: Boolean = false, + referredTempFunctions: Seq[String] = Seq.empty) + extends Command with AnalysisOnlyCommand with CTEInChildren { + + override def childrenToAnalyze: Seq[LogicalPlan] = Seq(child, query) + + override def markAsAnalyzed(analysisContext: AnalysisContext): LogicalPlan = copy( + isAnalyzed = true, + referredTempFunctions = analysisContext.referredTempFunctionNames.toSeq) + override protected def withNewChildrenInternal( - newLeft: LogicalPlan, newRight: LogicalPlan): LogicalPlan = - copy(child = newLeft, query = newRight) + newChildren: IndexedSeq[LogicalPlan]): LogicalPlan = { + assert(!isAnalyzed) + newChildren match { + case Seq(newChild, newQuery) => + copy(child = newChild, query = newQuery) + case others => + throw new SparkIllegalArgumentException( + errorClass = "_LEGACY_ERROR_TEMP_3218", + messageParameters = Map("others" -> others.toString())) + } + } override def withCTEDefs(cteDefs: Seq[CTERelationDef]): LogicalPlan = { - withNewChildren(Seq(child, WithCTE(query, cteDefs))) + copy(query = WithCTE(query, cteDefs)) } } @@ -1743,6 +1770,11 @@ case class AlterViewSchemaBinding( /** * The logical plan of the CREATE VIEW ... command. 
+ * + * Extends [[AnalysisOnlyCommand]] so that [[Analyzer.HandleSpecialCommand]] captures + * `referredTempFunctions` from the [[AnalysisContext]] after the child query is analyzed; + * this list is needed for `verifyTemporaryObjectsNotExists`-style checks on downstream + * execution paths. */ case class CreateView( child: LogicalPlan, @@ -1754,15 +1786,32 @@ case class CreateView( query: LogicalPlan, allowExisting: Boolean, replace: Boolean, - viewSchemaMode: ViewSchemaMode) extends BinaryCommand with CTEInChildren { - override def left: LogicalPlan = child - override def right: LogicalPlan = query + viewSchemaMode: ViewSchemaMode, + isAnalyzed: Boolean = false, + referredTempFunctions: Seq[String] = Seq.empty) + extends Command with AnalysisOnlyCommand with CTEInChildren { + + override def childrenToAnalyze: Seq[LogicalPlan] = Seq(child, query) + + override def markAsAnalyzed(analysisContext: AnalysisContext): LogicalPlan = copy( + isAnalyzed = true, + referredTempFunctions = analysisContext.referredTempFunctionNames.toSeq) + override protected def withNewChildrenInternal( - newLeft: LogicalPlan, newRight: LogicalPlan): LogicalPlan = - copy(child = newLeft, query = newRight) + newChildren: IndexedSeq[LogicalPlan]): LogicalPlan = { + assert(!isAnalyzed) + newChildren match { + case Seq(newChild, newQuery) => + copy(child = newChild, query = newQuery) + case others => + throw new SparkIllegalArgumentException( + errorClass = "_LEGACY_ERROR_TEMP_3218", + messageParameters = Map("others" -> others.toString())) + } + } override def withCTEDefs(cteDefs: Seq[CTERelationDef]): LogicalPlan = { - withNewChildren(Seq(child, WithCTE(query, cteDefs))) + copy(query = WithCTE(query, cteDefs)) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala index cf6052009c927..a5f1ca7f1d289 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala @@ -171,6 +171,15 @@ private[sql] object CatalogV2Implicits { throw QueryCompilationErrors.requiresSinglePartNamespaceError(asMultipartIdentifier) } + // Build a v1 TableIdentifier for display / error-rendering purposes. Collapses a + // multi-part namespace to its last segment (v1 TableIdentifier has a single-string + // database field). Callers that need a lossless multi-part form should build a + // Seq[String] from toQualifiedNameParts instead. + def asLegacyTableIdentifier(catalogName: String): TableIdentifier = TableIdentifier( + table = ident.name(), + database = ident.namespace().lastOption, + catalog = Some(catalogName)) + /** * Tries to convert catalog identifier to the table identifier. Table identifier does not * support multiple namespaces (nested namespaces), so if identifier contains nested namespace, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/Catalogs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/Catalogs.scala index e6c70fdabb159..03addeb170697 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/Catalogs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/Catalogs.scala @@ -64,6 +64,7 @@ private[sql] object Catalogs { } val plugin = pluginClass.getDeclaredConstructor().newInstance().asInstanceOf[CatalogPlugin] plugin.initialize(name, catalogOptions(name, conf)) + validateRelationCatalog(name, plugin) plugin } catch { case e: ClassNotFoundException => @@ -106,4 +107,22 @@ private[sql] object Catalogs { } new CaseInsensitiveStringMap(options) } + + /** + * Reject catalogs that implement both [[TableCatalog]] and [[ViewCatalog]] without + * extending [[RelationCatalog]]. 
The combined case has cross-cutting rules (single namespace, + * cross-type collision rejection, perf opt-ins) that live on [[RelationCatalog]]; implementing + * the two interfaces directly would skip that contract. + */ + private def validateRelationCatalog(name: String, plugin: CatalogPlugin): Unit = { + if (plugin.isInstanceOf[TableCatalog] && plugin.isInstanceOf[ViewCatalog] && + !plugin.isInstanceOf[RelationCatalog]) { + throw new IllegalArgumentException( + s"Catalog '$name' (${plugin.getClass.getName}) implements both TableCatalog and " + + s"ViewCatalog directly. Catalogs that expose both tables and views must implement " + + s"RelationCatalog instead, which centralizes the cross-cutting rules (shared " + + s"identifier namespace, cross-type collision rejection, single-RPC perf entry " + + s"points).") + } + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala index eee6ddf3e58fd..079b2639aa2b9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala @@ -22,8 +22,8 @@ import java.util import scala.collection.mutable import scala.jdk.CollectionConverters._ -import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType, CatalogUtils} -import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.TableIdentifierHelper +import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType, CatalogUtils, ClusterBySpec} +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.connector.catalog.V1Table.addV2TableProperties import org.apache.spark.sql.connector.expressions.{LogicalExpressions, Transform} import org.apache.spark.sql.types.StructType @@ -49,7 +49,6 @@ private[sql] case class V1Table(v1Table: CatalogTable) extends Table 
{ override lazy val schema: StructType = v1Table.schema override lazy val partitioning: Array[Transform] = { - import CatalogV2Implicits._ val partitions = new mutable.ArrayBuffer[Transform]() v1Table.partitionColumnNames.foreach { col => @@ -109,6 +108,108 @@ private[sql] object V1Table { case _ => None } } + + def toCatalogTable( + catalog: CatalogPlugin, + ident: Identifier, + t: MetadataOnlyTable): CatalogTable = t.getTableInfo match { + case viewInfo: ViewInfo => toCatalogTable(catalog, ident, viewInfo) + case tableInfo => toCatalogTable(catalog, ident, tableInfo) + } + + private def toCatalogTable( + catalog: CatalogPlugin, + ident: Identifier, + info: TableInfo): CatalogTable = { + val props = info.properties.asScala.toMap + // PROP_TABLE_TYPE is advisory on the v2 side: it may be absent or carry a value that has no + // v1 mapping (e.g. TableSummary.FOREIGN_TABLE_TYPE). v1 only has EXTERNAL/MANAGED, so + // anything other than the explicit MANAGED mapping falls back to EXTERNAL for the v1 + // representation -- the same default v1 uses when the value is missing. VIEW is reached + // only through the ViewInfo branch above. + val tableType = props.get(TableCatalog.PROP_TABLE_TYPE) match { + case Some(TableSummary.MANAGED_TABLE_TYPE) => CatalogTableType.MANAGED + case _ => CatalogTableType.EXTERNAL + } + // Reserved keys are promoted to first-class CatalogTable fields; strip them from the + // user-visible properties map so they're not double-persisted or leaked into the serde bag. + val userProps = props -- CatalogV2Util.TABLE_RESERVED_PROPERTIES + val (serdeProps, tableProps) = userProps.toSeq + .partition(_._1.startsWith(TableCatalog.OPTION_PREFIX)) + val tablePropsMap = tableProps.toMap + val (partCols, bucketSpec, clusterBySpec) = info.partitions.toSeq.convertTransforms + CatalogTable( + // `asLegacyTableIdentifier` collapses multi-part namespaces to their last segment (v1 + // limitation). 
We record the full multi-part form in `multipartIdentifier` below; + // callers needing the real fully-qualified name should read `CatalogTable.fullIdent`. + identifier = ident.asLegacyTableIdentifier(catalog.name()), + tableType = tableType, + storage = CatalogStorageFormat.empty.copy( + locationUri = props.get(TableCatalog.PROP_LOCATION).map(CatalogUtils.stringToURI), + // v2 table properties should be put into the serde properties as well in case + // they contain data source options. + properties = tablePropsMap ++ serdeProps.map { + case (k, v) => k.drop(TableCatalog.OPTION_PREFIX.length) -> v + } + ), + schema = CatalogV2Util.v2ColumnsToStructType(info.columns), + provider = props.get(TableCatalog.PROP_PROVIDER), + partitionColumnNames = partCols, + bucketSpec = bucketSpec, + owner = props.getOrElse(TableCatalog.PROP_OWNER, ""), + comment = props.get(TableCatalog.PROP_COMMENT), + collation = props.get(TableCatalog.PROP_COLLATION), + properties = tablePropsMap ++ + clusterBySpec.map(ClusterBySpec.toPropertyWithoutValidation), + multipartIdentifier = Some(catalog.name() +: ident.asMultipartIdentifier) + ) + } + + def toCatalogTable( + catalog: CatalogPlugin, + ident: Identifier, + info: ViewInfo): CatalogTable = { + val props = info.properties.asScala.toMap + val userProps = props -- CatalogV2Util.TABLE_RESERVED_PROPERTIES + // Serde/OPTION properties only apply to data-source tables; views' user properties are a + // plain TBLPROPERTIES bag. 
+ val tablePropsMap = userProps + val viewContextProps = if (info.currentCatalog != null && info.currentCatalog.nonEmpty) { + CatalogTable.catalogAndNamespaceToProps( + info.currentCatalog, info.currentNamespace.toSeq) + } else { + Map.empty[String, String] + } + val sqlConfigProps = info.sqlConfigs.asScala.map { + case (k, v) => s"${CatalogTable.VIEW_SQL_CONFIG_PREFIX}$k" -> v + }.toMap + val queryOutputProps = if (info.queryColumnNames.isEmpty) { + Map.empty[String, String] + } else { + val numCols = info.queryColumnNames.length + val perColProps = info.queryColumnNames.zipWithIndex.map { case (name, idx) => + s"${CatalogTable.VIEW_QUERY_OUTPUT_COLUMN_NAME_PREFIX}$idx" -> name + }.toMap + perColProps + (CatalogTable.VIEW_QUERY_OUTPUT_NUM_COLUMNS -> numCols.toString) + } + val schemaModeProps = Option(info.schemaMode) + .map(m => Map(CatalogTable.VIEW_SCHEMA_MODE -> m)) + .getOrElse(Map.empty) + CatalogTable( + identifier = ident.asLegacyTableIdentifier(catalog.name()), + tableType = CatalogTableType.VIEW, + storage = CatalogStorageFormat.empty, + schema = CatalogV2Util.v2ColumnsToStructType(info.columns), + owner = props.getOrElse(TableCatalog.PROP_OWNER, ""), + viewText = Some(info.queryText), + viewOriginalText = Some(info.queryText), + comment = props.get(TableCatalog.PROP_COMMENT), + collation = props.get(TableCatalog.PROP_COLLATION), + properties = tablePropsMap ++ viewContextProps ++ sqlConfigProps ++ + queryOutputProps ++ schemaModeProps, + multipartIdentifier = Some(catalog.name() +: ident.asMultipartIdentifier) + ) + } } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index bd1e876c9fbd6..b596d2f95391f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -567,11 +567,11 @@ 
private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat } def viewDepthExceedsMaxResolutionDepthError( - identifier: TableIdentifier, maxNestedDepth: Int, t: TreeNode[_]): Throwable = { + viewNameParts: Seq[String], maxNestedDepth: Int, t: TreeNode[_]): Throwable = { new AnalysisException( errorClass = "VIEW_EXCEED_MAX_NESTED_DEPTH", messageParameters = Map( - "viewName" -> toSQLId(identifier.nameParts), + "viewName" -> toSQLId(viewNameParts), "maxNestedDepth" -> maxNestedDepth.toString), origin = t.origin) } @@ -3353,25 +3353,25 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat } def cannotCreateViewTooManyColumnsError( - viewIdent: TableIdentifier, + viewNameParts: Seq[String], expected: Seq[String], query: LogicalPlan): Throwable = { new AnalysisException( errorClass = "CREATE_VIEW_COLUMN_ARITY_MISMATCH.TOO_MANY_DATA_COLUMNS", messageParameters = Map( - "viewName" -> toSQLId(viewIdent.nameParts), + "viewName" -> toSQLId(viewNameParts), "viewColumns" -> expected.map(c => toSQLId(c)).mkString(", "), "dataColumns" -> query.output.map(c => toSQLId(c.name)).mkString(", "))) } def cannotCreateViewNotEnoughColumnsError( - viewIdent: TableIdentifier, + viewNameParts: Seq[String], expected: Seq[String], query: LogicalPlan): Throwable = { new AnalysisException( errorClass = "CREATE_VIEW_COLUMN_ARITY_MISMATCH.NOT_ENOUGH_DATA_COLUMNS", messageParameters = Map( - "viewName" -> toSQLId(viewIdent.nameParts), + "viewName" -> toSQLId(viewNameParts), "viewColumns" -> expected.map(c => toSQLId(c)).mkString(", "), "dataColumns" -> query.output.map(c => toSQLId(c.name)).mkString(", "))) } @@ -3383,12 +3383,12 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat } def unsupportedCreateOrReplaceViewOnTableError( - name: TableIdentifier, replace: Boolean): Throwable = { + nameParts: Seq[String], replace: Boolean): Throwable = { if (replace) { new AnalysisException( errorClass = 
"EXPECT_VIEW_NOT_TABLE.NO_ALTERNATIVE", messageParameters = Map( - "tableName" -> toSQLId(name.nameParts), + "tableName" -> toSQLId(nameParts), "operation" -> "CREATE OR REPLACE VIEW" ) ) @@ -3396,16 +3396,16 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat new AnalysisException( errorClass = "TABLE_OR_VIEW_ALREADY_EXISTS", messageParameters = Map( - "relationName" -> toSQLId(name.nameParts) + "relationName" -> toSQLId(nameParts) ) ) } } - def viewAlreadyExistsError(name: TableIdentifier): Throwable = { + def viewAlreadyExistsError(nameParts: Seq[String]): Throwable = { new AnalysisException( errorClass = "TABLE_OR_VIEW_ALREADY_EXISTS", - messageParameters = Map("relationName" -> name.toString)) + messageParameters = Map("relationName" -> toSQLId(nameParts))) } def createPersistedViewFromDatasetAPINotAllowedError(): Throwable = { @@ -3415,57 +3415,57 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat } def recursiveViewDetectedError( - viewIdent: TableIdentifier, - newPath: Seq[TableIdentifier]): Throwable = { + viewIdent: Seq[String], + newPath: Seq[Seq[String]]): Throwable = { new AnalysisException( errorClass = "RECURSIVE_VIEW", messageParameters = Map( - "viewIdent" -> toSQLId(viewIdent.nameParts), - "newPath" -> newPath.map(p => toSQLId(p.nameParts)).mkString(" -> "))) + "viewIdent" -> toSQLId(viewIdent), + "newPath" -> newPath.map(toSQLId).mkString(" -> "))) } def notAllowedToCreatePermanentViewWithoutAssigningAliasForExpressionError( - name: TableIdentifier, + viewNameParts: Seq[String], attr: Attribute): Throwable = { new AnalysisException( errorClass = "CREATE_PERMANENT_VIEW_WITHOUT_ALIAS", messageParameters = Map( - "name" -> toSQLId(name.nameParts), + "name" -> toSQLId(viewNameParts), "attr" -> toSQLExpr(attr))) } def notAllowedToCreatePermanentViewByReferencingTempViewError( - name: TableIdentifier, - nameParts: String): Throwable = { + viewNameParts: Seq[String], + tempViewNameParts: 
String): Throwable = { new AnalysisException( errorClass = "INVALID_TEMP_OBJ_REFERENCE", messageParameters = Map( "obj" -> "VIEW", - "objName" -> toSQLId(name.nameParts), + "objName" -> toSQLId(viewNameParts), "tempObj" -> "VIEW", - "tempObjName" -> toSQLId(nameParts))) + "tempObjName" -> toSQLId(tempViewNameParts))) } def notAllowedToCreatePermanentViewByReferencingTempFuncError( - name: TableIdentifier, + viewNameParts: Seq[String], funcName: String): Throwable = { new AnalysisException( errorClass = "INVALID_TEMP_OBJ_REFERENCE", messageParameters = Map( "obj" -> "VIEW", - "objName" -> toSQLId(name.nameParts), + "objName" -> toSQLId(viewNameParts), "tempObj" -> "FUNCTION", "tempObjName" -> toSQLId(funcName))) } def notAllowedToCreatePermanentViewByReferencingTempVarError( - nameParts: Seq[String], + viewNameParts: Seq[String], varName: Seq[String]): Throwable = { new AnalysisException( errorClass = "INVALID_TEMP_OBJ_REFERENCE", messageParameters = Map( "obj" -> "VIEW", - "objName" -> toSQLId(nameParts), + "objName" -> toSQLId(viewNameParts), "tempObj" -> "VARIABLE", "tempObjName" -> toSQLId(varName))) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index e2bfaef1e7002..94523dd313b43 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.util.{quoteIfNeeded, toPrettySQL, CharVarcharUtils, ResolveDefaultColumns => DefaultCols} import org.apache.spark.sql.catalyst.util.ResolveDefaultColumns._ -import org.apache.spark.sql.connector.catalog.{CatalogExtension, CatalogManager, CatalogPlugin, CatalogV2Util, LookupCatalog, 
SupportsNamespaces, V1Table} +import org.apache.spark.sql.connector.catalog.{CatalogExtension, CatalogManager, CatalogPlugin, CatalogV2Util, LookupCatalog, SupportsNamespaces, V1Table, ViewCatalog} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.execution.command._ @@ -327,11 +327,16 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) case DropView(DropViewInSessionCatalog(ident), ifExists) => DropTableCommand(ident, ifExists, isView = true, purge = false) - case DropView(r @ ResolvedIdentifier(catalog, ident), ifExists) => + // ViewCatalog catalogs fall through to `DataSourceV2Strategy`, which routes DROP VIEW to + // `ViewCatalog.dropView`. Other non-session catalogs get `MISSING_CATALOG_ABILITY.VIEWS`, + // matching the error raised from `CheckViewReferences` for CREATE/ALTER VIEW and from the + // analyzer gate on UnresolvedView. + case DropView(r @ ResolvedIdentifier(catalog, ident), ifExists) + if !catalog.isInstanceOf[ViewCatalog] => if (catalog == FakeSystemCatalog) { DropTempViewCommand(ident, ifExists) } else { - throw QueryCompilationErrors.catalogOperationNotSupported(catalog, "views") + throw QueryCompilationErrors.missingCatalogViewsAbilityError(catalog) } case c @ CreateNamespace(DatabaseNameInSessionCatalog(name), _, _) if conf.useV1Command => @@ -517,14 +522,21 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) location) => AlterTableSetLocationCommand(ident, Some(partitionSpec), location) - case AlterViewAs(ResolvedViewIdentifier(ident), originalText, query) => + // The final `_, _` are AlterViewAs.isAnalyzed and referredTempFunctions. We drop both: + // AlterViewAsCommand is a separate AnalysisOnlyCommand and gets its own markAsAnalyzed pass + // from HandleSpecialCommand after this rewrite. 
+ case AlterViewAs(ResolvedViewIdentifier(ident), originalText, query, _, _) => AlterViewAsCommand(ident, originalText, query) case AlterViewSchemaBinding(ResolvedViewIdentifier(ident), viewSchemaMode) => AlterViewSchemaBindingCommand(ident, viewSchemaMode) + // The final `_, _` are CreateView.isAnalyzed and referredTempFunctions. We drop both: + // CreateViewCommand is a separate AnalysisOnlyCommand and gets its own markAsAnalyzed pass + // from HandleSpecialCommand after this rewrite. case CreateView(CreateViewInSessionCatalog(ident), userSpecifiedColumns, comment, - collation, properties, originalText, child, allowExisting, replace, viewSchemaMode) => + collation, properties, originalText, query, allowExisting, replace, viewSchemaMode, + _, _) => CreateViewCommand( name = ident, userSpecifiedColumns = userSpecifiedColumns, @@ -532,16 +544,17 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) collation = collation, properties = properties, originalText = originalText, - plan = child, + plan = query, allowExisting = allowExisting, replace = replace, viewType = PersistedView, viewSchemaMode = viewSchemaMode) - case CreateView(ResolvedIdentifier(catalog, _), _, _, _, _, _, _, _, _, _) => - throw QueryCompilationErrors.missingCatalogViewsAbilityError(catalog) - - case ShowViews(ns: ResolvedNamespace, pattern, output) => + // ViewCatalog catalogs are handled by the v2 strategy (enumerates via listViews); we skip + // the match here so the plan flows through unchanged. Only non-session, non-ViewCatalog + // catalogs hit the MISSING_CATALOG_ABILITY.VIEWS rejection. + case ShowViews(ns: ResolvedNamespace, pattern, output) + if !ns.catalog.isInstanceOf[ViewCatalog] => ns match { case ResolvedDatabaseInSessionCatalog(db) => ShowViewsCommand(db, pattern, output) case _ => @@ -772,9 +785,14 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) } object ResolvedViewIdentifier { + // Only matches session-catalog persistent views. 
Non-session-catalog persistent views + // (produced for `MetadataOnlyTable`) fall through; `AlterViewAs` is picked up by the v2 + // strategy, and the remaining view DDL / inspection plans (SET/UNSET TBLPROPERTIES, + // ALTER VIEW ... WITH SCHEMA, RENAME TO, SHOW CREATE TABLE, SHOW TBLPROPERTIES, SHOW + // COLUMNS, DESCRIBE [COLUMN]) are rejected with `UNSUPPORTED_FEATURE.TABLE_OPERATION` by + // dedicated v2 strategy cases -- tracked for a follow-up PR (SPARK-52729). def unapply(resolved: LogicalPlan): Option[TableIdentifier] = resolved match { - case ResolvedPersistentView(catalog, ident, _) => - assert(isSessionCatalog(catalog)) + case ResolvedPersistentView(catalog, ident, _) if isSessionCatalog(catalog) => Some(ident.asTableIdentifier.copy(catalog = Some(catalog.name))) case ResolvedTempView(ident, _) => @@ -938,4 +956,5 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) SQLConf.get.getConf(SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION) == "builtin" || catalog.isInstanceOf[CatalogExtension]) } + } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/metricViewCommands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/metricViewCommands.scala index 8c21a908ddf32..623685f6c20a7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/metricViewCommands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/metricViewCommands.scala @@ -51,10 +51,10 @@ case class CreateMetricViewCommand( if (userSpecifiedColumns.nonEmpty) { if (userSpecifiedColumns.length > analyzed.output.length) { throw QueryCompilationErrors.cannotCreateViewNotEnoughColumnsError( - name, userSpecifiedColumns.map(_._1), analyzed) + name.nameParts, userSpecifiedColumns.map(_._1), analyzed) } else if (userSpecifiedColumns.length < analyzed.output.length) { throw QueryCompilationErrors.cannotCreateViewTooManyColumnsError( - name, userSpecifiedColumns.map(_._1), analyzed) + name.nameParts, 
userSpecifiedColumns.map(_._1), analyzed) } } catalog.createTable( @@ -90,7 +90,8 @@ object MetricViewHelper { val metricViewNode = MetricViewPlanner.planWrite( tableMeta, viewText, session.sessionState.sqlParser) val analyzed = analyzer.executeAndCheck(metricViewNode, new QueryPlanningTracker) - ViewHelper.verifyTemporaryObjectsNotExists(isTemporary = false, name, analyzed, Seq.empty) + ViewHelper.verifyTemporaryObjectsNotExists( + isTemporary = false, name.nameParts, analyzed, Seq.empty) analyzed } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index 895c39dd83976..994c7836f9dd1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -26,15 +26,15 @@ import org.apache.spark.SparkException import org.apache.spark.internal.Logging import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.{CapturesConfig, SQLConfHelper, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.{AnalysisContext, GlobalTempView, LocalTempView, SchemaEvolution, SchemaUnsupported, ViewSchemaMode, ViewType} +import org.apache.spark.sql.catalyst.analysis.{AnalysisContext, GlobalTempView, LocalTempView, ResolvedIdentifier, ResolvedPersistentView, SchemaEvolution, SchemaUnsupported, ViewSchemaMode, ViewType} import org.apache.spark.sql.catalyst.analysis.V2TableReference import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType, TemporaryViewRelation} import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, SubqueryExpression, VariableReference} -import org.apache.spark.sql.catalyst.plans.logical.{AnalysisOnlyCommand, CreateTempView, CTEInChildren, CTERelationDef, LogicalPlan, Project, View, WithCTE} +import org.apache.spark.sql.catalyst.plans.logical.{AlterViewAs, 
AnalysisOnlyCommand, CreateTempView, CreateView, CTEInChildren, CTERelationDef, LogicalPlan, Project, View, WithCTE} import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.classic.ClassicConversions.castToImpl -import org.apache.spark.sql.connector.catalog.CatalogManager -import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.NamespaceHelper +import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, Identifier, ViewCatalog} +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.{IdentifierHelper, NamespaceHelper} import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.internal.StaticSQLConf @@ -47,6 +47,12 @@ import org.apache.spark.util.ArrayImplicits._ * properties(e.g. view default database, view query output column names) and store them as * properties in metastore, if we need to create a permanent view. * + * Note: this is the v1 (session catalog) path. Permanent-view checks (no temp-object refs, + * no auto-generated aliases, no cycles) run at exec time here because Dataset-built commands + * can be constructed with `isAnalyzed=true` and bypass the analyzer's recapture path. The v2 + * equivalent is [[org.apache.spark.sql.catalyst.plans.logical.CreateView]]; its checks run at + * analysis time via [[CheckViewReferences]]. Mirror any new validation in both places. + * * @param name the name of this view. * @param userSpecifiedColumns the output column names and optional comments specified by users, * can be Nil if not specified. 
@@ -113,10 +119,10 @@ case class CreateViewCommand( if (userSpecifiedColumns.nonEmpty) { if (userSpecifiedColumns.length > analyzedPlan.output.length) { throw QueryCompilationErrors.cannotCreateViewNotEnoughColumnsError( - name, userSpecifiedColumns.map(_._1), analyzedPlan) + name.nameParts, userSpecifiedColumns.map(_._1), analyzedPlan) } else if (userSpecifiedColumns.length < analyzedPlan.output.length) { throw QueryCompilationErrors.cannotCreateViewTooManyColumnsError( - name, userSpecifiedColumns.map(_._1), analyzedPlan) + name.nameParts, userSpecifiedColumns.map(_._1), analyzedPlan) } if (viewSchemaMode == SchemaEvolution) { throw SparkException.internalError( @@ -128,8 +134,9 @@ case class CreateViewCommand( // When creating a permanent view, not allowed to reference temporary objects. // This should be called after `qe.assertAnalyzed()` (i.e., `child` can be resolved) - verifyTemporaryObjectsNotExists(isTemporary, name, analyzedPlan, referredTempFunctions) - verifyAutoGeneratedAliasesNotExists(analyzedPlan, isTemporary, name) + verifyTemporaryObjectsNotExists( + isTemporary, name.nameParts, analyzedPlan, referredTempFunctions) + verifyAutoGeneratedAliasesNotExists(analyzedPlan, isTemporary, name.nameParts) SchemaUtils.checkIndeterminateCollationInSchema(plan.schema) @@ -166,11 +173,13 @@ case class CreateViewCommand( // Handles `CREATE VIEW IF NOT EXISTS v0 AS SELECT ...`. Does nothing when the target view // already exists. } else if (tableMetadata.tableType != CatalogTableType.VIEW) { - throw QueryCompilationErrors.unsupportedCreateOrReplaceViewOnTableError(name, replace) + throw QueryCompilationErrors.unsupportedCreateOrReplaceViewOnTableError( + name.nameParts, replace) } else if (replace) { // Detect cyclic view reference on CREATE OR REPLACE VIEW. 
val viewIdent = tableMetadata.identifier - checkCyclicViewReference(analyzedPlan, Seq(viewIdent), viewIdent) + val viewFullIdent = tableMetadata.fullIdent + checkCyclicViewReference(analyzedPlan, Seq(viewFullIdent), viewFullIdent) // uncache the cached data before replacing an exists view logDebug(s"Try to uncache ${viewIdent.quotedString} before replacing.") @@ -186,7 +195,7 @@ case class CreateViewCommand( } else { // Handles `CREATE VIEW v0 AS SELECT ...`. Throws exception when the target view already // exists. - throw QueryCompilationErrors.viewAlreadyExistsError(name) + throw QueryCompilationErrors.viewAlreadyExistsError(name.nameParts) } } else { // Create the view if it doesn't exist. @@ -209,6 +218,12 @@ case class CreateViewCommand( * this command will try to alter a temporary view first, if view not exist, try permanent view * next, if still not exist, throw an exception. * + * Note: this is the v1 (session catalog) path. Permanent-view checks (no temp-object refs, + * no auto-generated aliases, no cycles) run at exec time here because Dataset-built commands + * can be constructed with `isAnalyzed=true` and bypass the analyzer's recapture path. The v2 + * equivalent is [[org.apache.spark.sql.catalyst.plans.logical.AlterViewAs]]; its checks run at + * analysis time via [[CheckViewReferences]]. Mirror any new validation in both places. + * * @param name the name of this view. * @param originalText the original SQL text of this view. Note that we can only alter a view by * SQL API, which means we always have originalText. 
@@ -242,8 +257,8 @@ case class AlterViewAsCommand( override def run(session: SparkSession): Seq[Row] = { val isTemporary = session.sessionState.catalog.isTempView(name) - verifyTemporaryObjectsNotExists(isTemporary, name, query, referredTempFunctions) - verifyAutoGeneratedAliasesNotExists(query, isTemporary, name) + verifyTemporaryObjectsNotExists(isTemporary, name.nameParts, query, referredTempFunctions) + verifyAutoGeneratedAliasesNotExists(query, isTemporary, name.nameParts) SchemaUtils.checkIndeterminateCollationInSchema(query.schema) if (isTemporary) { alterTemporaryView(session, query) @@ -277,7 +292,8 @@ case class AlterViewAsCommand( // Detect cyclic view reference on ALTER VIEW. val viewIdent = viewMeta.identifier - checkCyclicViewReference(analyzedPlan, Seq(viewIdent), viewIdent) + val viewFullIdent = viewMeta.fullIdent + checkCyclicViewReference(analyzedPlan, Seq(viewFullIdent), viewFullIdent) logDebug(s"Try to uncache ${viewIdent.quotedString} before replacing.") CommandUtils.uncacheTableOrView(session, viewIdent) @@ -559,16 +575,16 @@ object ViewHelper extends SQLConfHelper with Logging with CapturesConfig { * * @param plan the logical plan we detect cyclic view references from. * @param path the path between the altered view and current node. - * @param viewIdent the table identifier of the altered view, we compare two views by the - * `desc.identifier`. + * @param viewIdent the full multi-part identifier of the altered view. We compare two views by + * `desc.fullIdent` so multi-level namespaces (v2 catalogs) are distinguished. */ def checkCyclicViewReference( plan: LogicalPlan, - path: Seq[TableIdentifier], - viewIdent: TableIdentifier): Unit = { + path: Seq[Seq[String]], + viewIdent: Seq[String]): Unit = { plan match { case v: View => - val ident = v.desc.identifier + val ident = v.desc.fullIdent val newPath = path :+ ident // If the table identifier equals to the `viewIdent`, current view node is the same with // the altered view. 
We detect a view reference cycle, should throw an AnalysisException. @@ -594,12 +610,13 @@ object ViewHelper extends SQLConfHelper with Logging with CapturesConfig { } def verifyAutoGeneratedAliasesNotExists( - child: LogicalPlan, isTemporary: Boolean, name: TableIdentifier): Unit = { + child: LogicalPlan, isTemporary: Boolean, viewNameParts: Seq[String]): Unit = { if (!isTemporary && !conf.allowAutoGeneratedAliasForView) { child.output.foreach { attr => if (attr.metadata.contains("__autoGeneratedAlias")) { throw QueryCompilationErrors - .notAllowedToCreatePermanentViewWithoutAssigningAliasForExpressionError(name, attr) + .notAllowedToCreatePermanentViewWithoutAssigningAliasForExpressionError( + viewNameParts, attr) } } } @@ -610,7 +627,7 @@ object ViewHelper extends SQLConfHelper with Logging with CapturesConfig { */ def verifyTemporaryObjectsNotExists( isTemporary: Boolean, - name: TableIdentifier, + viewNameParts: Seq[String], child: LogicalPlan, referredTempFunctions: Seq[String]): Unit = { import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ @@ -618,16 +635,16 @@ object ViewHelper extends SQLConfHelper with Logging with CapturesConfig { val tempViews = collectTemporaryViews(child) tempViews.foreach { nameParts => throw QueryCompilationErrors.notAllowedToCreatePermanentViewByReferencingTempViewError( - name, nameParts.quoted) + viewNameParts, nameParts.quoted) } referredTempFunctions.foreach { funcName => throw QueryCompilationErrors.notAllowedToCreatePermanentViewByReferencingTempFuncError( - name, funcName) + viewNameParts, funcName) } val tempVars = collectTemporaryVariables(child) tempVars.foreach { nameParts => throw QueryCompilationErrors.notAllowedToCreatePermanentViewByReferencingTempVarError( - name.nameParts, nameParts) + viewNameParts, nameParts) } } } @@ -704,7 +721,7 @@ object ViewHelper extends SQLConfHelper with Logging with CapturesConfig { if (!storeAnalyzedPlanForView) { // Skip cyclic check because when stored analyzed plan for 
view, the depended // view is already converted to the underlying tables. So no cyclic views. - checkCyclicViewReference(analyzedPlan, Seq(name), name) + checkCyclicViewReference(analyzedPlan, Seq(name.nameParts), name.nameParts) } CommandUtils.uncacheTableOrView(session, name) } @@ -882,3 +899,71 @@ object ViewHelper extends SQLConfHelper with Logging with CapturesConfig { } } } + +/** + * Post-analysis check for v2 CREATE VIEW / ALTER VIEW. First rejects catalogs that do not + * implement [[ViewCatalog]] with `MISSING_CATALOG_ABILITY.VIEWS` -- we do this before the + * temp-object and auto-alias checks so a catalog that cannot host views at all surfaces the + * correct root cause instead of a misleading "references temp" error. Then rejects permanent + * views that reference temporary objects and view bodies with auto-generated aliases. + * `referredTempFunctions` is captured by the command's `markAsAnalyzed` before this rule runs. + * The v1 counterparts [[CreateViewCommand]] and [[AlterViewAsCommand]] keep their existing + * exec-time checks -- Dataset-built commands bypass the analyzer's re-capture path, so the + * exec-time safety net must stay for v1. + */ +object CheckViewReferences extends (LogicalPlan => Unit) { + import ViewHelper._ + + // Extract (catalog, identifier) for the two resolved shapes view commands reach us with: + // `ResolvedIdentifier` for CREATE VIEW, `ResolvedPersistentView` for ALTER VIEW. Other shapes + // are an analyzer bug. 
+ private def catalogAndIdent(resolved: LogicalPlan): (CatalogPlugin, Identifier) = + resolved match { + case ri: ResolvedIdentifier => (ri.catalog, ri.identifier) + case rpv: ResolvedPersistentView => (rpv.catalog, rpv.identifier) + case other => + throw SparkException.internalError( + s"Unexpected child of view command: ${other.getClass.getName}") + } + + private def fullIdentFor(resolved: LogicalPlan): Seq[String] = { + val (catalog, ident) = catalogAndIdent(resolved) + catalog.name() +: ident.asMultipartIdentifier + } + + // Fail fast if the catalog cannot host views. Gate non-ViewCatalog plugins here so callers + // get the VIEWS-specific error rather than a generic cast failure later. + private def requireViewCatalog(resolved: LogicalPlan): Unit = { + val (catalog, _) = catalogAndIdent(resolved) + if (!catalog.isInstanceOf[ViewCatalog]) { + throw QueryCompilationErrors.missingCatalogViewsAbilityError(catalog) + } + } + + override def apply(plan: LogicalPlan): Unit = plan.foreach { + case cv: CreateView if cv.isAnalyzed => + requireViewCatalog(cv.child) + val fullIdent = fullIdentFor(cv.child) + verifyTemporaryObjectsNotExists( + isTemporary = false, fullIdent, cv.query, cv.referredTempFunctions) + verifyAutoGeneratedAliasesNotExists(cv.query, isTemporary = false, fullIdent) + // Cycles can only form when REPLACE'ing an existing view; a plain CREATE against an + // existing view fails earlier with `viewAlreadyExistsError` and against a non-existent + // view has nothing to cycle with. + if (cv.replace) { + checkCyclicViewReference(cv.query, Seq(fullIdent), fullIdent) + } + + case av: AlterViewAs if av.isAnalyzed => + // No capability check here: `Analyzer.lookupTableOrView(identifier, viewOnly=true)` + // already rejects non-ViewCatalog catalogs upstream for `UnresolvedView`, so by the time + // an AlterViewAs reaches this rule the catalog is guaranteed to be a ViewCatalog. 
+ val fullIdent = fullIdentFor(av.child) + verifyTemporaryObjectsNotExists( + isTemporary = false, fullIdent, av.query, av.referredTempFunctions) + verifyAutoGeneratedAliasesNotExists(av.query, isTemporary = false, fullIdent) + checkCyclicViewReference(av.query, Seq(fullIdent), fullIdent) + + case _ => + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala new file mode 100644 index 0000000000000..cb21e773d86c4 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterV2ViewExec.scala @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.datasources.v2 + +import scala.jdk.CollectionConverters._ + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.{NoSuchViewException, ResolvedIdentifier, ViewSchemaMode} +import org.apache.spark.sql.catalyst.catalog.CatalogTable +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog, ViewCatalog, ViewInfo} +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper +import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.execution.command.CommandUtils + +/** + * Shared bits for the v2 ALTER VIEW ... AS exec. Loads the existing view once via + * `existingView` and uses it to preserve user-set TBLPROPERTIES, comment, collation, owner, + * and schema binding mode when constructing the replacement [[ViewInfo]]. A racing DDL between + * analysis and exec can change the target out from under us (dropped, or replaced with a + * non-view table); in that case we surface a regular no-such-view / not-a-view analysis error + * rather than propagating a stale analyzer decision. + * + * Transient fields (SQL configs, query column names) are re-captured from the + * current session by [[V2ViewPreparation.buildViewInfo]], matching v1 + * `AlterViewAsCommand.alterPermanentView`. PROP_OWNER and user TBLPROPERTIES flow through + * unchanged. + */ +private[v2] trait V2AlterViewPreparation extends V2ViewPreparation { + protected lazy val existingView: ViewInfo = try { + catalog.loadView(identifier) + } catch { + case _: NoSuchViewException => + // Race: the view disappeared after analysis. Surface no-such-view, or + // expect-view-not-table if a colliding non-view table appeared in a mixed catalog. 
+ catalog match { + case tc: TableCatalog if tc.tableExists(identifier) => + throw QueryCompilationErrors.expectViewNotTableError( + (catalog.name() +: identifier.asMultipartIdentifier).toSeq, + cmd = "ALTER VIEW ... AS", + suggestAlternative = false, + t = this) + case _ => + throw new NoSuchViewException(identifier) + } + } + + protected lazy val existingProps: Map[String, String] = + existingView.properties.asScala.toMap + + private def existingProp(key: String): Option[String] = existingProps.get(key) + + // ALTER VIEW ... AS does not accept a user column list. + override def userSpecifiedColumns: Seq[(String, Option[String])] = Seq.empty + override def comment: Option[String] = existingProp(TableCatalog.PROP_COMMENT) + override def collation: Option[String] = existingProp(TableCatalog.PROP_COLLATION) + // Preserve the existing view's owner (v1-parity with AlterViewAsCommand's viewMeta.copy, + // which leaves `owner` untouched). If the existing view has no PROP_OWNER, pass it through + // as None so the replacement ViewInfo also has no owner. + override def owner: Option[String] = existingProp(TableCatalog.PROP_OWNER) + override def userProperties: Map[String, String] = existingProps + + // Preserve the existing view's schema binding mode. Reuse `viewSchemaModeFromProperties` + // for a v1-identical decode -- it honors `viewSchemaBindingEnabled` and defaults missing + // values to SchemaBinding. We feed the typed `ViewInfo.schemaMode` String in via a + // single-key map so the decode logic stays in one place. + override def viewSchemaMode: ViewSchemaMode = + CatalogTable.viewSchemaModeFromProperties( + Option(existingView.schemaMode) + .map(CatalogTable.VIEW_SCHEMA_MODE -> _) + .toMap) + + /** + * Force-evaluate `existingView` so `NoSuchViewException` / `expectViewNotTableError` + * surfaces before any other work (e.g. `buildViewInfo`, uncache, replace). 
The result is + * intentionally discarded; call this purely for its side effect of materializing the + * lazy val. + */ + protected def requireExistingView(): Unit = existingView +} + +/** + * Physical plan node for ALTER VIEW ... AS on a v2 [[ViewCatalog]]. Dispatches to + * [[ViewCatalog#replaceView]], which is contractually atomic. + */ +case class AlterV2ViewExec( + catalog: ViewCatalog, + identifier: Identifier, + originalText: String, + query: LogicalPlan) extends V2AlterViewPreparation { + + override protected def run(): Seq[InternalRow] = { + requireExistingView() + val info = buildViewInfo() + // Cyclic reference detection is done at analysis time in CheckViewReferences. + CommandUtils.uncacheTableOrView(session, ResolvedIdentifier(catalog, identifier)) + catalog.replaceView(identifier, info) + Seq.empty + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala new file mode 100644 index 0000000000000..6cfa95a2eaf43 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateV2ViewExec.scala @@ -0,0 +1,175 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.v2 + +import scala.jdk.CollectionConverters._ + +import org.apache.spark.SparkException +import org.apache.spark.sql.catalyst.{CurrentUserContext, InternalRow} +import org.apache.spark.sql.catalyst.analysis.{ResolvedIdentifier, SchemaEvolution, ViewAlreadyExistsException, ViewSchemaMode} +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.util.CharVarcharUtils +import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog, ViewCatalog, ViewInfo} +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper +import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.execution.command.{CommandUtils, ViewHelper} +import org.apache.spark.sql.util.SchemaUtils +import org.apache.spark.util.ArrayImplicits._ + +/** + * Shared validation + ViewInfo construction for v2 CREATE VIEW / ALTER VIEW execs. + * + * Mirrors the persistent-view portion of v1 [[ViewHelper.prepareTable]] + the execution-time + * checks in [[org.apache.spark.sql.execution.command.CreateViewCommand.run]]. The v2-only + * analysis-time checks (temp-object references, auto-generated aliases, cycles) live in + * [[org.apache.spark.sql.execution.command.CheckViewReferences]]; v1 keeps exec-time checks. + */ +private[v2] trait V2ViewPreparation extends LeafV2CommandExec { + def catalog: ViewCatalog + def identifier: Identifier + def userSpecifiedColumns: Seq[(String, Option[String])] + def comment: Option[String] + def collation: Option[String] + def owner: Option[String] + def userProperties: Map[String, String] + def originalText: String + def query: LogicalPlan + def viewSchemaMode: ViewSchemaMode + + // Full multi-part identifier used for error rendering. 
Built once so we can avoid routing + // through the lossy v1 `TableIdentifier` for multi-level-namespace v2 catalogs. + protected lazy val fullNameParts: Seq[String] = + (catalog.name() +: identifier.asMultipartIdentifier).toSeq + + override def output: Seq[Attribute] = Seq.empty + + protected def buildViewInfo(): ViewInfo = { + import ViewHelper._ + + if (userSpecifiedColumns.nonEmpty) { + if (userSpecifiedColumns.length > query.output.length) { + throw QueryCompilationErrors.cannotCreateViewNotEnoughColumnsError( + fullNameParts, userSpecifiedColumns.map(_._1), query) + } else if (userSpecifiedColumns.length < query.output.length) { + throw QueryCompilationErrors.cannotCreateViewTooManyColumnsError( + fullNameParts, userSpecifiedColumns.map(_._1), query) + } + if (viewSchemaMode == SchemaEvolution) { + throw SparkException.internalError( + "View with user column list has viewSchemaMode EVOLUTION") + } + } + + SchemaUtils.checkIndeterminateCollationInSchema(query.schema) + + val aliasedSchema = CharVarcharUtils.getRawSchema( + aliasPlan(session, query, userSpecifiedColumns).schema, session.sessionState.conf) + SchemaUtils.checkColumnNameDuplication( + aliasedSchema.fieldNames.toImmutableArraySeq, session.sessionState.conf.resolver) + + val manager = session.sessionState.catalogManager + val queryColumnNames = if (viewSchemaMode == SchemaEvolution) { + Array.empty[String] + } else { + query.output.map(_.name).toArray + } + + val builder = new ViewInfo.Builder() + .withSchema(aliasedSchema) + .withProperties(userProperties.asJava) + .withQueryText(originalText) + .withCurrentCatalog(manager.currentCatalog.name) + .withCurrentNamespace(manager.currentNamespace) + .withSqlConfigs(sqlConfigsToProps(session.sessionState.conf, "").asJava) + .withSchemaMode(viewSchemaMode.toString) + .withQueryColumnNames(queryColumnNames) + // CREATE stamps the current user into PROP_OWNER (matching v2 CREATE TABLE via + // CatalogV2Util.withDefaultOwnership and v1 CREATE VIEW via 
CatalogTable.owner's default); + // ALTER preserves the existing view's owner (v1-parity with AlterViewAsCommand's + // viewMeta.copy). Both cases are expressed via the `owner` hook provided by the subclass. + owner.foreach(builder.withOwner) + comment.foreach(builder.withComment) + collation.foreach(builder.withCollation) + builder.build() + } + + protected def viewAlreadyExists(): Throwable = + QueryCompilationErrors.viewAlreadyExistsError(fullNameParts) +} + +/** + * Physical plan node for CREATE VIEW on a v2 [[ViewCatalog]]. Dispatches to + * [[ViewCatalog#createView]] for plain CREATE, [[ViewCatalog#createOrReplaceView]] for + * `OR REPLACE`, and short-circuits `IF NOT EXISTS` early via [[ViewCatalog#viewExists]] so + * `buildViewInfo` (aliasing / config capture) is skipped when the view already exists. + */ +case class CreateV2ViewExec( + catalog: ViewCatalog, + identifier: Identifier, + userSpecifiedColumns: Seq[(String, Option[String])], + comment: Option[String], + collation: Option[String], + userProperties: Map[String, String], + originalText: String, + query: LogicalPlan, + allowExisting: Boolean, + replace: Boolean, + viewSchemaMode: ViewSchemaMode) extends V2ViewPreparation { + + override def owner: Option[String] = Some(CurrentUserContext.getCurrentUser) + + override protected def run(): Seq[InternalRow] = { + // CREATE VIEW IF NOT EXISTS: short-circuit before `buildViewInfo` if a view already sits + // at the ident -- avoids `aliasPlan` / config capture for the common no-op case (matches + // v1 `CreateViewCommand.run`). The mixed-catalog "table at ident" no-op is handled in the + // catch block below; that case is rare enough that paying for `buildViewInfo` is fine. 
+ if (allowExisting && catalog.viewExists(identifier)) return Seq.empty + + val info = buildViewInfo() + try { + if (replace) { + CommandUtils.uncacheTableOrView(session, ResolvedIdentifier(catalog, identifier)) + catalog.createOrReplaceView(identifier, info) + } else { + catalog.createView(identifier, info) + } + } catch { + case _: ViewAlreadyExistsException => + // Catalog refused: something already occupies the ident. Decode whether it's a table + // (cross-type collision) or a view (race for plain CREATE / OR REPLACE), and emit the + // precise error -- or no-op for IF NOT EXISTS. + val isTable = catalog match { + case tc: TableCatalog => tc.tableExists(identifier) + case _ => false + } + if (isTable) { + if (!allowExisting) { + throw QueryCompilationErrors.unsupportedCreateOrReplaceViewOnTableError( + fullNameParts, replace) + } + // CREATE VIEW IF NOT EXISTS over a table is a no-op (v1 parity). + } else if (!allowExisting) { + throw viewAlreadyExists() + } + // else: a view appeared between our viewExists probe and createView; IF NOT EXISTS + // semantics make this a no-op. 
+ } + Seq.empty + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 6730673cab025..d677ff1c4be2b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.trees.TreePattern.SCALAR_SUBQUERY import org.apache.spark.sql.catalyst.util.{toPrettySQL, GeneratedColumn, IdentityColumn, ResolveDefaultColumns, ResolveTableConstraints, V2ExpressionBuilder} import org.apache.spark.sql.classic.SparkSession -import org.apache.spark.sql.connector.catalog.{Identifier, StagingTableCatalog, SupportsDeleteV2, SupportsNamespaces, SupportsPartitionManagement, SupportsWrite, TableCapability, TableCatalog, TruncatableTable, V1Table} +import org.apache.spark.sql.connector.catalog.{Identifier, StagingTableCatalog, SupportsDeleteV2, SupportsNamespaces, SupportsPartitionManagement, SupportsWrite, TableCapability, TableCatalog, TruncatableTable, V1Table, ViewCatalog} import org.apache.spark.sql.connector.catalog.TableChange import org.apache.spark.sql.connector.catalog.index.SupportsIndex import org.apache.spark.sql.connector.expressions.{FieldReference, LiteralValue} @@ -301,6 +301,91 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat qualifyLocInTableSpec(tableSpec), orCreate = orCreate, invalidateCache) :: Nil } + // CreateView: CheckViewReferences throws MISSING_CATALOG_ABILITY.VIEWS for non-ViewCatalog + // catalogs. AlterViewAs: the analyzer's view lookup is expected to have rejected them. 
+ case CreateView(ResolvedIdentifier(catalog, ident), userSpecifiedColumns, comment, + collation, properties, originalText, child, allowExisting, replace, viewSchemaMode, + _, _) => + val sqlText = originalText.getOrElse { + throw QueryCompilationErrors.createPersistedViewFromDatasetAPINotAllowedError() + } + CreateV2ViewExec(catalog.asInstanceOf[ViewCatalog], ident, userSpecifiedColumns, comment, + collation, properties, sqlText, child, allowExisting, replace, viewSchemaMode) :: Nil + + case AlterViewAs(ResolvedPersistentView(catalog, ident, _), originalText, query, _, _) => + AlterV2ViewExec(catalog.asInstanceOf[ViewCatalog], ident, originalText, query) :: Nil + + // View DDL / inspection on a non-session v2 catalog that the v1 rewrite in + // `ResolveSessionCatalog` can't handle. These are tracked as follow-up work in SPARK-52729; + // pin the current failure mode with a clean `UNSUPPORTED_FEATURE.TABLE_OPERATION` error + // so users get a meaningful message (and test coverage catches a future regression to a + // generic planner error). + case SetViewProperties(ResolvedPersistentView(catalog, ident, _), _) => + throw QueryCompilationErrors.unsupportedTableOperationError( + catalog, ident, "ALTER VIEW ... SET TBLPROPERTIES") + + case UnsetViewProperties(ResolvedPersistentView(catalog, ident, _), _, _) => + throw QueryCompilationErrors.unsupportedTableOperationError( + catalog, ident, "ALTER VIEW ... UNSET TBLPROPERTIES") + + case AlterViewSchemaBinding(ResolvedPersistentView(catalog, ident, _), _) => + throw QueryCompilationErrors.unsupportedTableOperationError( + catalog, ident, "ALTER VIEW ... WITH SCHEMA") + + case RenameTable(ResolvedPersistentView(catalog, ident, _), _, _) => + throw QueryCompilationErrors.unsupportedTableOperationError( + catalog, ident, "ALTER VIEW ... 
RENAME TO") + + case ShowCreateTable(ResolvedPersistentView(catalog, ident, _), _, _) => + throw QueryCompilationErrors.unsupportedTableOperationError( + catalog, ident, "SHOW CREATE TABLE") + + case ShowTableProperties(ResolvedPersistentView(catalog, ident, _), _, _) => + throw QueryCompilationErrors.unsupportedTableOperationError( + catalog, ident, "SHOW TBLPROPERTIES") + + case ShowColumns(ResolvedPersistentView(catalog, ident, _), _, _) => + throw QueryCompilationErrors.unsupportedTableOperationError( + catalog, ident, "SHOW COLUMNS") + + case DescribeRelation(ResolvedPersistentView(catalog, ident, _), _, _) => + throw QueryCompilationErrors.unsupportedTableOperationError( + catalog, ident, "DESCRIBE TABLE") + + case DescribeColumn(ResolvedPersistentView(catalog, ident, _), _, _, _) => + throw QueryCompilationErrors.unsupportedTableOperationError( + catalog, ident, "DESCRIBE TABLE ... COLUMN") + + // Plans that resolve through `UnresolvedTableOrView` reach here with a + // `ResolvedPersistentView` child for non-session v2 views (the v1 rewrite in + // `ResolveSessionCatalog` no longer matches them because `ResolvedViewIdentifier` is gated + // on `isSessionCatalog`). Pin each with `UNSUPPORTED_FEATURE.TABLE_OPERATION` so users get + // a clean `AnalysisException` instead of a generic "No plan for ..." assertion from the + // planner. Tracked for follow-up real handlers in SPARK-52729. + case RefreshTable(ResolvedPersistentView(catalog, ident, _)) => + throw QueryCompilationErrors.unsupportedTableOperationError( + catalog, ident, "REFRESH TABLE") + + case AnalyzeTable(ResolvedPersistentView(catalog, ident, _), _, _) => + throw QueryCompilationErrors.unsupportedTableOperationError( + catalog, ident, "ANALYZE TABLE") + + case AnalyzeColumn(ResolvedPersistentView(catalog, ident, _), _, _) => + throw QueryCompilationErrors.unsupportedTableOperationError( + catalog, ident, "ANALYZE TABLE ... 
FOR COLUMNS") + + // SHOW PARTITIONS on a view is already rejected during analysis: the parser uses + // `UnresolvedTable` (not `UnresolvedTableOrView`), so `CheckAnalysis` surfaces + // `EXPECT_TABLE_NOT_VIEW.NO_ALTERNATIVE` before planning. No strategy case needed. + + // DROP VIEW on a non-session ViewCatalog. The v1 rewrite in `ResolveSessionCatalog` skips + // ViewCatalog catalogs, so they fall through here. `DropViewExec` calls + // `ViewCatalog.dropView` and surfaces `EXPECT_VIEW_NOT_TABLE` if the identifier resolves to + // a table in a mixed catalog. + case DropView(r @ ResolvedIdentifier(catalog: ViewCatalog, ident), ifExists) => + val invalidateFunc = () => CommandUtils.uncacheTableOrView(session, r) + DropViewExec(catalog, ident, ifExists, invalidateFunc) :: Nil + case ReplaceTableAsSelect(ResolvedIdentifier(catalog, ident), parts, query, tableSpec: TableSpec, options, orCreate, true) => catalog match { @@ -493,6 +578,15 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case ShowTables(ResolvedNamespace(catalog, ns, _), pattern, output) => ShowTablesExec(output, catalog.asTableCatalog, ns, pattern) :: Nil + // SHOW VIEWS on a v2 ViewCatalog. `ResolveSessionCatalog` rewrites the SHOW VIEWS plan to + // v1 `ShowViewsCommand` only when the catalog is NOT a `ViewCatalog`; non-`ViewCatalog` + // catalogs (session or not) are rejected with `MISSING_CATALOG_ABILITY.VIEWS` there. So + // this case sees `ViewCatalog` catalogs (typically non-session, since the default + // `V2SessionCatalog` is not a `ViewCatalog`; a session-catalog override that mixes in + // `ViewCatalog` would also reach here). 
+ case ShowViews(ResolvedNamespace(catalog: ViewCatalog, ns, _), pattern, output) => + ShowViewsExec(output, catalog, ns, pattern) :: Nil + case ShowTablesExtended( ResolvedNamespace(catalog, ns, _), pattern, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala index c94af4e3dceb3..18e6a5eb86ac8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala @@ -19,12 +19,22 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} +import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog, ViewCatalog} import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.util.ArrayImplicits._ /** * Physical plan node for dropping a table. + * + * Probes `tableExists` upfront so `IF EXISTS` over a missing table is a clean no-op even + * on catalogs whose `dropTable` / `purgeTable` does not honor the "return false on missing" + * contract (e.g. JDBC catalogs that throw a SQL syntax error, or the default `purgeTable` + * that throws `UNSUPPORTED_FEATURE.PURGE_TABLE` unconditionally). + * + * When the table is absent, falls back to `viewExists` for catalogs that also implement + * [[ViewCatalog]] -- distinguishes "wrong type" from "missing" so a `DROP TABLE someView` + * on a mixed catalog surfaces the dedicated `EXPECT_TABLE_NOT_VIEW` error rather than a + * generic "table not found", matching the v1 `DropTableCommand(isView = false)` behavior. 
*/ case class DropTableExec( catalog: TableCatalog, @@ -37,9 +47,18 @@ case class DropTableExec( if (catalog.tableExists(ident)) { invalidateCache() if (purge) catalog.purgeTable(ident) else catalog.dropTable(ident) - } else if (!ifExists) { - val nameParts = (catalog.name() +: ident.namespace() :+ ident.name()).toImmutableArraySeq - throw QueryCompilationErrors.noSuchTableError(nameParts) + } else { + val nameParts = + (catalog.name() +: ident.namespace() :+ ident.name()).toImmutableArraySeq + catalog match { + case vc: ViewCatalog if vc.viewExists(ident) => + throw QueryCompilationErrors.expectTableNotViewError( + nameParts, cmd = "DROP TABLE", suggestAlternative = false, t = this) + case _ if !ifExists => + throw QueryCompilationErrors.noSuchTableError(nameParts) + case _ => + // IF EXISTS: no-op. + } } Seq.empty diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropViewExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropViewExec.scala new file mode 100644 index 0000000000000..9a665f644e0de --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropViewExec.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.v2 + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.NoSuchViewException +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog, ViewCatalog} +import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.util.ArrayImplicits._ + +/** + * Physical plan node for DROP VIEW on a v2 [[ViewCatalog]]. Calls [[ViewCatalog#dropView]]; if + * it returns false and the catalog also implements [[TableCatalog]] with a table at this + * identifier, surfaces the dedicated `EXPECT_VIEW_NOT_TABLE` error rather than a generic + * "view not found" -- matching v1 `DropTableCommand(isView = true)`. + */ +case class DropViewExec( + catalog: ViewCatalog, + ident: Identifier, + ifExists: Boolean, + invalidateCache: () => Unit) extends LeafV2CommandExec { + + override protected def run(): Seq[InternalRow] = { + val dropped = catalog.dropView(ident) + if (dropped) { + invalidateCache() + } else { + val nameParts = + (catalog.name() +: ident.namespace() :+ ident.name()).toImmutableArraySeq + catalog match { + case tc: TableCatalog if tc.tableExists(ident) => + throw QueryCompilationErrors.expectViewNotTableError( + nameParts, cmd = "DROP VIEW", suggestAlternative = false, t = this) + case _ if !ifExists => + throw new NoSuchViewException(ident) + case _ => + // IF EXISTS: no-op. 
+ } + } + Seq.empty + } + + override def output: Seq[Attribute] = Seq.empty +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowViewsExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowViewsExec.scala new file mode 100644 index 0000000000000..00927f05842ad --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowViewsExec.scala @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.v2 + +import scala.collection.mutable.ArrayBuffer + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.catalyst.util.StringUtils +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.NamespaceHelper +import org.apache.spark.sql.connector.catalog.ViewCatalog +import org.apache.spark.sql.execution.LeafExecNode + +/** + * Physical plan node for SHOW VIEWS on a v2 [[ViewCatalog]]. Enumerates view identifiers via + * [[ViewCatalog#listViews]]. 
v2 catalogs have no temp views, so the {@code isTemporary} column + * is always false -- mirroring v1 {@code ShowViewsCommand}, which sets {@code isTemporary=true} + * only for local/global temp views that live in the session catalog. + */ +case class ShowViewsExec( + output: Seq[Attribute], + catalog: ViewCatalog, + namespace: Seq[String], + pattern: Option[String]) extends V2CommandExec with LeafExecNode { + override protected def run(): Seq[InternalRow] = { + val rows = new ArrayBuffer[InternalRow]() + catalog.listViews(namespace.toArray).foreach { ident => + val nameMatches = + pattern.forall(p => StringUtils.filterPattern(Seq(ident.name), p).nonEmpty) + if (nameMatches) { + rows += toCatalystRow(ident.namespace().quoted, ident.name(), false) + } + } + rows.toSeq + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala index 9bd68cbe72a07..d8fe14a0664c1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.execution.{ColumnarRule, CommandExecutionMode, Query import org.apache.spark.sql.execution.adaptive.AdaptiveRulesHolder import org.apache.spark.sql.execution.aggregate.{ResolveEncodersInScalaAgg, ScalaUDAF} import org.apache.spark.sql.execution.analysis.DetectAmbiguousSelfJoin -import org.apache.spark.sql.execution.command.CommandCheck +import org.apache.spark.sql.execution.command.{CheckViewReferences, CommandCheck} import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.execution.datasources.v2.{TableCapabilityCheck, V2SessionCatalog} import org.apache.spark.sql.execution.streaming.runtime.ResolveWriteToStream @@ -259,6 +259,7 @@ abstract class BaseSessionStateBuilder( HiveOnlyCheck +: TableCapabilityCheck +: CommandCheck +: + 
CheckViewReferences +: ViewSyncSchemaToMetaStore +: customCheckRules } diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/explain-aqe.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/explain-aqe.sql.out index 4b9bb859cd567..3f16d4f756511 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/explain-aqe.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/explain-aqe.sql.out @@ -174,7 +174,7 @@ EXPLAIN FORMATTED CREATE VIEW explain_view AS SELECT key, val FROM explain_temp1 -- !query analysis -ExplainCommand 'CreateView SELECT key, val FROM explain_temp1, false, false, COMPENSATION, FormattedMode +ExplainCommand 'CreateView SELECT key, val FROM explain_temp1, false, false, COMPENSATION, false, FormattedMode -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/explain.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/explain.sql.out index 4b9bb859cd567..3f16d4f756511 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/explain.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/explain.sql.out @@ -174,7 +174,7 @@ EXPLAIN FORMATTED CREATE VIEW explain_view AS SELECT key, val FROM explain_temp1 -- !query analysis -ExplainCommand 'CreateView SELECT key, val FROM explain_temp1, false, false, COMPENSATION, FormattedMode +ExplainCommand 'CreateView SELECT key, val FROM explain_temp1, false, false, COMPENSATION, false, FormattedMode -- !query diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala new file mode 100644 index 0000000000000..8d3ad19419dff --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyTableSuite.scala @@ -0,0 +1,181 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector + +import org.apache.spark.SparkConf +import org.apache.spark.sql.{QueryTest, Row} +import org.apache.spark.sql.catalyst.analysis.NoSuchTableException +import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, Table, TableCatalog, TableChange, TableInfo, TableSummary} +import org.apache.spark.sql.connector.expressions.LogicalExpressions +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.util.CaseInsensitiveStringMap + +/** + * Tests for the data-source-table side of [[MetadataOnlyTable]]: a v2 catalog returns + * metadata-only tables and Spark reads / writes them via the V1 data-source path. + * View-related paths live in [[DataSourceV2MetadataOnlyViewSuite]]. 
+ */ +class DataSourceV2MetadataOnlyTableSuite extends QueryTest with SharedSparkSession { + import testImplicits._ + + override def sparkConf: SparkConf = super.sparkConf + .set( + "spark.sql.catalog.table_catalog", + classOf[TestingDataSourceTableCatalog].getName) + + test("file source table") { + withTempPath { path => + val loc = path.getCanonicalPath + val tableName = s"table_catalog.`$loc`.test_json" + + spark.range(10).select($"id".cast("string").as("col")).write.json(loc) + checkAnswer(spark.table(tableName), 0.until(10).map(i => Row(i.toString))) + + sql(s"INSERT INTO $tableName SELECT 'abc'") + checkAnswer(spark.table(tableName), 0.until(10).map(i => Row(i.toString)) :+ Row("abc")) + + sql(s"INSERT OVERWRITE $tableName SELECT 'xyz'") + checkAnswer(spark.table(tableName), Row("xyz")) + } + } + + test("partitioned file source table") { + withTempPath { path => + val loc = path.getCanonicalPath + val tableName = s"table_catalog.`$loc`.test_partitioned_json" + + Seq(1 -> 1, 2 -> 1).toDF("c1", "c2").write.partitionBy("c2").json(loc) + checkAnswer(spark.table(tableName), Seq(Row(1, 1), Row(2, 1))) + + sql(s"INSERT INTO $tableName SELECT 1, 2") + checkAnswer(spark.table(tableName), Seq(Row(1, 1), Row(2, 1), Row(1, 2))) + + sql(s"INSERT INTO $tableName PARTITION(c2=3) SELECT 1") + checkAnswer(spark.table(tableName), Seq(Row(1, 1), Row(2, 1), Row(1, 2), Row(1, 3))) + + sql(s"INSERT OVERWRITE $tableName PARTITION(c2=2) SELECT 10") + checkAnswer(spark.table(tableName), Seq(Row(1, 1), Row(2, 1), Row(10, 2), Row(1, 3))) + + sql(s"INSERT OVERWRITE $tableName SELECT 20, 20") + checkAnswer(spark.table(tableName), Row(20, 20)) + } + } + + // TODO: move the v2 data source table handling from V2SessionCatalog to the analyzer + ignore("v2 data source table") { + val tableName = "table_catalog.default.test_v2" + checkAnswer(spark.table(tableName), 0.until(10).map(i => Row(i, -i))) + } + + test("DESCRIBE TABLE EXTENDED on a non-view MetadataOnlyTable shows the real 
identifier") { + // MetadataOnlyTable.name() is read by DescribeTableExec's "Name" row. Pin that it + // reflects the catalog-supplied identifier (here TestingDataSourceTableCatalog passes + // `ident.toString`) rather than a generic placeholder, so the DESCRIBE output is + // meaningful for users. + withTempPath { path => + val loc = path.getCanonicalPath + val tableName = s"table_catalog.`$loc`.test_json" + spark.range(1).select($"id".cast("string").as("col")).write.json(loc) + val nameRow = sql(s"DESCRIBE TABLE EXTENDED $tableName") + .collect() + .find(_.getString(0) == "Name") + .getOrElse(fail("DESCRIBE output missing the `Name` row")) + val rendered = nameRow.getString(1) + assert(rendered.contains("test_json"), s"expected the real identifier, got: $rendered") + } + } + + test("fully-qualified column reference uses the real catalog name") { + withTempPath { path => + val loc = path.getCanonicalPath + val tableName = s"table_catalog.`$loc`.test_json" + + spark.range(3).select($"id".cast("string").as("col")).write.json(loc) + + // 1-part and 2-part references resolve via last-part suffix matching. + checkAnswer( + sql(s"SELECT test_json.col FROM $tableName"), + Seq(Row("0"), Row("1"), Row("2"))) + checkAnswer( + sql(s"SELECT `$loc`.test_json.col FROM $tableName"), + Seq(Row("0"), Row("1"), Row("2"))) + + // 3-part reference uses the real catalog name. `V1Table.toCatalogTable` sets + // `CatalogTable.multipartIdentifier` to `[table_catalog, <loc>, test_json]`; the + // SessionCatalog change in this PR makes `getRelation` prefer that over the hardcoded + // `spark_catalog` qualifier, so the SubqueryAlias carries the real catalog and this + // 3-part column ref resolves. + checkAnswer( + sql(s"SELECT $tableName.col FROM $tableName"), + Seq(Row("0"), Row("1"), Row("2"))) + } + } +} + +/** + * A read-only [[TableCatalog]] that returns [[MetadataOnlyTable]] for a small set of canned + * table fixtures. 
Used to drive the data-source-table read path (file source + v2 provider) + * through Spark's V1 data-source machinery. + */ +class TestingDataSourceTableCatalog extends TableCatalog { + override def loadTable(ident: Identifier): Table = ident.name() match { + case "test_json" => + val info = new TableInfo.Builder() + .withSchema(new StructType().add("col", "string")) + .withProvider("json") + .withLocation(ident.namespace().head) + .withTableType(TableSummary.EXTERNAL_TABLE_TYPE) + .build() + new MetadataOnlyTable(info, ident.toString) + case "test_partitioned_json" => + val partitioning = LogicalExpressions.identity(LogicalExpressions.reference(Seq("c2"))) + val info = new TableInfo.Builder() + .withSchema(new StructType().add("c1", "int").add("c2", "int")) + .withProvider("json") + .withLocation(ident.namespace().head) + .withTableType(TableSummary.EXTERNAL_TABLE_TYPE) + .withPartitions(Array(partitioning)) + .build() + new MetadataOnlyTable(info, ident.toString) + case "test_v2" => + val info = new TableInfo.Builder() + .withSchema(FakeV2Provider.schema) + .withProvider(classOf[FakeV2Provider].getName) + .build() + new MetadataOnlyTable(info, ident.toString) + case _ => throw new NoSuchTableException(ident) + } + + override def createTable(ident: Identifier, info: TableInfo): Table = + throw new RuntimeException("shouldn't be called") + override def alterTable(ident: Identifier, changes: TableChange*): Table = + throw new RuntimeException("shouldn't be called") + override def dropTable(ident: Identifier): Boolean = + throw new RuntimeException("shouldn't be called") + override def renameTable(oldIdent: Identifier, newIdent: Identifier): Unit = + throw new RuntimeException("shouldn't be called") + override def listTables(namespace: Array[String]): Array[Identifier] = + throw new RuntimeException("shouldn't be called") + + private var catalogName = "" + override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = { + catalogName = name + } + 
override def name(): String = catalogName +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala new file mode 100644 index 0000000000000..0851e6d2df765 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetadataOnlyViewSuite.scala @@ -0,0 +1,1120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.connector + +import org.apache.spark.SparkConf +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, NoSuchViewException, TableAlreadyExistsException, ViewAlreadyExistsException} +import org.apache.spark.sql.connector.catalog.{Identifier, MetadataOnlyTable, RelationCatalog, Table, TableCatalog, TableChange, TableInfo, TableSummary, V1Table, ViewCatalog, ViewInfo} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.util.CaseInsensitiveStringMap + +/** + * Tests for the view side of [[MetadataOnlyTable]]: view-text expansion on read, and + * CREATE VIEW / ALTER VIEW ... AS going through the v2 write path + * (`CreateV2ViewExec` / `AlterV2ViewExec`). View writes route through + * [[ViewCatalog#createView]] / [[ViewCatalog#replaceView]]. + * Data-source-table read paths live in + * [[org.apache.spark.sql.connector.DataSourceV2MetadataOnlyTableSuite]]. + * + * TODO: once the remaining v2 view DDL is implemented (SET/UNSET TBLPROPERTIES, SHOW CREATE + * VIEW, RENAME TO, SCHEMA BINDING, DESCRIBE / SHOW TBLPROPERTIES on v2 views), register a + * `MetadataOnlyTable`-backed `DelegatingCatalogExtension` as `spark.sql.catalog.spark_catalog` + * and run the shared [[org.apache.spark.sql.execution.PersistedViewTestSuite]] body against + * the v2 path for full parity with the v1 persisted-view coverage. 
+ */ +class DataSourceV2MetadataOnlyViewSuite extends QueryTest with SharedSparkSession { + import testImplicits._ + + override def sparkConf: SparkConf = super.sparkConf + .set("spark.sql.catalog.view_catalog", classOf[TestingRelationCatalog].getName) + + // --- View read path ----------------------------------------------------- + + test("read view expands SQL text and applies captured SQL configs") { + withTable("spark_catalog.default.t") { + Seq("a", "b").toDF("col").write.saveAsTable("spark_catalog.default.t") + // view_catalog.ansi.test_view stores view.sqlConfig.spark.sql.ansi.enabled=true; + // view_catalog.non_ansi.test_view stores it =false. The view body does + // `col::int` which errors in ANSI mode and yields NULL in non-ANSI mode. + intercept[Exception](spark.table("view_catalog.ansi.test_view").collect()) + checkAnswer(spark.table("view_catalog.non_ansi.test_view"), Row("b", null)) + } + } + + test("read view resolves unqualified refs via captured current catalog/namespace") { + withTable("spark_catalog.default.t") { + Seq("a", "b").toDF("col").write.saveAsTable("spark_catalog.default.t") + // View text uses the unqualified name `t`; it resolves via the stored + // current catalog / namespace properties. + checkAnswer(spark.table("view_catalog.ns.test_unqualified_view"), Row("b")) + } + } + + test("read view resolves unqualified refs via multi-part captured namespace") { + // End-to-end coverage of the v2 encoder -> parser round-trip: test_unqualified_multi is a + // view whose captured catalog+namespace is view_catalog.ns1.ns2 (two-part namespace) and + // whose body references `t` unqualified. At read time the unqualified `t` must expand to + // view_catalog.ns1.ns2.t via the captured context -- which TestingRelationCatalog resolves to + // its own `t` fixture at that namespace. 
+ checkAnswer( + spark.table("view_catalog.outer_ns.test_unqualified_multi"), + Row("multi")) + } + + // --- ViewInfo unit tests ----------------------------------------------- + + test("multi-part captured namespace round-trips through V1Table.toCatalogTable") { + // (a) ViewInfo.Builder stores (cat, Array(db1, db2)) as typed fields. + // (b) V1Table.toCatalogTable reads them directly and emits v1's numbered + // view.catalogAndNamespace.* keys so (c) the resulting CatalogTable's + // `viewCatalogAndNamespace` exposes the full (cat, db1, db2), which is what the v1 + // view-resolution path consumes to expand unqualified references in the view body. + val info = new ViewInfo.Builder() + .withSchema(new StructType().add("col", "string")) + .withQueryText("SELECT col FROM t") + .withCurrentCatalog("my_cat") + .withCurrentNamespace(Array("db1", "db2")) + .build() + val motTable = new MetadataOnlyTable(info, "v") + // Any CatalogPlugin works here; toCatalogTable only reads `catalog.name()`. + val catalog = spark.sessionState.catalogManager.catalog("view_catalog") + val ct = V1Table.toCatalogTable( + catalog, Identifier.of(Array("ns"), "v"), motTable) + assert(ct.viewCatalogAndNamespace == Seq("my_cat", "db1", "db2")) + + // Namespace parts containing dots flow through structurally (no string encoding). 
+ val infoWeird = new ViewInfo.Builder() + .withSchema(new StructType().add("col", "string")) + .withQueryText("SELECT col FROM t") + .withCurrentCatalog("my_cat") + .withCurrentNamespace(Array("weird.db", "normal")) + .build() + val ctWeird = V1Table.toCatalogTable( + catalog, Identifier.of(Array("ns"), "v"), new MetadataOnlyTable(infoWeird, "v")) + assert(ctWeird.viewCatalogAndNamespace == Seq("my_cat", "weird.db", "normal")) + } + + test("view with no captured catalog omits viewCatalogAndNamespace") { + val info = new ViewInfo.Builder() + .withSchema(new StructType().add("col", "string")) + .withQueryText("SELECT * FROM spark_catalog.default.t") + .build() + val motTable = new MetadataOnlyTable(info, "v") + val catalog = spark.sessionState.catalogManager.catalog("view_catalog") + val ct = V1Table.toCatalogTable(catalog, Identifier.of(Array("ns"), "v"), motTable) + assert(ct.viewCatalogAndNamespace.isEmpty) + } + + // --- CREATE VIEW on a plain TableCatalog -------------------------------- + + test("CREATE VIEW on a v2 catalog") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW view_catalog.default.my_view AS " + + "SELECT x FROM spark_catalog.default.t WHERE x > 1") + checkAnswer(spark.table("view_catalog.default.my_view"), Seq(Row(2), Row(3))) + } + } + + test("CREATE VIEW IF NOT EXISTS is a no-op when the view exists") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW view_catalog.default.v_ifne AS " + + "SELECT x FROM spark_catalog.default.t") + // Re-running with IF NOT EXISTS should not fail and should not change the view. 
+ sql("CREATE VIEW IF NOT EXISTS view_catalog.default.v_ifne AS " + + "SELECT x + 100 AS x FROM spark_catalog.default.t") + checkAnswer(spark.table("view_catalog.default.v_ifne"), + Seq(Row(1), Row(2), Row(3))) + } + } + + test("CREATE VIEW without IF NOT EXISTS fails when the view exists") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW view_catalog.default.v_dup AS " + + "SELECT x FROM spark_catalog.default.t") + intercept[AnalysisException] { + sql("CREATE VIEW view_catalog.default.v_dup AS " + + "SELECT x FROM spark_catalog.default.t") + } + } + } + + test("CREATE OR REPLACE VIEW replaces an existing view") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW view_catalog.default.v_replace AS " + + "SELECT x FROM spark_catalog.default.t WHERE x > 10") + checkAnswer(spark.table("view_catalog.default.v_replace"), Seq.empty[Row]) + sql("CREATE OR REPLACE VIEW view_catalog.default.v_replace AS " + + "SELECT x FROM spark_catalog.default.t WHERE x > 1") + checkAnswer(spark.table("view_catalog.default.v_replace"), Seq(Row(2), Row(3))) + } + } + + test("CREATE VIEW on a catalog without ViewCatalog fails") { + withSQLConf( + "spark.sql.catalog.no_view_catalog" -> classOf[TestingTableOnlyCatalog].getName) { + val ex = intercept[AnalysisException] { + sql("CREATE VIEW no_view_catalog.default.v AS SELECT 1") + } + assert(ex.getCondition == "MISSING_CATALOG_ABILITY.VIEWS") + } + } + + test("CREATE VIEW rejects too-few / too-many user-specified columns") { + withTable("spark_catalog.default.t") { + Seq(1 -> 10).toDF("x", "y").write.saveAsTable("spark_catalog.default.t") + intercept[AnalysisException] { + sql("CREATE VIEW view_catalog.default.v_few (a) AS " + + "SELECT x, y FROM spark_catalog.default.t") + } + intercept[AnalysisException] { + sql("CREATE VIEW view_catalog.default.v_many (a, b, c) AS " + + "SELECT x, 
y FROM spark_catalog.default.t") + } + } + } + + test("CREATE VIEW rejects reference to a temporary function") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + spark.udf.register("temp_udf", (i: Int) => i + 1) + val ex = intercept[AnalysisException] { + sql("CREATE VIEW view_catalog.default.v_tempfn AS " + + "SELECT temp_udf(x) FROM spark_catalog.default.t") + } + assert(ex.getMessage.toLowerCase(java.util.Locale.ROOT).contains("temporary")) + } + } + + test("CREATE VIEW rejects reference to a temporary view") { + withTempView("tv") { + spark.range(3).createOrReplaceTempView("tv") + val ex = intercept[AnalysisException] { + sql("CREATE VIEW view_catalog.default.v_tempview AS SELECT id FROM tv") + } + assert(ex.getMessage.toLowerCase(java.util.Locale.ROOT).contains("temporary")) + } + } + + test("CREATE VIEW rejects reference to a temporary variable") { + withSessionVariable("temp_var") { + sql("DECLARE VARIABLE temp_var INT DEFAULT 1") + val ex = intercept[AnalysisException] { + sql("CREATE VIEW view_catalog.default.v_tempvar AS SELECT temp_var AS x") + } + assert(ex.getMessage.toLowerCase(java.util.Locale.ROOT).contains("temporary")) + } + } + + test("CREATE VIEW propagates DEFAULT COLLATION to TableInfo") { + withTable("spark_catalog.default.t") { + Seq("a", "b").toDF("col").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW view_catalog.default.v_coll DEFAULT COLLATION UTF8_BINARY AS " + + "SELECT col FROM spark_catalog.default.t") + // TestingRelationCatalog stores the TableInfo verbatim, so the collation property is + // observable via the catalog-stored builder output. 
+ val catalog = spark.sessionState.catalogManager.catalog("view_catalog") + .asInstanceOf[TestingRelationCatalog] + val info = catalog.getStoredView(Array("default"), "v_coll") + assert(info.properties().get(TableCatalog.PROP_COLLATION) == "UTF8_BINARY") + } + } + + test("CREATE OR REPLACE VIEW detects cyclic view references") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW view_catalog.default.v_cycle_a AS " + + "SELECT x FROM spark_catalog.default.t") + sql("CREATE VIEW view_catalog.default.v_cycle_b AS " + + "SELECT x FROM view_catalog.default.v_cycle_a") + val ex = intercept[AnalysisException] { + sql("CREATE OR REPLACE VIEW view_catalog.default.v_cycle_a AS " + + "SELECT x FROM view_catalog.default.v_cycle_b") + } + assert(ex.getCondition == "RECURSIVE_VIEW") + } + } + + test("CREATE VIEW over a non-view table entry is rejected (plain TableCatalog)") { + val catalog = spark.sessionState.catalogManager.catalog("view_catalog") + .asInstanceOf[TestingRelationCatalog] + val tableIdent = Identifier.of(Array("default"), "v_existing_table") + val tableInfo = new TableInfo.Builder() + .withSchema(new StructType().add("col", "string")) + .withTableType(TableSummary.EXTERNAL_TABLE_TYPE) + .build() + catalog.createTable(tableIdent, tableInfo) + try { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + + // CREATE OR REPLACE VIEW must not silently destroy a non-view table -- v1 parity. + val replaceEx = intercept[AnalysisException] { + sql("CREATE OR REPLACE VIEW view_catalog.default.v_existing_table AS " + + "SELECT x FROM spark_catalog.default.t") + } + assert(replaceEx.getCondition == "EXPECT_VIEW_NOT_TABLE.NO_ALTERNATIVE") + + // Plain CREATE VIEW over a table surfaces TABLE_OR_VIEW_ALREADY_EXISTS, matching v1. 
+ val createEx = intercept[AnalysisException] { + sql("CREATE VIEW view_catalog.default.v_existing_table AS " + + "SELECT x FROM spark_catalog.default.t") + } + assert(createEx.getCondition == "TABLE_OR_VIEW_ALREADY_EXISTS") + + // CREATE VIEW IF NOT EXISTS is a no-op -- the table entry is untouched. + sql("CREATE VIEW IF NOT EXISTS view_catalog.default.v_existing_table AS " + + "SELECT x FROM spark_catalog.default.t") + val stored = catalog.getStoredInfo(Array("default"), "v_existing_table") + assert(!stored.isInstanceOf[ViewInfo]) + assert(stored.properties().get(TableCatalog.PROP_TABLE_TYPE) == + TableSummary.EXTERNAL_TABLE_TYPE) + } + } finally { + catalog.dropTable(tableIdent) + } + } + + // --- ALTER VIEW --------------------------------------------------------- + + test("ALTER VIEW ... AS updates the view body on a v2 catalog") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW view_catalog.default.v_alter AS " + + "SELECT x FROM spark_catalog.default.t WHERE x > 10") + checkAnswer(spark.table("view_catalog.default.v_alter"), Seq.empty[Row]) + + sql("ALTER VIEW view_catalog.default.v_alter AS " + + "SELECT x FROM spark_catalog.default.t WHERE x > 1") + checkAnswer(spark.table("view_catalog.default.v_alter"), Seq(Row(2), Row(3))) + } + } + + test("ALTER VIEW on a missing view fails at analysis") { + // UnresolvedView resolves through lookupTableOrView and the missing view surfaces as an + // AnalysisException before we ever reach the v2 exec. The exact error condition (e.g. + // TABLE_OR_VIEW_NOT_FOUND) varies across Spark versions; we just assert we fail cleanly. 
+ intercept[AnalysisException] { + sql("ALTER VIEW view_catalog.default.does_not_exist AS SELECT 1 AS x") + } + } + + test("ALTER VIEW rejects reference to a temporary function") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW view_catalog.default.v_alter_tempfn AS " + + "SELECT x FROM spark_catalog.default.t") + spark.udf.register("temp_udf_alter", (i: Int) => i + 1) + val ex = intercept[AnalysisException] { + sql("ALTER VIEW view_catalog.default.v_alter_tempfn AS " + + "SELECT temp_udf_alter(x) FROM spark_catalog.default.t") + } + assert(ex.getMessage.toLowerCase(java.util.Locale.ROOT).contains("temporary")) + } + } + + test("ALTER VIEW rejects reference to a temporary view") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW view_catalog.default.v_alter_tempview AS " + + "SELECT x FROM spark_catalog.default.t") + withTempView("tv_alter") { + spark.range(3).createOrReplaceTempView("tv_alter") + val ex = intercept[AnalysisException] { + sql("ALTER VIEW view_catalog.default.v_alter_tempview AS SELECT id FROM tv_alter") + } + assert(ex.getMessage.toLowerCase(java.util.Locale.ROOT).contains("temporary")) + } + } + } + + test("ALTER VIEW rejects reference to a temporary variable") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW view_catalog.default.v_alter_tempvar AS " + + "SELECT x FROM spark_catalog.default.t") + withSessionVariable("temp_var_alter") { + sql("DECLARE VARIABLE temp_var_alter INT DEFAULT 1") + val ex = intercept[AnalysisException] { + sql("ALTER VIEW view_catalog.default.v_alter_tempvar AS SELECT temp_var_alter AS x") + } + assert(ex.getMessage.toLowerCase(java.util.Locale.ROOT).contains("temporary")) + } + } + } + + test("ALTER VIEW preserves user-set TBLPROPERTIES") { + 
withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW view_catalog.default.v_preserve " + + "TBLPROPERTIES ('mykey'='myvalue') AS " + + "SELECT x FROM spark_catalog.default.t") + sql("ALTER VIEW view_catalog.default.v_preserve AS " + + "SELECT x + 1 AS x FROM spark_catalog.default.t") + + val catalog = spark.sessionState.catalogManager.catalog("view_catalog") + .asInstanceOf[TestingRelationCatalog] + val info = catalog.getStoredView(Array("default"), "v_preserve") + assert(info.properties().get("mykey") == "myvalue") + } + } + + test("CREATE VIEW stamps PROP_OWNER on the stored TableInfo") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW view_catalog.default.v_owner_create AS " + + "SELECT x FROM spark_catalog.default.t") + + val catalog = spark.sessionState.catalogManager.catalog("view_catalog") + .asInstanceOf[TestingRelationCatalog] + val info = catalog.getStoredView(Array("default"), "v_owner_create") + // v2 CREATE VIEW stamps the current user into PROP_OWNER, matching v2 CREATE TABLE + // (via CatalogV2Util.withDefaultOwnership) and v1 CREATE VIEW (via CatalogTable.owner's + // default). Without this, the `ALTER VIEW preserves PROP_OWNER` test below would have + // nothing to carry forward on a v2-created view. + val owner = info.properties().get(TableCatalog.PROP_OWNER) + assert(owner != null && owner.nonEmpty, s"expected a non-empty owner, got: $owner") + } + } + + test("ALTER VIEW preserves PROP_OWNER (v1-parity)") { + val catalog = spark.sessionState.catalogManager.catalog("view_catalog") + .asInstanceOf[TestingRelationCatalog] + val viewIdent = Identifier.of(Array("default"), "v_owner") + // Pre-seed a view whose stored ViewInfo carries an explicit owner. 
+    val initialInfo = new ViewInfo.Builder()
+      .withSchema(new StructType().add("x", "int"))
+      .withQueryText("SELECT 1 AS x")
+      .withOwner("alice")
+      .withCurrentCatalog("spark_catalog")
+      .withCurrentNamespace(Array("default"))
+      .build()
+    catalog.createView(viewIdent, initialInfo)
+    try {
+      withTable("spark_catalog.default.t") {
+        Seq(2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t")
+        sql("ALTER VIEW view_catalog.default.v_owner AS " +
+          "SELECT x FROM spark_catalog.default.t")
+        // v1 ALTER VIEW AS carries `owner` forward via `viewMeta.copy(...)`. v2 must match:
+        // the stored TableInfo after the ALTER should still have the original owner.
+        val info = catalog.getStoredView(Array("default"), "v_owner")
+        assert(info.properties().get(TableCatalog.PROP_OWNER) == "alice")
+      }
+    } finally {
+      // The seeded entry is a view, so it has to be removed with dropView:
+      // TestingRelationCatalog.dropTable returns false for ViewInfo entries and leaves them
+      // in place, so the seeded `v_owner` would never be cleaned up.
+      catalog.dropView(viewIdent)
+    }
+  }
+
+  test("ALTER VIEW preserves SCHEMA EVOLUTION binding mode") {
+    withTable("spark_catalog.default.t") {
+      Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t")
+      sql("CREATE VIEW view_catalog.default.v_evo WITH SCHEMA EVOLUTION AS " +
+        "SELECT x FROM spark_catalog.default.t")
+      sql("ALTER VIEW view_catalog.default.v_evo AS " +
+        "SELECT x + 1 AS x FROM spark_catalog.default.t")
+
+      val catalog = spark.sessionState.catalogManager.catalog("view_catalog")
+        .asInstanceOf[TestingRelationCatalog]
+      assert(catalog.getStoredView(Array("default"), "v_evo").schemaMode() == "EVOLUTION")
+    }
+  }
+
+  test("ALTER VIEW re-captures the current session's SQL configs") {
+    withTable("spark_catalog.default.t") {
+      Seq("a", "b").toDF("col").write.saveAsTable("spark_catalog.default.t")
+      withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") {
+        sql("CREATE VIEW view_catalog.default.v_configs AS " +
+          "SELECT col FROM spark_catalog.default.t")
+      }
+      val catalog = spark.sessionState.catalogManager.catalog("view_catalog")
+        .asInstanceOf[TestingRelationCatalog]
+      assert(catalog.getStoredView(Array("default"), "v_configs")
+        .sqlConfigs().get(SQLConf.ANSI_ENABLED.key) == "true")
+
+      // ALTER under a different ANSI setting should replace the stored config, not merge.
+      withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") {
+        sql("ALTER VIEW view_catalog.default.v_configs AS " +
+          "SELECT col FROM spark_catalog.default.t WHERE col = 'b'")
+      }
+      assert(catalog.getStoredView(Array("default"), "v_configs")
+        .sqlConfigs().get(SQLConf.ANSI_ENABLED.key) == "false")
+    }
+  }
+
+  test("CREATE OR REPLACE VIEW whose new body references a nonexistent table fails at " +
+    "analysis") {
+    withTable("spark_catalog.default.t") {
+      Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t")
+      sql("CREATE VIEW view_catalog.default.v_replace_missing AS " +
+        "SELECT x FROM spark_catalog.default.t")
+      val ex = intercept[AnalysisException] {
+        sql("CREATE OR REPLACE VIEW view_catalog.default.v_replace_missing AS " +
+          "SELECT * FROM spark_catalog.default.does_not_exist")
+      }
+      assert(ex.getCondition == "TABLE_OR_VIEW_NOT_FOUND")
+    }
+  }
+
+  test("ALTER VIEW on a catalog without ViewCatalog fails with MISSING_CATALOG_ABILITY") {
+    // ALTER VIEW's identifier is resolved via `UnresolvedView`, whose `viewOnly=true` path
+    // in `Analyzer.lookupTableOrView` rejects non-ViewCatalog catalogs up front with the
+    // expected error class -- before `loadTable` is even called. `TestingTableOnlyCatalog`
+    // holds no entries (its `loadTable` always throws NoSuchTableException) and is not
+    // consulted on this path. CREATE VIEW's capability check lives in
+    // `CheckViewReferences`; ALTER VIEW's lives in the analyzer gate. Both yield
+    // `MISSING_CATALOG_ABILITY.VIEWS`.
+ withSQLConf( + "spark.sql.catalog.no_view_catalog" -> classOf[TestingTableOnlyCatalog].getName) { + val ex = intercept[AnalysisException] { + sql("ALTER VIEW no_view_catalog.default.v AS SELECT 1 AS x") + } + assert(ex.getCondition == "MISSING_CATALOG_ABILITY.VIEWS") + } + } + + // --- Pure ViewCatalog (no TableCatalog mixin) --------------------------- + + test("read view from a pure ViewCatalog (no TableCatalog mixin)") { + // The analyzer's table-side lookup must skip `loadTable` entirely for catalogs that don't + // implement `TableCatalog`; otherwise `asTableCatalog` would throw + // MISSING_CATALOG_ABILITY.TABLES and the legitimate `loadView` fallback would never run. + withSQLConf( + "spark.sql.catalog.view_only" -> classOf[TestingViewOnlyCatalog].getName) { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + // The fixture stores a `pure_v` view whose body filters spark_catalog.default.t. + checkAnswer(spark.table("view_only.default.pure_v"), Seq(Row(2), Row(3))) + } + } + } + + test("ALTER VIEW on a pure ViewCatalog (no TableCatalog mixin)") { + withSQLConf( + "spark.sql.catalog.view_only" -> classOf[TestingViewOnlyCatalog].getName) { + val catalog = spark.sessionState.catalogManager.catalog("view_only") + .asInstanceOf[TestingViewOnlyCatalog] + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("ALTER VIEW view_only.default.pure_v AS " + + "SELECT x FROM spark_catalog.default.t WHERE x > 2") + assert(catalog.loadView(Identifier.of(Array("default"), "pure_v")).queryText() == + "SELECT x FROM spark_catalog.default.t WHERE x > 2") + } + } + } + + test("cyclic detection distinguishes views across multi-level namespaces") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + + // Two views whose last namespace segment collides (`inner`) but whose full multi-part + // 
identifiers differ. Before the `fullIdent` change both collapsed to + // `TableIdentifier(v, Some("inner"), Some("view_catalog"))` and cyclic detection would + // false-positive on a legitimate cross-namespace REPLACE. + sql("CREATE VIEW view_catalog.ns1.inner.v AS SELECT x FROM spark_catalog.default.t") + sql("CREATE VIEW view_catalog.ns2.inner.v AS " + + "SELECT x FROM view_catalog.ns1.inner.v") + // Legitimate non-cyclic REPLACE -- new body references a different view that happens to + // share the last namespace segment. Must not false-positive. + sql("CREATE OR REPLACE VIEW view_catalog.ns1.inner.v AS " + + "SELECT x FROM spark_catalog.default.t WHERE x > 1") + checkAnswer(spark.table("view_catalog.ns1.inner.v"), Seq(Row(2), Row(3))) + + // Real cycle across the two namespaces must still be caught. + val ex = intercept[AnalysisException] { + sql("CREATE OR REPLACE VIEW view_catalog.ns1.inner.v AS " + + "SELECT x FROM view_catalog.ns2.inner.v") + } + assert(ex.getCondition == "RECURSIVE_VIEW") + } + } + + test("view error messages render the full multi-level namespace") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW view_catalog.ns1.inner.v_err AS " + + "SELECT x FROM spark_catalog.default.t") + // Second CREATE surfaces `viewAlreadyExistsError` (via TableAlreadyExistsException from + // the catalog). Before the error signatures took `Seq[String]`, `legacyName` collapsed + // ns1.inner into just `inner` and the error said `view_catalog.inner.v_err` -- missing + // the outer `ns1` segment. 
+ val dup = intercept[AnalysisException] { + sql("CREATE VIEW view_catalog.ns1.inner.v_err AS " + + "SELECT x FROM spark_catalog.default.t") + } + assert(dup.getCondition == "TABLE_OR_VIEW_ALREADY_EXISTS") + assert(dup.getMessage.contains("`view_catalog`.`ns1`.`inner`.`v_err`"), + s"expected full multi-part name in error, got: ${dup.getMessage}") + + // CREATE OR REPLACE VIEW over a non-view table entry surfaces + // `unsupportedCreateOrReplaceViewOnTableError`. Pre-seed a non-view entry at a + // multi-level-namespace identifier to exercise the rendering. + val catalog = spark.sessionState.catalogManager.catalog("view_catalog") + .asInstanceOf[TestingRelationCatalog] + val tblIdent = Identifier.of(Array("ns1", "inner"), "t_err") + catalog.createTable( + tblIdent, + new TableInfo.Builder() + .withSchema(new StructType().add("col", "string")) + .withTableType(TableSummary.EXTERNAL_TABLE_TYPE) + .build()) + try { + val notView = intercept[AnalysisException] { + sql("CREATE OR REPLACE VIEW view_catalog.ns1.inner.t_err AS " + + "SELECT x FROM spark_catalog.default.t") + } + assert(notView.getCondition == "EXPECT_VIEW_NOT_TABLE.NO_ALTERNATIVE") + assert(notView.getMessage.contains("`view_catalog`.`ns1`.`inner`.`t_err`"), + s"expected full multi-part name in error, got: ${notView.getMessage}") + } finally { + catalog.dropTable(tblIdent) + } + + // Column-arity mismatch error. 
+ val arity = intercept[AnalysisException] { + sql("CREATE VIEW view_catalog.ns1.inner.v_arity (a, b) AS " + + "SELECT x FROM spark_catalog.default.t") + } + assert(arity.getMessage.contains("`view_catalog`.`ns1`.`inner`.`v_arity`"), + s"expected full multi-part name in error, got: ${arity.getMessage}") + } + } + + test("ALTER VIEW cyclic detection distinguishes views across multi-level namespaces") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + + sql("CREATE VIEW view_catalog.ns1.inner.v_alter AS " + + "SELECT x FROM spark_catalog.default.t") + sql("CREATE VIEW view_catalog.ns2.inner.v_alter AS " + + "SELECT x FROM view_catalog.ns1.inner.v_alter") + + // Legitimate non-cyclic ALTER -- new body does not reference the altered view. Before + // `fullIdent` this false-positived because the two views collapsed to the same + // TableIdentifier(v_alter, Some("inner"), Some("view_catalog")). + sql("ALTER VIEW view_catalog.ns1.inner.v_alter AS " + + "SELECT x FROM spark_catalog.default.t WHERE x > 1") + checkAnswer( + spark.table("view_catalog.ns1.inner.v_alter"), + Seq(Row(2), Row(3))) + + // Real cycle across the two namespaces must still be caught. + val ex = intercept[AnalysisException] { + sql("ALTER VIEW view_catalog.ns1.inner.v_alter AS " + + "SELECT x FROM view_catalog.ns2.inner.v_alter") + } + assert(ex.getCondition == "RECURSIVE_VIEW") + } + } + + test("temp-object reference errors render the full multi-level namespace") { + // `verifyTemporaryObjectsNotExists` / `verifyAutoGeneratedAliasesNotExists` used to take a + // `TableIdentifier` built via `asLegacyTableIdentifier`, which collapses multi-level + // namespaces to the last segment -- so a temp-function reference on + // `view_catalog.ns1.inner.v_tempfn` produced an error naming + // `view_catalog.inner.v_tempfn` and dropped the `ns1` middle segment. Post-migration the + // errors render the full multi-part name. 
+ withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + spark.udf.register("temp_udf_multi", (i: Int) => i + 1) + val ex = intercept[AnalysisException] { + sql("CREATE VIEW view_catalog.ns1.inner.v_tempfn AS " + + "SELECT temp_udf_multi(x) FROM spark_catalog.default.t") + } + assert(ex.getCondition == "INVALID_TEMP_OBJ_REFERENCE") + assert(ex.getMessage.contains("`view_catalog`.`ns1`.`inner`.`v_tempfn`"), + s"expected full multi-part name, got: ${ex.getMessage}") + } + } + + // --- Follow-up-blocked view DDL / inspection on a non-session v2 catalog ------------ + // These plans don't have a dedicated v2 strategy yet (tracked for a follow-up PR). We pin + // the current failure mode -- UNSUPPORTED_FEATURE.TABLE_OPERATION with a statement-specific + // operation string -- so a future generic "no plan found" regression would surface here + // rather than silently degrading the UX. + + private def seedV2View(name: String): Unit = { + sql(s"CREATE VIEW view_catalog.default.$name AS SELECT 1 AS x") + } + + private def assertUnsupportedViewOp(statement: String): Unit = { + val ex = intercept[AnalysisException](sql(statement)) + assert(ex.getCondition == "UNSUPPORTED_FEATURE.TABLE_OPERATION", s"got ${ex.getCondition}") + } + + test("ALTER VIEW ... SET TBLPROPERTIES on a v2 view is rejected") { + seedV2View("v_set_props") + assertUnsupportedViewOp( + "ALTER VIEW view_catalog.default.v_set_props SET TBLPROPERTIES ('k' = 'v')") + } + + test("ALTER VIEW ... UNSET TBLPROPERTIES on a v2 view is rejected") { + seedV2View("v_unset_props") + assertUnsupportedViewOp( + "ALTER VIEW view_catalog.default.v_unset_props UNSET TBLPROPERTIES ('k')") + } + + test("ALTER VIEW ... WITH SCHEMA on a v2 view is rejected") { + seedV2View("v_schema_binding") + assertUnsupportedViewOp( + "ALTER VIEW view_catalog.default.v_schema_binding WITH SCHEMA EVOLUTION") + } + + test("ALTER VIEW ... 
RENAME TO on a v2 view is rejected") { + seedV2View("v_rename") + assertUnsupportedViewOp( + "ALTER VIEW view_catalog.default.v_rename RENAME TO view_catalog.default.v_renamed") + } + + test("SHOW CREATE TABLE on a v2 view is rejected") { + seedV2View("v_show_create") + assertUnsupportedViewOp("SHOW CREATE TABLE view_catalog.default.v_show_create") + } + + test("SHOW TBLPROPERTIES on a v2 view is rejected") { + seedV2View("v_show_props") + assertUnsupportedViewOp("SHOW TBLPROPERTIES view_catalog.default.v_show_props") + } + + test("SHOW COLUMNS on a v2 view is rejected") { + seedV2View("v_show_cols") + assertUnsupportedViewOp("SHOW COLUMNS IN view_catalog.default.v_show_cols") + } + + test("DESCRIBE TABLE on a v2 view is rejected") { + seedV2View("v_describe") + assertUnsupportedViewOp("DESCRIBE TABLE view_catalog.default.v_describe") + } + + test("DESCRIBE TABLE ... COLUMN on a v2 view is rejected") { + seedV2View("v_describe_col") + // Column resolution against a v2 view's output isn't wired up yet, so the analyzer fails + // with UNRESOLVED_COLUMN before reaching the planner. That's still a clean + // AnalysisException (not a generic "no plan found"), which is the pin we care about. + intercept[AnalysisException]( + sql("DESCRIBE TABLE view_catalog.default.v_describe_col x")) + } + + // These plans reach `DataSourceV2Strategy` with a `ResolvedPersistentView` child on a + // non-session v2 view (because `ResolvedV1TableOrViewIdentifier` now skips non-session views). + // Without explicit pins they would hit `QueryPlanner`'s `assert(pruned.hasNext, "No plan for + // ...")` and surface a raw AssertionError. Pin each to UNSUPPORTED_FEATURE.TABLE_OPERATION. 
+ + test("REFRESH TABLE on a v2 view is rejected") { + seedV2View("v_refresh") + assertUnsupportedViewOp("REFRESH TABLE view_catalog.default.v_refresh") + } + + test("ANALYZE TABLE on a v2 view is rejected") { + seedV2View("v_analyze") + assertUnsupportedViewOp( + "ANALYZE TABLE view_catalog.default.v_analyze COMPUTE STATISTICS") + } + + test("ANALYZE TABLE ... FOR COLUMNS on a v2 view is rejected") { + seedV2View("v_analyze_cols") + assertUnsupportedViewOp( + "ANALYZE TABLE view_catalog.default.v_analyze_cols COMPUTE STATISTICS FOR COLUMNS x") + } + + // --- DROP VIEW on a v2 catalog -------------------------------- + + test("DROP VIEW on a ViewCatalog drops the view") { + val catalog = spark.sessionState.catalogManager.catalog("view_catalog") + .asInstanceOf[TestingRelationCatalog] + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW view_catalog.default.v_drop AS " + + "SELECT x FROM spark_catalog.default.t") + assert(catalog.viewExists(Identifier.of(Array("default"), "v_drop"))) + sql("DROP VIEW view_catalog.default.v_drop") + assert(!catalog.viewExists(Identifier.of(Array("default"), "v_drop"))) + } + } + + test("DROP VIEW IF EXISTS on a v2 catalog is a no-op when the view is missing") { + // Exercises the `ifExists=true` path -- DropViewExec should not throw when the view + // doesn't exist on a ViewCatalog. + sql("DROP VIEW IF EXISTS view_catalog.default.v_never_existed") + } + + test("DROP VIEW on a non-view table entry is rejected (v1-parity)") { + // v1 `DropTableCommand(isView = true)` rejects a non-view target via + // `wrongCommandForObjectTypeError`. The v2 path must also refuse -- otherwise + // `DROP VIEW view_catalog.default.t_not_a_view` would silently destroy the table's entry. 
+ val catalog = spark.sessionState.catalogManager.catalog("view_catalog") + .asInstanceOf[TestingRelationCatalog] + val tableIdent = Identifier.of(Array("default"), "t_not_a_view") + catalog.createTable( + tableIdent, + new TableInfo.Builder() + .withSchema(new StructType().add("col", "string")) + .withTableType(TableSummary.EXTERNAL_TABLE_TYPE) + .build()) + try { + val ex = intercept[AnalysisException] { + sql("DROP VIEW view_catalog.default.t_not_a_view") + } + assert(ex.getCondition == "EXPECT_VIEW_NOT_TABLE.NO_ALTERNATIVE") + // The table entry must still be there -- DROP VIEW did not destroy it. + assert(catalog.tableExists(tableIdent)) + } finally { + catalog.dropTable(tableIdent) + } + } + + test("DROP VIEW on a catalog without ViewCatalog is rejected") { + withSQLConf( + "spark.sql.catalog.no_view_catalog" -> classOf[TestingTableOnlyCatalog].getName) { + val ex = intercept[AnalysisException] { + sql("DROP VIEW no_view_catalog.default.v") + } + // Preserves the pre-PR error surface for non-ViewCatalog catalogs. + assert(ex.getMessage.toLowerCase(java.util.Locale.ROOT).contains("views")) + } + } + + // --- SHOW TABLES / SHOW VIEWS on a v2 catalog -------------------------------- + + private def seedV2Table(name: String): Unit = { + val catalog = spark.sessionState.catalogManager.catalog("view_catalog") + .asInstanceOf[TestingRelationCatalog] + catalog.createTable( + Identifier.of(Array("default"), name), + new TableInfo.Builder() + .withSchema(new StructType().add("x", "int")) + .withTableType(TableSummary.EXTERNAL_TABLE_TYPE) + .build()) + } + + test("SHOW TABLES on a v2 catalog returns only tables") { + // Per the new `TableCatalog.listTables` contract, SHOW TABLES returns table identifiers + // only -- views (in mixed catalogs) are listed via SHOW VIEWS / `ViewCatalog.listViews`. 
+ // This is an intentional divergence from v1 SHOW TABLES (which includes both tables and + // views in a single listing); v2 catalogs separate the two so callers can target either + // kind without filtering. + seedV2View("v_in_show_tables") + seedV2Table("t_in_show_tables") + val rows = sql("SHOW TABLES IN view_catalog.default").collect() + val names = rows.map(_.getString(1)).toSet + assert(names.contains("t_in_show_tables"), s"table missing from SHOW TABLES: $names") + assert(!names.contains("v_in_show_tables"), s"view leaked into SHOW TABLES: $names") + rows.foreach(r => assert(!r.getBoolean(2), s"isTemporary must be false: $r")) + } + + test("SHOW VIEWS on a v2 catalog returns only views") { + seedV2View("v_in_show_views") + seedV2Table("t_not_in_show_views") + val rows = sql("SHOW VIEWS IN view_catalog.default").collect() + val names = rows.map(_.getString(1)).toSet + assert(names.contains("v_in_show_views"), s"view missing: $names") + assert(!names.contains("t_not_in_show_views"), + s"non-view leaked into SHOW VIEWS: $names") + rows.foreach(r => assert(!r.getBoolean(2), s"isTemporary must be false for v2: $r")) + } + + test("SHOW VIEWS with LIKE pattern filters on the view name") { + seedV2View("v_foo") + seedV2View("v_bar") + val rows = sql("SHOW VIEWS IN view_catalog.default LIKE 'v_foo'").collect() + val names = rows.map(_.getString(1)).toSet + assert(names == Set("v_foo"), s"expected only v_foo, got $names") + } + + test("SHOW VIEWS on a catalog without ViewCatalog is rejected") { + withSQLConf( + "spark.sql.catalog.no_view_catalog" -> classOf[TestingTableOnlyCatalog].getName) { + val ex = intercept[AnalysisException] { + sql("SHOW VIEWS IN no_view_catalog.default") + } + assert(ex.getCondition == "MISSING_CATALOG_ABILITY.VIEWS") + } + } + + test("ALTER VIEW detects cyclic view references") { + withTable("spark_catalog.default.t") { + Seq(1, 2, 3).toDF("x").write.saveAsTable("spark_catalog.default.t") + sql("CREATE VIEW 
view_catalog.default.v_alter_cycle_a AS " + + "SELECT x FROM spark_catalog.default.t") + sql("CREATE VIEW view_catalog.default.v_alter_cycle_b AS " + + "SELECT x FROM view_catalog.default.v_alter_cycle_a") + val ex = intercept[AnalysisException] { + sql("ALTER VIEW view_catalog.default.v_alter_cycle_a AS " + + "SELECT x FROM view_catalog.default.v_alter_cycle_b") + } + assert(ex.getCondition == "RECURSIVE_VIEW") + } + } +} + +/** + * A [[RelationCatalog]]: round-trips [[MetadataOnlyTable]] for created views and tables and + * exposes a few canned read-only view fixtures (`test_view`, `test_unqualified_view`, + * `test_unqualified_multi`, plus an unqualified-target view at `ns1.ns2.t`) used by the + * view-read tests. Entries created via `createTable` / `createView` are distinguished by the + * stored value's runtime type (ViewInfo vs TableInfo). The single-RPC perf entry point + * [[loadRelation]] returns either kind; [[loadTable]] is tables-only per the + * [[TableCatalog#loadTable]] contract. + */ +class TestingRelationCatalog extends RelationCatalog { + + // Holds entries (views and tables) created via createTable / createView within the session. + // Keyed by (namespace, name); the stored value's runtime type (ViewInfo vs TableInfo) + // distinguishes views from tables. Mixed-catalog: shared identifier namespace per the + // RelationCatalog contract. + private val createdViews = + new java.util.concurrent.ConcurrentHashMap[(Seq[String], String), TableInfo]() + + // Canned read-only view fixtures, exposed only via the perf path (loadRelation). loadView + // does not need to expose them because the resolver routes RelationCatalog reads through + // loadRelation. 
+  private def fixtureView(ident: Identifier): Option[ViewInfo] = ident.name() match {
+    case "test_view" =>
+      // ANSI mode is stored as enabled only when the first namespace segment is `ansi`.
+      // headOption (rather than head) keeps an identifier with an empty namespace from
+      // failing with NoSuchElementException; such an identifier simply gets ANSI = false.
+      val ansiEnabled = ident.namespace().headOption.contains("ansi")
+      Some(new ViewInfo.Builder()
+        .withSchema(new StructType().add("col", "string").add("i", "int"))
+        .withQueryText(
+          "SELECT col, col::int AS i FROM spark_catalog.default.t WHERE col = 'b'")
+        .withSqlConfigs(java.util.Collections.singletonMap(
+          SQLConf.ANSI_ENABLED.key, ansiEnabled.toString))
+        .build())
+    case "test_unqualified_view" =>
+      Some(new ViewInfo.Builder()
+        .withSchema(new StructType().add("col", "string"))
+        .withQueryText("SELECT col FROM t WHERE col = 'b'")
+        .withCurrentCatalog("spark_catalog")
+        .withCurrentNamespace(Array("default"))
+        .build())
+    case "test_unqualified_multi" =>
+      // View whose captured catalog+namespace is view_catalog.ns1.ns2 (two-part). The
+      // unqualified `t` in the body must resolve via that captured context to
+      // view_catalog.ns1.ns2.t, which this catalog also serves (see `t` case below).
+      Some(new ViewInfo.Builder()
+        .withSchema(new StructType().add("col", "string"))
+        .withQueryText("SELECT col FROM t")
+        .withCurrentCatalog("view_catalog")
+        .withCurrentNamespace(Array("ns1", "ns2"))
+        .build())
+    case "t" if ident.namespace().toSeq == Seq("ns1", "ns2") =>
+      // Target of test_unqualified_multi's unqualified reference. Self-contained view so
+      // the test doesn't need external data.
+      Some(new ViewInfo.Builder()
+        .withSchema(new StructType().add("col", "string"))
+        .withQueryText("SELECT 'multi' AS col")
+        .build())
+    case _ => None
+  }
+
+  override def loadRelation(ident: Identifier): Table = {
+    // Single-RPC perf path: returns tables AND views (as MetadataOnlyTable). Stored entries
+    // win over fixture views (the fixture namespace is read-only and disjoint from
+    // createdViews in practice). loadTable, loadView, tableExists, viewExists all derive
+    // from this via the RelationCatalog default impls.
+    val key = (ident.namespace().toSeq, ident.name())
+    Option(createdViews.get(key))
+      .orElse(fixtureView(ident))
+      .map(new MetadataOnlyTable(_, ident.toString))
+      .getOrElse(throw new NoSuchTableException(ident))
+  }
+
+  override def createTable(ident: Identifier, info: TableInfo): Table = {
+    // Mixed-catalog contract: createTable rejects when a view sits at ident with
+    // TableAlreadyExistsException. The shared `createdViews` keyspace makes `putIfAbsent`
+    // throw uniformly for both table-at-ident and view-at-ident collisions.
+    val key = (ident.namespace().toSeq, ident.name())
+    if (createdViews.putIfAbsent(key, info) != null) {
+      throw new TableAlreadyExistsException(ident)
+    }
+    new MetadataOnlyTable(info, ident.toString)
+  }
+
+  /** Test-only accessor: returns the stored TableInfo (table or view) for the identifier. */
+  def getStoredInfo(namespace: Array[String], name: String): TableInfo = {
+    Option(createdViews.get((namespace.toSeq, name))).getOrElse {
+      throw new NoSuchTableException(Identifier.of(namespace, name))
+    }
+  }
+
+  /** Test-only accessor: returns the stored ViewInfo; fails if the entry is not a view. */
+  def getStoredView(namespace: Array[String], name: String): ViewInfo = getStoredInfo(
+    namespace, name) match {
+    case v: ViewInfo => v
+    case _ => throw new IllegalStateException(
+      s"stored entry at ${namespace.mkString(".")}.$name is not a view")
+  }
+
+  override def alterTable(ident: Identifier, changes: TableChange*): Table = {
+    throw new RuntimeException("shouldn't be called")
+  }
+  // Removes table entries only: a missing entry or a stored ViewInfo is left untouched and
+  // reported as `false`.
+  override def dropTable(ident: Identifier): Boolean = {
+    val key = (ident.namespace().toSeq, ident.name())
+    val existing = createdViews.get(key)
+    if (existing == null || existing.isInstanceOf[ViewInfo]) return false
+    createdViews.remove(key) != null
+  }
+  override def renameTable(oldIdent: Identifier, newIdent: Identifier): Unit = {
+    throw new RuntimeException("shouldn't be called")
+  }
+  override def listTables(namespace: Array[String]): Array[Identifier] = {
+    // Tables only -- views are listed via ViewCatalog.listViews per the new contract.
+    val targetNs = namespace.toSeq
+    val ids = new java.util.ArrayList[Identifier]()
+    createdViews.forEach { (key, info) =>
+      if (key._1 == targetNs && !info.isInstanceOf[ViewInfo]) {
+        ids.add(Identifier.of(key._1.toArray, key._2))
+      }
+    }
+    ids.toArray(new Array[Identifier](0))
+  }
+
+  // ViewCatalog methods. Storage is shared with TableCatalog (mixed-catalog pattern).
+
+  /** Lists the identifiers of the view entries stored directly under `namespace`. */
+  override def listViews(namespace: Array[String]): Array[Identifier] = {
+    val wantedNs = namespace.toSeq
+    val matches = new java.util.ArrayList[Identifier]()
+    createdViews.forEach { (entryKey, stored) =>
+      val (ns, viewName) = entryKey
+      if (ns == wantedNs && stored.isInstanceOf[ViewInfo]) {
+        matches.add(Identifier.of(ns.toArray, viewName))
+      }
+    }
+    matches.toArray(new Array[Identifier](0))
+  }
+
+  /** Stores `info` at `ident`; any existing entry there (table or view) is a conflict. */
+  override def createView(ident: Identifier, info: ViewInfo): ViewInfo = {
+    val previous = createdViews.putIfAbsent((ident.namespace().toSeq, ident.name()), info)
+    if (previous != null) {
+      throw new ViewAlreadyExistsException(ident)
+    }
+    info
+  }
+
+  /** Overwrites an existing view entry; a missing or non-view entry is NoSuchViewException. */
+  override def replaceView(ident: Identifier, info: ViewInfo): ViewInfo = {
+    val entryKey = (ident.namespace().toSeq, ident.name())
+    createdViews.get(entryKey) match {
+      case _: ViewInfo =>
+        createdViews.put(entryKey, info)
+        info
+      case _ =>
+        // Covers both "nothing stored" (null) and "a table is stored here".
+        throw new NoSuchViewException(ident)
+    }
+  }
+
+  /** Removes a view entry; returns false when nothing, or a table, is stored at `ident`. */
+  override def dropView(ident: Identifier): Boolean = {
+    val entryKey = (ident.namespace().toSeq, ident.name())
+    createdViews.get(entryKey) match {
+      case _: ViewInfo => createdViews.remove(entryKey) != null
+      case _ => false
+    }
+  }
+
+  private var catalogName = ""
+  override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = {
+    catalogName = name
+  }
+  override def name(): String = catalogName
+}
+
+/**
+ * A v2 catalog with no ViewCatalog ability, used by the capability-gate tests. The gate fires
+ * in `Analyzer.lookupTableOrView(viewOnly=true)` for ALTER VIEW and in
+ * [[CheckViewReferences]] for CREATE VIEW -- both before `loadTable` is reached -- so the
+ * catalog deliberately holds nothing.
+ */
+class TestingTableOnlyCatalog extends TableCatalog {
+  override def loadTable(ident: Identifier): Table = {
+    throw new NoSuchTableException(ident)
+  }
+
+  override def alterTable(ident: Identifier, changes: TableChange*): Table = {
+    throw new RuntimeException("shouldn't be called")
+  }
+  override def dropTable(ident: Identifier): Boolean = {
+    false
+  }
+  override def renameTable(oldIdent: Identifier, newIdent: Identifier): Unit = {
+    throw new RuntimeException("shouldn't be called")
+  }
+  override def listTables(namespace: Array[String]): Array[Identifier] = {
+    Array.empty
+  }
+  private var catalogName = ""
+  override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = {
+    catalogName = name
+  }
+  override def name(): String = catalogName
+}
+
+/**
+ * A [[ViewCatalog]] that is not also a [[TableCatalog]]. It exercises the analyzer paths that
+ * must skip `loadTable` and fall through to `loadView` for catalogs that cannot host tables.
+ * One mutable view is pre-seeded at `default.pure_v` so both the read test and the ALTER VIEW
+ * test can reach it.
+ */
+class TestingViewOnlyCatalog extends ViewCatalog {
+  private val store =
+    new java.util.concurrent.ConcurrentHashMap[(Seq[String], String), ViewInfo]()
+
+  // Seeded on first `initialize`. Filters `spark_catalog.default.t` so the read test can
+  // assert deterministic output. ALTER VIEW tests overwrite it via `replaceView`.
+ private def seedDefault(): Unit = { + val key = (Seq("default"), "pure_v") + if (!store.containsKey(key)) { + val info = new ViewInfo.Builder() + .withSchema(new StructType().add("x", "int")) + .withQueryText("SELECT x FROM spark_catalog.default.t WHERE x > 1") + .build() + store.put(key, info) + } + } + + override def listViews(namespace: Array[String]): Array[Identifier] = { + val target = namespace.toSeq + val ids = new java.util.ArrayList[Identifier]() + store.forEach { (key, _) => + if (key._1 == target) ids.add(Identifier.of(key._1.toArray, key._2)) + } + ids.toArray(new Array[Identifier](0)) + } + + override def loadView(ident: Identifier): ViewInfo = { + val key = (ident.namespace().toSeq, ident.name()) + Option(store.get(key)).getOrElse(throw new NoSuchViewException(ident)) + } + + override def createView(ident: Identifier, info: ViewInfo): ViewInfo = { + val key = (ident.namespace().toSeq, ident.name()) + if (store.putIfAbsent(key, info) != null) { + throw new ViewAlreadyExistsException(ident) + } + info + } + + override def replaceView(ident: Identifier, info: ViewInfo): ViewInfo = { + val key = (ident.namespace().toSeq, ident.name()) + if (!store.containsKey(key)) throw new NoSuchViewException(ident) + store.put(key, info) + info + } + + override def dropView(ident: Identifier): Boolean = { + val key = (ident.namespace().toSeq, ident.name()) + store.remove(key) != null + } + + private var catalogName = "" + override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = { + catalogName = name + seedDefault() + } + override def name(): String = catalogName +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index d2cc342f48112..d1dc9c282829f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -2966,13 +2966,13 @@ class DataSourceV2SQLSuiteV1Filter } } - test("View commands are not supported in v2 catalogs") { + test("View commands are not supported in v2 catalogs that don't implement ViewCatalog") { def validateViewCommand(sqlStatement: String): Unit = { val e = analysisException(sqlStatement) checkError( e, - condition = "UNSUPPORTED_FEATURE.CATALOG_OPERATION", - parameters = Map("catalogName" -> "`testcat`", "operation" -> "views")) + condition = "MISSING_CATALOG_ABILITY.VIEWS", + parameters = Map("plugin" -> "testcat")) } validateViewCommand("DROP VIEW testcat.v") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index 89fb6eca223ee..b564cad0fe9c8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -778,8 +778,8 @@ class PlanResolutionSuite extends SharedSparkSession with AnalysisTest { } checkError( e, - condition = "UNSUPPORTED_FEATURE.CATALOG_OPERATION", - parameters = Map("catalogName" -> "`testcat`", "operation" -> "views")) + condition = "MISSING_CATALOG_ABILITY.VIEWS", + parameters = Map("plugin" -> "testcat")) } // ALTER VIEW view_name SET TBLPROPERTIES ('comment' = new_comment); diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropTableSuite.scala index ffc2c6c679a8b..0e5cbb861d05d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropTableSuite.scala @@ -41,6 +41,13 @@ class DropTableSuite extends command.DropTableSuiteBase with 
CommandSuiteBase { } } + test("DROP TABLE IF EXISTS ... PURGE on a missing table is a no-op") { + // The default TableCatalog.purgeTable throws unconditionally, so without an upfront + // existence guard `IF EXISTS` would surface UNSUPPORTED_FEATURE.PURGE_TABLE for missing + // tables -- defeating the IF EXISTS contract on catalogs that do not support purge. + sql(s"DROP TABLE IF EXISTS $catalog.ns.never_existed PURGE") + } + test("table qualified with the session catalog name") { withSQLConf( V2_SESSION_CATALOG_IMPLEMENTATION.key -> classOf[InMemoryTableSessionCatalog].getName) {