From 07c340023aaa69699e80eb4cd4f3e8633eaa0f2b Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Wed, 13 May 2026 12:40:09 -0700 Subject: [PATCH 1/3] [SPARK-56853] Improve PATH Tests --- python/pyspark/sql/tests/test_catalog.py | 41 +++ .../catalyst/catalog/SqlPathFormatSuite.scala | 98 +++++ .../catalog/CatalogManagerSuite.scala | 35 ++ .../connector/catalog/PathElementSuite.scala | 100 ++++++ .../sql/connect/SqlPathE2ETestSuite.scala | 97 +++++ .../analyzer-results/sql-path.sql.out | 286 +++++++++++++++ .../resources/sql-tests/inputs/sql-path.sql | 80 +++++ .../sql-tests/results/sql-path.sql.out | 340 ++++++++++++++++++ .../org/apache/spark/sql/SetPathSuite.scala | 229 +++++++++++- .../sql/connector/SqlPathV2CatalogSuite.scala | 143 ++++++++ .../spark/sql/execution/SQLViewSuite.scala | 93 +++++ .../v1/AlterViewSchemaBindingSuite.scala | 48 ++- 12 files changed, 1584 insertions(+), 6 deletions(-) create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SqlPathFormatSuite.scala create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/PathElementSuite.scala create mode 100644 sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/SqlPathE2ETestSuite.scala create mode 100644 sql/core/src/test/resources/sql-tests/analyzer-results/sql-path.sql.out create mode 100644 sql/core/src/test/resources/sql-tests/inputs/sql-path.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/sql-path.sql.out create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/connector/SqlPathV2CatalogSuite.scala diff --git a/python/pyspark/sql/tests/test_catalog.py b/python/pyspark/sql/tests/test_catalog.py index 92ffea233215f..9e151a2b4ae9b 100644 --- a/python/pyspark/sql/tests/test_catalog.py +++ b/python/pyspark/sql/tests/test_catalog.py @@ -588,6 +588,47 @@ def test_catalog_analyze_table(self): spark.sql(f"INSERT INTO {t} VALUES (1)") spark.catalog.analyzeTable(t, noScan=True) + def 
test_path_current_path_disabled(self): + # SPARK-56853: current_path() is a regular builtin and resolves even when + # spark.sql.path.enabled is false. The DataFrame and SQL surfaces must agree. + from pyspark.sql.functions import current_path + + spark = self.spark + with self.sql_conf({"spark.sql.path.enabled": False}): + sql_form = spark.sql("SELECT current_path()").collect()[0][0] + self.assertIsInstance(sql_form, str) + self.assertNotEqual(sql_form, "") + api_form = spark.range(1).select(current_path()).collect()[0][0] + self.assertEqual(sql_form, api_form) + + def test_path_set_path_and_current_path(self): + # SPARK-56853: SET PATH is parsed and applied; current_path() reflects it + # over both the SQL and DataFrame surfaces. Restores DEFAULT_PATH on exit. + from pyspark.sql.functions import current_path + + spark = self.spark + with self.sql_conf({"spark.sql.path.enabled": True}): + try: + spark.sql("SET PATH = spark_catalog.default, system.builtin") + sql_form = spark.sql("SELECT current_path()").collect()[0][0] + self.assertEqual(sql_form, "spark_catalog.default,system.builtin") + api_form = spark.range(1).select(current_path()).collect()[0][0] + self.assertEqual(sql_form, api_form) + finally: + spark.sql("SET PATH = DEFAULT_PATH") + + def test_path_set_path_rejected_when_disabled(self): + # SPARK-56853: SET PATH must raise UNSUPPORTED_FEATURE.SET_PATH_WHEN_DISABLED + # when the feature flag is off (covers both classic and Connect error paths). 
+ spark = self.spark + with self.sql_conf({"spark.sql.path.enabled": False}): + with self.assertRaises(AnalysisException) as ctx: + spark.sql("SET PATH = spark_catalog.default") + self.assertEqual( + ctx.exception.getCondition(), + "UNSUPPORTED_FEATURE.SET_PATH_WHEN_DISABLED", + ) + class CatalogTests(CatalogTestsMixin, ReusedSQLTestCase): pass diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SqlPathFormatSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SqlPathFormatSuite.scala new file mode 100644 index 0000000000000..0ed3bcfb19639 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SqlPathFormatSuite.scala @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.catalog + +import org.json4s.JsonAST.{JArray, JObject, JString} +import org.json4s.jackson.JsonMethods.{compact, render} + +import org.apache.spark.SparkFunSuite + +/** + * Unit tests for [[SqlPathFormat]] -- the helper that converts the raw JSON-array-of-arrays + * path stored on view / SQL function metadata into the JSON-object form used by DESCRIBE + * AS JSON and the human-readable form used by DESCRIBE EXTENDED. + */ +class SqlPathFormatSuite extends SparkFunSuite { + + private def compactJson(v: JArray): String = compact(render(v)) + + test("toDescribeJson: maps each [catalog, ns...] entry to a JSON object") { + val stored = + """[["spark_catalog","default"],["system","builtin"]]""" + val result = SqlPathFormat.toDescribeJson(stored) + .getOrElse(fail(s"Expected a JSON value, got None for: $stored")) + val expected = JArray(List( + JObject("catalog_name" -> JString("spark_catalog"), + "namespace" -> JArray(List(JString("default")))), + JObject("catalog_name" -> JString("system"), + "namespace" -> JArray(List(JString("builtin")))))) + assert(compactJson(result.asInstanceOf[JArray]) == compactJson(expected)) + } + + test("toDescribeJson: multi-level namespace becomes [head, tail...]") { + val stored = """[["cat1","db","sub"]]""" + val result = SqlPathFormat.toDescribeJson(stored) + .getOrElse(fail("Expected a JSON value")) + val expected = JArray(List( + JObject("catalog_name" -> JString("cat1"), + "namespace" -> JArray(List(JString("db"), JString("sub")))))) + assert(compactJson(result.asInstanceOf[JArray]) == compactJson(expected)) + } + + test("toDescribeJson: empty array returns None") { + assert(SqlPathFormat.toDescribeJson("[]").isEmpty) + } + + test("toDescribeJson: malformed payloads return None") { + Seq( + "", + "not_json", + "{}", + """{"foo":1}""", + """[1, 2, 3]""" + ).foreach { payload => + assert(SqlPathFormat.toDescribeJson(payload).isEmpty, s"payload=$payload") + } + } + + test("formatForDisplay: 
renders plain identifiers without backticks") { + val json = SqlPathFormat.toDescribeJson( + """[["spark_catalog","default"],["system","builtin"]]""") + .getOrElse(fail("Expected a JSON value")) + val rendered = SqlPathFormat.formatForDisplay(json) + .getOrElse(fail("Expected a display string")) + assert(rendered == "spark_catalog.default, system.builtin") + } + + test("formatForDisplay: backticks identifiers that need quoting") { + val json = SqlPathFormat.toDescribeJson( + """[["spark_catalog","weird.schema"]]""") + .getOrElse(fail("Expected a JSON value")) + val rendered = SqlPathFormat.formatForDisplay(json) + .getOrElse(fail("Expected a display string")) + assert(rendered == "spark_catalog.`weird.schema`") + } + + test("formatForDisplay: round-trips multi-level namespaces") { + val json = SqlPathFormat.toDescribeJson("""[["cat","db","ns"]]""") + .getOrElse(fail("Expected a JSON value")) + val rendered = SqlPathFormat.formatForDisplay(json) + .getOrElse(fail("Expected a display string")) + assert(rendered == "cat.db.ns") + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CatalogManagerSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CatalogManagerSuite.scala index acf86aae1eea3..6fb208e586839 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CatalogManagerSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CatalogManagerSuite.scala @@ -150,6 +150,41 @@ class CatalogManagerSuite extends SparkFunSuite with SQLHelper { assert(CatalogManager.deserializePathEntries(payload).isEmpty, s"payload=$payload") } } + + test("serializePathEntries round-trips through deserialize for typical inputs") { + val cases = Seq( + Seq(Seq("spark_catalog", "default"), Seq("system", "builtin")), + Seq(Seq("system", "session")), + Seq.empty[Seq[String]]) + cases.foreach { entries => + val payload = CatalogManager.serializePathEntries(entries) + val parsed = 
CatalogManager.deserializePathEntries(payload) + .getOrElse(fail(s"Expected payload to round-trip: $payload")) + assert(parsed === entries, s"Round-trip mismatch for $entries; got $parsed") + } + } + + test("serializePathEntries round-trips multi-level and quoted identifiers") { + val entries = Seq( + Seq("cat", "ns1", "ns2"), + Seq("spark_catalog", "sch.with.dots"), + Seq("spark_catalog", "schema with spaces")) + val payload = CatalogManager.serializePathEntries(entries) + val parsed = CatalogManager.deserializePathEntries(payload) + .getOrElse(fail(s"Expected payload to round-trip: $payload")) + assert(parsed === entries) + } + + test("deserializePathEntriesOrFail raises a clear AnalysisException for bad payloads") { + val e = intercept[org.apache.spark.sql.AnalysisException] { + CatalogManager.deserializePathEntriesOrFail( + storedPathStr = "{bad-json", + objectType = "view", + objectName = "default.v_broken") + } + assert(e.getMessage.contains("Invalid stored SQL path metadata for view")) + assert(e.getMessage.contains("default.v_broken")) + } } class DummyCatalog extends CatalogPlugin { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/PathElementSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/PathElementSuite.scala new file mode 100644 index 0000000000000..5c267151cc9bf --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/PathElementSuite.scala @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.catalog + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.connector.catalog.CatalogManager.{ + CurrentSchemaEntry, LiteralPathEntry +} + +/** + * Direct unit tests for [[PathElement.validateNoStaticDuplicates]]. The end-to-end + * `SetPathSuite` exercises this via SQL, but the duplicate-detection rules + * (literal-vs-literal, current_schema-vs-current_schema, case-sensitivity) are pure + * data and benefit from focused tests close to the implementation. + */ +class PathElementSuite extends SparkFunSuite { + + private def literal(parts: String*): LiteralPathEntry = LiteralPathEntry(parts.toSeq) + + test("validateNoStaticDuplicates: no duplicates returns the input unchanged") { + val entries = Seq( + literal("spark_catalog", "default"), + literal("system", "builtin"), + CurrentSchemaEntry) + assert(PathElement.validateNoStaticDuplicates(entries, caseSensitive = false) === entries) + } + + test("validateNoStaticDuplicates: duplicate literal under case-insensitive collation") { + val entries = Seq( + literal("spark_catalog", "default"), + literal("Spark_Catalog", "DEFAULT")) + val e = intercept[AnalysisException] { + PathElement.validateNoStaticDuplicates(entries, caseSensitive = false) + } + assert(e.getCondition == "DUPLICATE_SQL_PATH_ENTRY") + assert(e.getMessageParameters.get("pathEntry") == "Spark_Catalog.DEFAULT") + } + + test("validateNoStaticDuplicates: case-sensitive mode keeps differently cased entries") { + val entries = Seq( + 
literal("spark_catalog", "DEFAULT"), + literal("spark_catalog", "default")) + assert(PathElement.validateNoStaticDuplicates(entries, caseSensitive = true) === entries) + } + + test("validateNoStaticDuplicates: repeated CurrentSchemaEntry is rejected") { + val entries = Seq(CurrentSchemaEntry, CurrentSchemaEntry) + val e = intercept[AnalysisException] { + PathElement.validateNoStaticDuplicates(entries, caseSensitive = false) + } + assert(e.getCondition == "DUPLICATE_SQL_PATH_ENTRY") + assert(e.getMessageParameters.get("pathEntry") == "current_schema") + } + + test("validateNoStaticDuplicates: literal-vs-CurrentSchemaEntry collision is tolerated") { + // The CurrentSchemaEntry marker resolves dynamically against USE SCHEMA, so a literal + // that happens to match the live current schema is intentionally not flagged here. + val entries = Seq( + literal("spark_catalog", "default"), + CurrentSchemaEntry, + literal("system", "builtin")) + assert(PathElement.validateNoStaticDuplicates(entries, caseSensitive = false) === entries) + } + + test("validateNoStaticDuplicates: identifier containing a dot is quoted in the error") { + val entries = Seq( + literal("spark_catalog", "weird.schema"), + literal("spark_catalog", "weird.schema")) + val e = intercept[AnalysisException] { + PathElement.validateNoStaticDuplicates(entries, caseSensitive = false) + } + assert(e.getMessageParameters.get("pathEntry") == "spark_catalog.`weird.schema`") + } + + test("validateNoStaticDuplicates: multi-level namespace duplicate is flagged") { + val entries = Seq( + literal("cat", "db", "ns"), + literal("cat", "db", "ns")) + val e = intercept[AnalysisException] { + PathElement.validateNoStaticDuplicates(entries, caseSensitive = false) + } + assert(e.getMessageParameters.get("pathEntry") == "cat.db.ns") + } +} diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/SqlPathE2ETestSuite.scala 
b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/SqlPathE2ETestSuite.scala new file mode 100644 index 0000000000000..88ed1f31c86ae --- /dev/null +++ b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/SqlPathE2ETestSuite.scala @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.connect + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.connect.test.{ConnectFunSuite, RemoteSparkSession, SQLHelper} +import org.apache.spark.sql.functions.current_path + +/** + * End-to-end coverage for the SQL Standard PATH feature over Spark Connect. + * + * SET PATH and the frozen-path semantics for persisted views / SQL functions are implemented + * entirely server-side, but the analyzer state (`AnalysisContext`) that carries the pinned path + * must survive plan reification across the gRPC boundary. 
These tests run the public surface over + * a real Connect client so regressions there are caught: + * - `SET PATH = ...` is parsed and applied to the session, + * - `current_path()` (SQL and the DataFrame builtin) reflects it, + * - a persisted view created under one path resolves its body under the frozen path even when + * the invoker switches the session path. + */ +class SqlPathE2ETestSuite extends ConnectFunSuite with RemoteSparkSession with SQLHelper { + + test("SET PATH and current_path() round-trip over Connect") { + withSQLConf("spark.sql.path.enabled" -> "true") { + try { + spark.sql("SET PATH = spark_catalog.default, system.builtin") + val sqlPath = spark.sql("SELECT current_path()").head().getString(0) + assert( + sqlPath == "spark_catalog.default,system.builtin", + s"current_path() over Connect should reflect SET PATH; got: $sqlPath") + + // DataFrame builtin should agree with the SQL form. + val apiPath = spark.range(1).select(current_path()).head().getString(0) + assert( + apiPath == sqlPath, + s"functions.current_path() should match SQL current_path(); got: $apiPath vs $sqlPath") + } finally { + spark.sql("SET PATH = DEFAULT_PATH") + } + } + } + + test("Persisted view body uses frozen path over Connect") { + withSQLConf("spark.sql.path.enabled" -> "true") { + withDatabase("connect_path_a", "connect_path_b") { + spark.sql("CREATE DATABASE connect_path_a") + spark.sql("CREATE DATABASE connect_path_b") + spark.sql("CREATE TABLE connect_path_a.frozen_t USING parquet AS SELECT 1 AS id") + spark.sql("CREATE TABLE connect_path_b.frozen_t USING parquet AS SELECT 2 AS id") + withView("default.v_path_connect") { + try { + // Create the view under PATH=a. + spark.sql("SET PATH = spark_catalog.connect_path_a, system.builtin") + spark.sql("CREATE VIEW default.v_path_connect AS SELECT id FROM frozen_t") + + // Switch the session path to b; bare `frozen_t` now resolves through b, + // but the view's frozen path keeps it pinned to a. 
+ spark.sql("SET PATH = spark_catalog.connect_path_b, system.builtin") + val bare = spark.sql("SELECT id FROM frozen_t").head().getInt(0) + assert(bare == 2, s"Bare `frozen_t` should follow live PATH=b; got: $bare") + val viaView = spark.sql("SELECT id FROM default.v_path_connect").head().getInt(0) + assert( + viaView == 1, + s"View body should resolve via the frozen creation-time PATH; got: $viaView") + } finally { + spark.sql("SET PATH = DEFAULT_PATH") + } + } + } + } + } + + test("SET PATH is rejected over Connect when feature is disabled") { + withSQLConf("spark.sql.path.enabled" -> "false") { + val ex = intercept[AnalysisException] { + spark.sql("SET PATH = spark_catalog.default") + } + assert( + ex.getCondition == "UNSUPPORTED_FEATURE.SET_PATH_WHEN_DISABLED", + s"Expected SET_PATH_WHEN_DISABLED, got: ${ex.getCondition}") + } + } +} diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-path.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-path.sql.out new file mode 100644 index 0000000000000..4ab4d14965518 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-path.sql.out @@ -0,0 +1,286 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SELECT current_path() +-- !query analysis +Project [current_path() AS current_path()#x] ++- OneRowRelation + + +-- !query +SET PATH = spark_catalog.default, system.builtin +-- !query analysis +SetPathCommand [SchemaInPath(List(spark_catalog, default)), SchemaInPath(List(system, builtin))] + + +-- !query +SELECT current_path() +-- !query analysis +Project [current_path() AS current_path()#x] ++- OneRowRelation + + +-- !query +SET PATH = DEFAULT_PATH +-- !query analysis +SetPathCommand [DefaultPath] + + +-- !query +SELECT current_path() +-- !query analysis +Project [current_path() AS current_path()#x] ++- OneRowRelation + + +-- !query +SET PATH = SYSTEM_PATH +-- !query analysis +SetPathCommand [SystemPath] + + +-- !query +SELECT current_path() +-- !query 
analysis +Project [current_path() AS current_path()#x] ++- OneRowRelation + + +-- !query +SET PATH = spark_catalog.default, system.builtin +-- !query analysis +SetPathCommand [SchemaInPath(List(spark_catalog, default)), SchemaInPath(List(system, builtin))] + + +-- !query +SET PATH = PATH, system.session +-- !query analysis +SetPathCommand [PathRef, SchemaInPath(List(system, session))] + + +-- !query +SELECT current_path() +-- !query analysis +Project [current_path() AS current_path()#x] ++- OneRowRelation + + +-- !query +SET PATH = current_schema, system.builtin +-- !query analysis +SetPathCommand [CurrentSchema, SchemaInPath(List(system, builtin))] + + +-- !query +SELECT current_path() +-- !query analysis +Project [current_path() AS current_path()#x] ++- OneRowRelation + + +-- !query +SET PATH = current_database, system.builtin +-- !query analysis +SetPathCommand [CurrentSchema, SchemaInPath(List(system, builtin))] + + +-- !query +SELECT current_path() +-- !query analysis +Project [current_path() AS current_path()#x] ++- OneRowRelation + + +-- !query +SET PATH = spark_catalog.default, system.builtin +-- !query analysis +SetPathCommand [SchemaInPath(List(spark_catalog, default)), SchemaInPath(List(system, builtin))] + + +-- !query +SELECT CURRENT_PATH = current_path() AS same +-- !query analysis +Project [(current_path() = current_path()) AS same#x] ++- OneRowRelation + + +-- !query +CREATE SCHEMA sql_path_routines +-- !query analysis +CreateNamespace false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_routines] + + +-- !query +CREATE FUNCTION sql_path_routines.pick() RETURNS INT RETURN 7 +-- !query analysis +CreateSQLFunctionCommand spark_catalog.sql_path_routines.pick, INT, 7, false, false, false, false + + +-- !query +SET PATH = spark_catalog.sql_path_routines, spark_catalog.default, system.builtin +-- !query analysis +SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_routines)), SchemaInPath(List(spark_catalog, default)), 
SchemaInPath(List(system, builtin))] + + +-- !query +SELECT pick() +-- !query analysis +Project [spark_catalog.sql_path_routines.pick() AS spark_catalog.sql_path_routines.pick()#x] ++- Project + +- OneRowRelation + + +-- !query +SET PATH = DEFAULT_PATH +-- !query analysis +SetPathCommand [DefaultPath] + + +-- !query +DROP FUNCTION sql_path_routines.pick +-- !query analysis +DropFunctionCommand spark_catalog.sql_path_routines.pick, false, false + + +-- !query +DROP SCHEMA sql_path_routines +-- !query analysis +DropNamespace false, false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_routines] + + +-- !query +CREATE SCHEMA sql_path_relations +-- !query analysis +CreateNamespace false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_relations] + + +-- !query +CREATE TABLE sql_path_relations.tbl USING parquet AS SELECT 42 AS id +-- !query analysis +CreateDataSourceTableAsSelectCommand `spark_catalog`.`sql_path_relations`.`tbl`, ErrorIfExists, [id] + +- Project [42 AS id#x] + +- OneRowRelation + + +-- !query +SET PATH = spark_catalog.sql_path_relations, spark_catalog.default, system.builtin +-- !query analysis +SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_relations)), SchemaInPath(List(spark_catalog, default)), SchemaInPath(List(system, builtin))] + + +-- !query +SELECT id FROM tbl +-- !query analysis +Project [id#x] ++- SubqueryAlias spark_catalog.sql_path_relations.tbl + +- Relation spark_catalog.sql_path_relations.tbl[id#x] parquet + + +-- !query +SET PATH = DEFAULT_PATH +-- !query analysis +SetPathCommand [DefaultPath] + + +-- !query +DROP TABLE sql_path_relations.tbl +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), sql_path_relations.tbl + + +-- !query +DROP SCHEMA sql_path_relations +-- !query analysis +DropNamespace false, false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_relations] + + +-- !query +SET PATH = spark_catalog.default, 
spark_catalog.default +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DUPLICATE_SQL_PATH_ENTRY", + "sqlState" : "42732", + "messageParameters" : { + "pathEntry" : "spark_catalog.default" + } +} + + +-- !query +SET PATH = DEFAULT_PATH, system.builtin +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DUPLICATE_SQL_PATH_ENTRY", + "sqlState" : "42732", + "messageParameters" : { + "pathEntry" : "system.builtin" + } +} + + +-- !query +SET PATH = my_schema_no_catalog +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "INVALID_SQL_PATH_SCHEMA_REFERENCE", + "sqlState" : "42601", + "messageParameters" : { + "qualifiedName" : "my_schema_no_catalog" + } +} + + +-- !query +SELECT current_path(1) +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION", + "sqlState" : "42605", + "messageParameters" : { + "actualNum" : "1", + "docroot" : "https://spark.apache.org/docs/latest", + "expectedNum" : "0", + "functionName" : "`current_path`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 22, + "fragment" : "current_path(1)" + } ] +} + + +-- !query +SET spark.sql.path.enabled=false +-- !query analysis +SetCommand (spark.sql.path.enabled,Some(false)) + + +-- !query +SELECT current_path() IS NOT NULL AS has_path +-- !query analysis +Project [isnotnull(current_path()) AS has_path#x] ++- OneRowRelation + + +-- !query +SET PATH = spark_catalog.default +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNSUPPORTED_FEATURE.SET_PATH_WHEN_DISABLED", + "sqlState" : "0A000", + "messageParameters" : { + "config" : "spark.sql.path.enabled" + } +} diff --git a/sql/core/src/test/resources/sql-tests/inputs/sql-path.sql b/sql/core/src/test/resources/sql-tests/inputs/sql-path.sql new file mode 100644 index 0000000000000..4e6ab68286b5f --- /dev/null +++ 
b/sql/core/src/test/resources/sql-tests/inputs/sql-path.sql @@ -0,0 +1,80 @@ +-- SPARK-56853: SQL Standard PATH golden file coverage. +-- Covers the SET PATH grammar, CURRENT_PATH() reflection, path-driven +-- routine/relation resolution, and the most common static error conditions. + +--SET spark.sql.path.enabled=true + +-- Default path (no SET PATH issued, no DEFAULT_PATH conf): the spark-builtin +-- default ordering: system.builtin, system.session, then the current schema. +SELECT current_path(); + +-- A literal SET PATH that pins both a user schema and system.builtin. +SET PATH = spark_catalog.default, system.builtin; +SELECT current_path(); + +-- DEFAULT_PATH restores the spark-builtin default ordering for the session. +SET PATH = DEFAULT_PATH; +SELECT current_path(); + +-- SYSTEM_PATH expands to the two system entries in the default order. +SET PATH = SYSTEM_PATH; +SELECT current_path(); + +-- The PATH keyword reuses the live path; a new entry can be appended. +SET PATH = spark_catalog.default, system.builtin; +SET PATH = PATH, system.session; +SELECT current_path(); + +-- current_schema / current_database expand to the live USE SCHEMA. +SET PATH = current_schema, system.builtin; +SELECT current_path(); +SET PATH = current_database, system.builtin; +SELECT current_path(); + +-- ANSI keyword form (no parens) returns the same string as current_path(). +SET PATH = spark_catalog.default, system.builtin; +SELECT CURRENT_PATH = current_path() AS same; + +-- Routine resolution follows the path. +CREATE SCHEMA sql_path_routines; +CREATE FUNCTION sql_path_routines.pick() RETURNS INT RETURN 7; +SET PATH = spark_catalog.sql_path_routines, spark_catalog.default, system.builtin; +SELECT pick(); +SET PATH = DEFAULT_PATH; +DROP FUNCTION sql_path_routines.pick; +DROP SCHEMA sql_path_routines; + +-- Relation resolution follows the path (first-match wins).
+CREATE SCHEMA sql_path_relations; +CREATE TABLE sql_path_relations.tbl USING parquet AS SELECT 42 AS id; +SET PATH = spark_catalog.sql_path_relations, spark_catalog.default, system.builtin; +SELECT id FROM tbl; +SET PATH = DEFAULT_PATH; +DROP TABLE sql_path_relations.tbl; +DROP SCHEMA sql_path_relations; + +-- Static error cases --------------------------------------------------------- + +-- Static duplicate literal at SET PATH time. +SET PATH = spark_catalog.default, spark_catalog.default; + +-- DEFAULT_PATH already contains system.builtin; listing it again is a duplicate. +SET PATH = DEFAULT_PATH, system.builtin; + +-- Single-part name in SET PATH is not a valid qualified schema reference. +SET PATH = my_schema_no_catalog; + +-- current_path() takes no arguments. +SELECT current_path(1); + +-- PATH disabled -------------------------------------------------------------- +-- Flip the feature flag inline so the disabled behavior is exercised in the +-- same golden run. + +SET spark.sql.path.enabled=false; + +-- current_path() is still resolvable (it is a regular builtin). +SELECT current_path() IS NOT NULL AS has_path; + +-- SET PATH itself is rejected with UNSUPPORTED_FEATURE.SET_PATH_WHEN_DISABLED. 
+SET PATH = spark_catalog.default; diff --git a/sql/core/src/test/resources/sql-tests/results/sql-path.sql.out b/sql/core/src/test/resources/sql-tests/results/sql-path.sql.out new file mode 100644 index 0000000000000..7904272b6c6c1 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/sql-path.sql.out @@ -0,0 +1,340 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SELECT current_path() +-- !query schema +struct<current_path():string> +-- !query output +system.builtin,system.session,spark_catalog.default + + +-- !query +SET PATH = spark_catalog.default, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT current_path() +-- !query schema +struct<current_path():string> +-- !query output +spark_catalog.default,system.builtin + + +-- !query +SET PATH = DEFAULT_PATH +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT current_path() +-- !query schema +struct<current_path():string> +-- !query output +system.builtin,system.session,spark_catalog.default + + +-- !query +SET PATH = SYSTEM_PATH +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT current_path() +-- !query schema +struct<current_path():string> +-- !query output +system.builtin,system.session + + +-- !query +SET PATH = spark_catalog.default, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = PATH, system.session +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT current_path() +-- !query schema +struct<current_path():string> +-- !query output +spark_catalog.default,system.builtin,system.session + + +-- !query +SET PATH = current_schema, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT current_path() +-- !query schema +struct<current_path():string> +-- !query output +spark_catalog.default,system.builtin + + +-- !query +SET PATH = current_database, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT current_path() +-- !query schema +struct<current_path():string> +-- !query output +spark_catalog.default,system.builtin + + +-- !query +SET PATH =
spark_catalog.default, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT CURRENT_PATH = current_path() AS same +-- !query schema +struct<same:boolean> +-- !query output +true + + +-- !query +CREATE SCHEMA sql_path_routines +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE FUNCTION sql_path_routines.pick() RETURNS INT RETURN 7 +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = spark_catalog.sql_path_routines, spark_catalog.default, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT pick() +-- !query schema +struct<spark_catalog.sql_path_routines.pick():int> +-- !query output +7 + + +-- !query +SET PATH = DEFAULT_PATH +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP FUNCTION sql_path_routines.pick +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP SCHEMA sql_path_routines +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE SCHEMA sql_path_relations +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE sql_path_relations.tbl USING parquet AS SELECT 42 AS id +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = spark_catalog.sql_path_relations, spark_catalog.default, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT id FROM tbl +-- !query schema +struct<id:int> +-- !query output +42 + + +-- !query +SET PATH = DEFAULT_PATH +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE sql_path_relations.tbl +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP SCHEMA sql_path_relations +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = spark_catalog.default, spark_catalog.default +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DUPLICATE_SQL_PATH_ENTRY", + "sqlState" : "42732", + "messageParameters" : { + "pathEntry" : "spark_catalog.default" + } +} + + +-- !query +SET PATH =
DEFAULT_PATH, system.builtin +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DUPLICATE_SQL_PATH_ENTRY", + "sqlState" : "42732", + "messageParameters" : { + "pathEntry" : "system.builtin" + } +} + + +-- !query +SET PATH = my_schema_no_catalog +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "INVALID_SQL_PATH_SCHEMA_REFERENCE", + "sqlState" : "42601", + "messageParameters" : { + "qualifiedName" : "my_schema_no_catalog" + } +} + + +-- !query +SELECT current_path(1) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION", + "sqlState" : "42605", + "messageParameters" : { + "actualNum" : "1", + "docroot" : "https://spark.apache.org/docs/latest", + "expectedNum" : "0", + "functionName" : "`current_path`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 22, + "fragment" : "current_path(1)" + } ] +} + + +-- !query +SET spark.sql.path.enabled=false +-- !query schema +struct +-- !query output +spark.sql.path.enabled false + + +-- !query +SELECT current_path() IS NOT NULL AS has_path +-- !query schema +struct +-- !query output +true + + +-- !query +SET PATH = spark_catalog.default +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNSUPPORTED_FEATURE.SET_PATH_WHEN_DISABLED", + "sqlState" : "0A000", + "messageParameters" : { + "config" : "spark.sql.path.enabled" + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SetPathSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SetPathSuite.scala index 18b9f6b6f3b7e..af6976d797e5a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SetPathSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SetPathSuite.scala @@ -503,11 +503,110 @@ class SetPathSuite extends SharedSparkSession 
{ } } - // TODO: cloneSession() constructs a new CatalogManager per forked session and - // explicitly copies only the stored session path via copySessionPathFrom. - // Other CatalogManager state propagation (current catalog/namespace, registered - // catalogs) on clone is currently incidental -- audit and pin down the intended - // semantics in a follow-up. + // --- cloneSession() propagation matrix (SPARK-56853) ---------------------- + // The cloned session is built via `BaseSessionStateBuilder` from a parent + // `SessionState`. Per-component hand-offs on clone: + // - `SessionCatalog.copyStateTo` copies `currentDb` and `tempViews`, + // - `CatalogManager.copySessionPathFrom` copies the stored `_sessionPath`, + // - `functionRegistry.clone()` and `tableFunctionRegistry.clone()` copy + // temporary functions. + // What is NOT propagated: + // - the temp variable registry (new `TempVariableManager` per session), + // - the `CatalogManager` current-catalog / current-namespace (re-read from + // conf defaults in the child), + // - the registered v2 `catalogs` map (lazy-loaded per session). + // The tests below pin this observed behavior so any future change has to + // update the assertions. 
+ + test("cloneSession: stored SET PATH propagates to the child session") { + withPathEnabled { + sql("SET PATH = spark_catalog.default, system.builtin") + try { + val child = spark.cloneSession() + val entries = pathEntries( + child.sql("SELECT current_path()").collect().head.getString(0)) + assert(entries === Seq("spark_catalog.default", "system.builtin"), + s"Cloned session should inherit stored SET PATH; got: $entries") + } finally { + sql("SET PATH = DEFAULT_PATH") + } + } + } + + test("cloneSession: USE SCHEMA on the parent propagates to the child") { + sql("CREATE SCHEMA IF NOT EXISTS path_clone_use") + try { + sql("USE spark_catalog.path_clone_use") + val child = spark.cloneSession() + val childDb = child.sql("SELECT current_database()").head().getString(0) + assert(childDb == "path_clone_use", + s"Cloned session should inherit the parent's current schema; got: $childDb") + } finally { + sql("USE spark_catalog.default") + sql("DROP SCHEMA IF EXISTS path_clone_use") + } + } + + test("cloneSession: temp views on the parent propagate to the child") { + sql("CREATE TEMPORARY VIEW path_clone_view AS SELECT 1 AS c") + try { + val child = spark.cloneSession() + checkAnswer(child.sql("SELECT c FROM path_clone_view"), Row(1)) + } finally { + sql("DROP VIEW IF EXISTS path_clone_view") + } + } + + test("cloneSession: temp functions on the parent propagate to the child (cloned " + + "functionRegistry)") { + sql("CREATE TEMPORARY FUNCTION path_clone_fn() RETURNS INT RETURN 42") + try { + val child = spark.cloneSession() + checkAnswer(child.sql("SELECT path_clone_fn()"), Row(42)) + // Snapshot semantics: dropping in the parent must not affect the already-cloned child. 
+ sql("DROP TEMPORARY FUNCTION path_clone_fn") + checkAnswer(child.sql("SELECT path_clone_fn()"), Row(42)) + } finally { + sql("DROP TEMPORARY FUNCTION IF EXISTS path_clone_fn") + } + } + + test("cloneSession: temp variables on the parent are NOT propagated to the child") { + sql("DECLARE OR REPLACE VARIABLE path_clone_var INT DEFAULT 7") + try { + val child = spark.cloneSession() + val e = intercept[AnalysisException] { + child.sql("SELECT path_clone_var").collect() + } + // Either UNRESOLVED_VARIABLE or UNRESOLVED_COLUMN; both confirm the variable + // did not survive the clone. + assert( + e.getCondition == "UNRESOLVED_VARIABLE" || + e.getCondition.startsWith("UNRESOLVED_COLUMN"), + s"Temp variables should NOT propagate to the clone; got: ${e.getCondition}") + } finally { + sql("DROP TEMPORARY VARIABLE IF EXISTS path_clone_var") + } + } + + test("cloneSession: child SET PATH does not leak back to the parent") { + withPathEnabled { + sql("SET PATH = spark_catalog.default, system.builtin") + try { + val child = spark.cloneSession() + child.sql("SET PATH = system.session, system.builtin") + val parentEntries = pathEntries(currentPath()) + assert(parentEntries === Seq("spark_catalog.default", "system.builtin"), + s"Child SET PATH must not affect the parent; parent got: $parentEntries") + val childEntries = pathEntries( + child.sql("SELECT current_path()").collect().head.getString(0)) + assert(childEntries === Seq("system.session", "system.builtin"), + s"Child SET PATH should be visible only in the child; child got: $childEntries") + } finally { + sql("SET PATH = DEFAULT_PATH") + } + } + } // --- Resolution tests: verify SET PATH affects actual table/function lookup --- @@ -786,6 +885,126 @@ class SetPathSuite extends SharedSparkSession { } } + test("path-driven COUNT(*) rewrite gate: temp count shadowing builtin under SET PATH " + + "(session-first) suppresses the * -> 1 rewrite") { + // SPARK-56853: `Analyzer.matchesFunctionName` consults + // 
`FunctionResolution.isSessionBeforeBuiltinInPath` to decide whether COUNT(*) is the + // builtin (eligible for the COUNT(*) -> COUNT(1) shortcut) or a user-defined override. + // Default `sessionFunctionResolutionOrder` is "second", so creating a temp count while + // the default PATH is in effect passes the security check. Once SET PATH puts + // `system.session` before `system.builtin`, the rewrite must be suppressed and the + // star expansion must reach the temp `count`. + withPathEnabled { + sql("CREATE TEMPORARY FUNCTION count(x INT) RETURNS INT RETURN x + 100") + try { + // PATH still has builtin first: count(*) rewrites to count(1), which resolves to + // the builtin count and returns the row count of the input (1). + checkAnswer(sql("SELECT count(*) FROM VALUES (1) AS t(a)"), Row(1)) + + // Put session before builtin via SET PATH. The rewrite gate now reports + // `isSessionBeforeBuiltinInPath = true` AND a temp count exists, so the + // analyzer must NOT collapse `count(*)` to `count(1)`. The `*` then expands + // against the table's single column to `count(a)`, which resolves through + // the temp under the live path: 1 + 100 = 101. + sql("SET PATH = system.session, system.builtin") + checkAnswer(sql("SELECT count(*) FROM VALUES (1) AS t(a)"), Row(101)) + } finally { + sql("SET PATH = DEFAULT_PATH") + sql("DROP TEMPORARY FUNCTION IF EXISTS count") + } + } + } + + test("path-driven COUNT(*) rewrite gate: rewrite still applies for unrelated builtins") { + // SPARK-56853: the gate fires ONLY when a temp function with the same unqualified + // name as the builtin exists. A temp with a different name must not affect the + // COUNT(*) -> COUNT(1) shortcut even when session is searched before builtin. 
+ withPathEnabled { + sql("CREATE TEMPORARY FUNCTION my_helper(x INT) RETURNS INT RETURN x + 1") + try { + sql("SET PATH = system.session, system.builtin") + // No temp `count` exists; the rewrite still fires and the builtin row counter + // returns the row count of the input (3). + checkAnswer(sql("SELECT count(*) FROM VALUES (1), (2), (3) AS t(a)"), Row(3)) + } finally { + sql("SET PATH = DEFAULT_PATH") + sql("DROP TEMPORARY FUNCTION IF EXISTS my_helper") + } + } + } + + test("PATH enabled: concurrent SET PATH and unqualified lookups do not deadlock") { + // SPARK-56853: SessionCatalog.lookupBuiltinOrTempFunction is intentionally NOT + // synchronized on SessionCatalog because the path-driven kinds provider acquires + // CatalogManager.synchronized, and another thread holding that lock can call back + // into SessionCatalog (e.g. via setCurrentNamespace). This test hammers both sides + // concurrently: one thread flips SET PATH while another performs unqualified + // function lookups that go through the kinds provider. Within the budget we should + // observe no deadlock and no spurious analysis failures. 
+ withPathEnabled { + val budget = 200 + val iterations = new java.util.concurrent.atomic.AtomicInteger(0) + val barrier = new java.util.concurrent.CyclicBarrier(2) + val errors = new java.util.concurrent.ConcurrentLinkedQueue[Throwable]() + + val setterThread = new Thread(() => { + try { + barrier.await() + var i = 0 + while (i < budget && errors.isEmpty) { + if ((i % 2) == 0) { + sql("SET PATH = spark_catalog.default, system.builtin") + } else { + sql("SET PATH = system.builtin, system.session, spark_catalog.default") + } + i += 1 + } + } catch { + case t: Throwable => errors.add(t) + } + }, "SetPathSuite-setter") + + val lookupThread = new Thread(() => { + try { + barrier.await() + var i = 0 + while (i < budget && errors.isEmpty) { + // Forces unqualified function resolution against the live PATH and triggers + // the session-kinds provider on the catalog-manager side. + val n = sql("SELECT count(*) FROM VALUES (1), (2), (3) AS t(a)") + .head().getLong(0) + assert(n == 3L, s"unexpected count: $n at iteration $i") + iterations.incrementAndGet() + i += 1 + } + } catch { + case t: Throwable => errors.add(t) + } + }, "SetPathSuite-lookup") + + setterThread.start() + lookupThread.start() + + // Generous join: 30s is plenty for 200 cheap queries on either side and gives a + // clear failure signal if the implementation regresses into a deadlock. 
+ val joinMillis = 30000L + setterThread.join(joinMillis) + lookupThread.join(joinMillis) + + assert(!setterThread.isAlive, + "SET PATH thread did not finish; potential deadlock between SessionCatalog and " + + "CatalogManager synchronized blocks.") + assert(!lookupThread.isAlive, + "Lookup thread did not finish; potential deadlock between SessionCatalog and " + + "CatalogManager synchronized blocks.") + assert(errors.isEmpty, + s"Concurrent lookups raised unexpected errors: ${errors.toArray.mkString("; ")}") + assert(iterations.get() > 0, + "Lookup thread never completed a query; suspect contention or deadlock.") + sql("SET PATH = DEFAULT_PATH") + } + } + test("DEFAULT_PATH conf: duplicate entries are tolerated (first-match resolution)") { // Lookup uses first-match resolution, so redundant entries on DEFAULT_PATH are dead code // rather than an error. (Contrast with SET PATH, which still rejects static duplicates as diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/SqlPathV2CatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/SqlPathV2CatalogSuite.scala new file mode 100644 index 0000000000000..8533de55019a1 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/SqlPathV2CatalogSuite.scala @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector + +import java.util.Collections + +import org.apache.spark.sql.{AnalysisException, Row} +import org.apache.spark.sql.connector.catalog.{Identifier, InMemoryCatalog, SupportsNamespaces} +import org.apache.spark.sql.connector.catalog.functions.UnboundFunction +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession + +/** + * SPARK-56853: end-to-end coverage of [[SQLConf.PATH_ENABLED]] resolution through + * non-session V2 catalogs. + * + * Other path tests live in `SetPathSuite` (session catalog) and `ProcedureSuite` + * (procedures via CALL). This suite specifically exercises: + * - unqualified table resolution across two V2 catalogs in SET PATH, + * - first-match ordering when both catalogs hold the same name, + * - unqualified V2 function resolution across two V2 catalogs in SET PATH, + * - the negative case where the unqualified name only lives in a catalog + * that is NOT on the path. 
+ */ +class SqlPathV2CatalogSuite extends SharedSparkSession { + + private val emptyProps: java.util.Map[String, String] = Collections.emptyMap() + + override def beforeAll(): Unit = { + super.beforeAll() + spark.conf.set("spark.sql.catalog.pathcat", classOf[InMemoryCatalog].getName) + spark.conf.set("spark.sql.catalog.pathcat2", classOf[InMemoryCatalog].getName) + } + + override def afterAll(): Unit = { + try { + spark.sessionState.catalogManager.reset() + spark.sessionState.conf.unsetConf("spark.sql.catalog.pathcat") + spark.sessionState.conf.unsetConf("spark.sql.catalog.pathcat2") + } finally { + super.afterAll() + } + } + + private def v2Catalog(name: String): InMemoryCatalog = + spark.sessionState.catalogManager.catalog(name).asInstanceOf[InMemoryCatalog] + + private def createV2Namespace(catalog: String, ns: String): Unit = { + v2Catalog(catalog).asInstanceOf[SupportsNamespaces] + .createNamespace(Array(ns), emptyProps) + } + + private def addV2Function( + catalog: String, + ns: String, + name: String, + fn: UnboundFunction): Unit = { + v2Catalog(catalog).createFunction(Identifier.of(Array(ns), name), fn) + } + + test("V2 catalogs on SET PATH: unqualified table follows first match") { + withSQLConf(SQLConf.PATH_ENABLED.key -> "true") { + // pathcat and pathcat2 each have a namespace `ns` and a table `path_v2_t` with + // different contents, so we can tell which catalog supplied the row. 
+ createV2Namespace("pathcat", "ns") + createV2Namespace("pathcat2", "ns") + sql("CREATE TABLE pathcat.ns.path_v2_t (id INT) USING foo") + sql("INSERT INTO pathcat.ns.path_v2_t VALUES (10)") + sql("CREATE TABLE pathcat2.ns.path_v2_t (id INT) USING foo") + sql("INSERT INTO pathcat2.ns.path_v2_t VALUES (20)") + + try { + sql("SET PATH = pathcat.ns, pathcat2.ns, system.builtin") + checkAnswer(sql("SELECT id FROM path_v2_t"), Row(10)) + + sql("SET PATH = pathcat2.ns, pathcat.ns, system.builtin") + checkAnswer(sql("SELECT id FROM path_v2_t"), Row(20)) + } finally { + sql("SET PATH = DEFAULT_PATH") + sql("DROP TABLE IF EXISTS pathcat.ns.path_v2_t") + sql("DROP TABLE IF EXISTS pathcat2.ns.path_v2_t") + } + } + } + + test("V2 catalogs on SET PATH: unqualified table only in a non-path catalog is not found") { + withSQLConf(SQLConf.PATH_ENABLED.key -> "true") { + createV2Namespace("pathcat", "ns_only_here") + sql("CREATE TABLE pathcat.ns_only_here.hidden_t (id INT) USING foo") + try { + // Path does not include pathcat.ns_only_here; bare `hidden_t` must not resolve. + sql("SET PATH = pathcat2.ns, system.builtin") + val e = intercept[AnalysisException] { + sql("SELECT id FROM hidden_t").collect() + } + assert(e.getCondition == "TABLE_OR_VIEW_NOT_FOUND" || + e.getMessage.contains("TABLE_OR_VIEW_NOT_FOUND"), + s"Expected TABLE_OR_VIEW_NOT_FOUND; got: ${e.getCondition}: ${e.getMessage}") + } finally { + sql("SET PATH = DEFAULT_PATH") + sql("DROP TABLE IF EXISTS pathcat.ns_only_here.hidden_t") + } + } + } + + test("V2 catalogs on SET PATH: unqualified function follows first match") { + withSQLConf(SQLConf.PATH_ENABLED.key -> "true") { + // Two V2 catalogs each register a `strlen` function; resolution must follow path order. 
+ createV2Namespace("pathcat", "fns") + createV2Namespace("pathcat2", "fns") + addV2Function("pathcat", "fns", "strlen", StrLen(StrLenDefault)) + addV2Function("pathcat2", "fns", "strlen", StrLen(StrLenMagic)) + try { + sql("SET PATH = pathcat.fns, pathcat2.fns, system.builtin") + // Both backing impls return the same numeric length, so a correct result here + // also implies neither catalog raised "not found" -- the path drove resolution. + checkAnswer(sql("SELECT strlen('abc')"), Row(3)) + + sql("SET PATH = pathcat2.fns, pathcat.fns, system.builtin") + checkAnswer(sql("SELECT strlen('hello')"), Row(5)) + } finally { + sql("SET PATH = DEFAULT_PATH") + v2Catalog("pathcat").clearFunctions() + v2Catalog("pathcat2").clearFunctions() + } + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index f6ace55849d26..bf1a91e344a7c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -1453,6 +1453,99 @@ abstract class SQLViewSuite extends QueryTest { } } + test("SPARK-56853: stored view path is ignored when PATH is disabled at read time") { + // A view created with PATH enabled persists its frozen resolution path in metadata. + // If the reader's session has `spark.sql.path.enabled=false`, the pinned entries are + // intentionally dropped (`CatalogManager.resolutionPathEntriesForAnalysis`); the view + // body falls back to its catalog/namespace. Verify both directions: + // - fully-qualified bodies keep working (qualification doesn't depend on PATH), + // - unqualified bodies that relied on the frozen path now resolve through the + // view's home schema (and fail when the unqualified name isn't there). 
+ withDatabase("compat_view_a", "compat_view_b") { + sql("CREATE DATABASE compat_view_a") + sql("CREATE DATABASE compat_view_b") + withTable( + "compat_view_a.compat_t", + "compat_view_b.compat_t") { + sql("CREATE TABLE compat_view_a.compat_t USING parquet AS SELECT 1 AS id") + sql("CREATE TABLE compat_view_b.compat_t USING parquet AS SELECT 2 AS id") + withView( + "compat_view_b.v_unq_path", + "compat_view_b.v_fq_path") { + // Create both views with USE compat_view_b in effect so the stored + // viewCatalogAndNamespace points at compat_view_b, then SET PATH=a so the + // frozen path pins compat_view_a. + withSQLConf(PATH_ENABLED.key -> "true") { + try { + sql("USE spark_catalog.compat_view_b") + sql("SET PATH = spark_catalog.compat_view_a, system.builtin") + sql( + """ + |CREATE VIEW compat_view_b.v_unq_path AS + |SELECT id FROM compat_t + |""".stripMargin) + sql( + """ + |CREATE VIEW compat_view_b.v_fq_path AS + |SELECT id FROM spark_catalog.compat_view_a.compat_t + |""".stripMargin) + } finally { + sql("SET PATH = DEFAULT_PATH") + sql("USE spark_catalog.default") + } + } + + // Now read with PATH disabled. The fully-qualified view body is independent of + // PATH and must keep returning rows from compat_view_a. The unqualified-body view + // drops its frozen-path pin and falls back to viewCatalogAndNamespace + // (compat_view_b), so unqualified `compat_t` resolves to compat_view_b.compat_t. + withSQLConf(PATH_ENABLED.key -> "false") { + checkAnswer(sql("SELECT id FROM compat_view_b.v_fq_path"), Row(1)) + checkAnswer(sql("SELECT id FROM compat_view_b.v_unq_path"), Row(2)) + } + } + } + } + } + + test("SPARK-56853: stored view path with no fallback target fails clearly when PATH is off") { + // Same setup as above but the view's home schema does NOT contain the unqualified + // name; under PATH disabled the analyzer cannot fall back anywhere, so the lookup + // must raise TABLE_OR_VIEW_NOT_FOUND against the view's catalog/namespace. 
+ withDatabase("compat_home_only", "compat_referenced") { + sql("CREATE DATABASE compat_home_only") + sql("CREATE DATABASE compat_referenced") + withTable("compat_referenced.only_here") { + sql("CREATE TABLE compat_referenced.only_here USING parquet AS SELECT 7 AS id") + withView("compat_home_only.v_unq_home") { + withSQLConf(PATH_ENABLED.key -> "true") { + try { + sql("USE spark_catalog.compat_home_only") + sql("SET PATH = spark_catalog.compat_referenced, system.builtin") + sql( + """ + |CREATE VIEW compat_home_only.v_unq_home AS + |SELECT id FROM only_here + |""".stripMargin) + } finally { + sql("SET PATH = DEFAULT_PATH") + sql("USE spark_catalog.default") + } + } + + withSQLConf(PATH_ENABLED.key -> "false") { + val e = intercept[AnalysisException] { + sql("SELECT id FROM compat_home_only.v_unq_home").collect() + } + assert(e.getCondition == "TABLE_OR_VIEW_NOT_FOUND" || + e.getMessage.contains("TABLE_OR_VIEW_NOT_FOUND"), + s"Expected TABLE_OR_VIEW_NOT_FOUND; got: ${e.getCondition}: ${e.getMessage}") + } + } + } + } + } + // Regression guard: frozen resolution path must not leak into CURRENT_SCHEMA/CURRENT_PATH. 
test("SPARK-56639: current_schema/current_path in persisted view use invoker context") { withSQLConf(PATH_ENABLED.key -> "true") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterViewSchemaBindingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterViewSchemaBindingSuite.scala index 39e6e708403aa..a78b0842e87b7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterViewSchemaBindingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterViewSchemaBindingSuite.scala @@ -17,7 +17,53 @@ package org.apache.spark.sql.execution.command.v1 +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.execution.command +import org.apache.spark.sql.internal.SQLConf class AlterViewSchemaBindingSuite - extends command.AlterViewSchemaBindingSuiteBase with ViewCommandSuiteBase + extends command.AlterViewSchemaBindingSuiteBase with ViewCommandSuiteBase { + + test("SPARK-56853: ALTER VIEW ... WITH SCHEMA preserves the frozen SQL path") { + // `generateViewProperties(captureNewPath = false)` is the documented behavior for + // ALTER VIEW WITH SCHEMA: the view's body resolution path must stay pinned to the + // create-time PATH, not the caller's current PATH. This test creates the view under + // PATH=a, then runs ALTER VIEW WITH SCHEMA EVOLUTION under PATH=b, and asserts that + // the persisted VIEW_RESOLUTION_PATH still reflects PATH=a. 
+ withSQLConf(SQLConf.PATH_ENABLED.key -> "true") { + val viewName = "v_path_preserved_on_alter" + val view = s"$catalog.$namespace.$viewName" + sql(s"CREATE SCHEMA IF NOT EXISTS $catalog.alter_view_path_a") + try { + sql(s"SET PATH = $catalog.alter_view_path_a, system.builtin") + sql(s"CREATE VIEW $view AS SELECT 1 AS x") + val pathAfterCreate = spark.sessionState.catalog + .getTableMetadata(TableIdentifier(viewName, Some(namespace))) + .viewStoredResolutionPath + .getOrElse(fail("Expected the view to persist a frozen SQL path")) + val parsedCreate = CatalogManager.deserializePathEntries(pathAfterCreate) + .getOrElse(fail(s"Expected a valid serialized path, got: $pathAfterCreate")) + assert(parsedCreate.contains(Seq(catalog, "alter_view_path_a")), + s"Frozen path should include alter_view_path_a; got: $parsedCreate") + + // Switch the live PATH to something else and run ALTER VIEW WITH SCHEMA. + // The captureNewPath = false code path must NOT overwrite the frozen path. + sql(s"SET PATH = $catalog.default, system.builtin") + sql(s"ALTER VIEW $view WITH SCHEMA EVOLUTION") + + val pathAfterAlter = spark.sessionState.catalog + .getTableMetadata(TableIdentifier(viewName, Some(namespace))) + .viewStoredResolutionPath + .getOrElse(fail("Frozen SQL path was dropped by ALTER VIEW WITH SCHEMA")) + assert(pathAfterAlter == pathAfterCreate, + s"ALTER VIEW WITH SCHEMA must preserve the frozen path. " + + s"Before: $pathAfterCreate; after: $pathAfterAlter") + } finally { + sql("SET PATH = DEFAULT_PATH") + sql(s"DROP VIEW IF EXISTS $view") + sql(s"DROP SCHEMA IF EXISTS $catalog.alter_view_path_a") + } + } + } +} From 3dca8e47d8f3ae1bd77a4672d1459ecd255407d9 Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Thu, 14 May 2026 07:12:56 -0700 Subject: [PATCH 2/3] [SPARK-56853][SQL][TESTS][FOLLOWUP] Address review feedback Follow-up changes after the initial PR landed on the branch: - Strip the SPARK-56853 JIRA tag from test names, comments, and file headers across the new tests. 
These are feature-coverage tests, not bug regressions, so the JIRA label does not belong inline. - Correct the inaccurate "view's home schema" framing in the two new PATH-disabled compat tests in `SQLViewSuite`. The fallback target is the view's `viewCatalogAndNamespace` property -- i.e. the creator session's USE state captured at CREATE VIEW time -- not the schema the view physically lives in. The previous comment overgeneralized from a setup choice (USE-ing the destination schema before CREATE) to the general behavior. - Merge `PathElementSuite` into `CatalogManagerSuite`. The seven `PathElement.validateNoStaticDuplicates` tests are pure data tests living in the same package as the consumer; folding them into the sibling suite keeps the test file count down without losing coverage. `CatalogManagerSuite` is now 16 tests grouped by section comment. - Restructure `sql-tests/inputs/sql-path.sql` as the primary SQL-level reference for the feature. Adds a self-describing header, a Table of Contents, and ten clearly delimited sections covering: default path observability; SET PATH grammar (literal, DEFAULT_PATH, SYSTEM_PATH, PATH append, current_schema/current_database); CURRENT_PATH() builtin and its ANSI no-parens form; the full set of static error conditions; routine resolution via PATH for scalar AND table functions; relation resolution via PATH; persisted view frozen-path behavior including invoker-context current_schema/current_path; SQL function frozen-path behavior for scalar and table functions including the invoker-context guard; the `spark.sql.defaultPath` conf (explicit override, expansion, rejection of invalid values and the PATH keyword); and PATH-disabled behaviors. The corresponding `results/` and `analyzer-results/` golden files were regenerated and reviewed. 
--- python/pyspark/sql/tests/test_catalog.py | 6 +- .../catalog/CatalogManagerSuite.scala | 78 +- .../connector/catalog/PathElementSuite.scala | 100 -- .../analyzer-results/sql-path.sql.out | 885 ++++++++++++++-- .../resources/sql-tests/inputs/sql-path.sql | 398 ++++++- .../sql-tests/results/sql-path.sql.out | 972 +++++++++++++++++- .../org/apache/spark/sql/SetPathSuite.scala | 8 +- .../sql/connector/SqlPathV2CatalogSuite.scala | 3 +- .../spark/sql/execution/SQLViewSuite.scala | 29 +- .../v1/AlterViewSchemaBindingSuite.scala | 2 +- 10 files changed, 2205 insertions(+), 276 deletions(-) delete mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/PathElementSuite.scala diff --git a/python/pyspark/sql/tests/test_catalog.py b/python/pyspark/sql/tests/test_catalog.py index 9e151a2b4ae9b..d832a9ffa7d04 100644 --- a/python/pyspark/sql/tests/test_catalog.py +++ b/python/pyspark/sql/tests/test_catalog.py @@ -589,7 +589,7 @@ def test_catalog_analyze_table(self): spark.catalog.analyzeTable(t, noScan=True) def test_path_current_path_disabled(self): - # SPARK-56853: current_path() is a regular builtin and resolves even when + # current_path() is a regular builtin and resolves even when # spark.sql.path.enabled is false. The DataFrame and SQL surfaces must agree. from pyspark.sql.functions import current_path @@ -602,7 +602,7 @@ def test_path_current_path_disabled(self): self.assertEqual(sql_form, api_form) def test_path_set_path_and_current_path(self): - # SPARK-56853: SET PATH is parsed and applied; current_path() reflects it + # SET PATH is parsed and applied; current_path() reflects it # over both the SQL and DataFrame surfaces. Restores DEFAULT_PATH on exit. 
from pyspark.sql.functions import current_path @@ -618,7 +618,7 @@ def test_path_set_path_and_current_path(self): spark.sql("SET PATH = DEFAULT_PATH") def test_path_set_path_rejected_when_disabled(self): - # SPARK-56853: SET PATH must raise UNSUPPORTED_FEATURE.SET_PATH_WHEN_DISABLED + # SET PATH must raise UNSUPPORTED_FEATURE.SET_PATH_WHEN_DISABLED # when the feature flag is off (covers both classic and Connect error paths). spark = self.spark with self.sql_conf({"spark.sql.path.enabled": False}): diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CatalogManagerSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CatalogManagerSuite.scala index 6fb208e586839..64b2ac91fbd61 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CatalogManagerSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CatalogManagerSuite.scala @@ -22,9 +22,11 @@ import java.net.URI import scala.jdk.CollectionConverters._ import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.{EmptyFunctionRegistry, FakeV2SessionCatalog, NoSuchNamespaceException} import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, InMemoryCatalog => V1InMemoryCatalog, SessionCatalog} import org.apache.spark.sql.catalyst.plans.SQLHelper +import org.apache.spark.sql.connector.catalog.CatalogManager.{CurrentSchemaEntry, LiteralPathEntry} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -176,7 +178,7 @@ class CatalogManagerSuite extends SparkFunSuite with SQLHelper { } test("deserializePathEntriesOrFail raises a clear AnalysisException for bad payloads") { - val e = intercept[org.apache.spark.sql.AnalysisException] { + val e = intercept[AnalysisException] { CatalogManager.deserializePathEntriesOrFail( storedPathStr = "{bad-json", objectType = "view", @@ -185,6 +187,80 @@ 
class CatalogManagerSuite extends SparkFunSuite with SQLHelper { assert(e.getMessage.contains("Invalid stored SQL path metadata for view")) assert(e.getMessage.contains("default.v_broken")) } + + // --------------------------------------------------------------------------- + // Direct unit tests for [[PathElement.validateNoStaticDuplicates]]. The end-to-end + // `SetPathSuite` exercises this via SQL, but the duplicate-detection rules + // (literal-vs-literal, current_schema-vs-current_schema, case-sensitivity) are pure + // data and benefit from focused tests close to the implementation. + // --------------------------------------------------------------------------- + + private def literalEntry(parts: String*): LiteralPathEntry = LiteralPathEntry(parts.toSeq) + + test("validateNoStaticDuplicates: no duplicates returns the input unchanged") { + val entries = Seq( + literalEntry("spark_catalog", "default"), + literalEntry("system", "builtin"), + CurrentSchemaEntry) + assert(PathElement.validateNoStaticDuplicates(entries, caseSensitive = false) === entries) + } + + test("validateNoStaticDuplicates: duplicate literal under case-insensitive collation") { + val entries = Seq( + literalEntry("spark_catalog", "default"), + literalEntry("Spark_Catalog", "DEFAULT")) + val e = intercept[AnalysisException] { + PathElement.validateNoStaticDuplicates(entries, caseSensitive = false) + } + assert(e.getCondition == "DUPLICATE_SQL_PATH_ENTRY") + assert(e.getMessageParameters.get("pathEntry") == "Spark_Catalog.DEFAULT") + } + + test("validateNoStaticDuplicates: case-sensitive mode keeps differently cased entries") { + val entries = Seq( + literalEntry("spark_catalog", "DEFAULT"), + literalEntry("spark_catalog", "default")) + assert(PathElement.validateNoStaticDuplicates(entries, caseSensitive = true) === entries) + } + + test("validateNoStaticDuplicates: repeated CurrentSchemaEntry is rejected") { + val entries = Seq(CurrentSchemaEntry, CurrentSchemaEntry) + val e = 
intercept[AnalysisException] { + PathElement.validateNoStaticDuplicates(entries, caseSensitive = false) + } + assert(e.getCondition == "DUPLICATE_SQL_PATH_ENTRY") + assert(e.getMessageParameters.get("pathEntry") == "current_schema") + } + + test("validateNoStaticDuplicates: literal-vs-CurrentSchemaEntry collision is tolerated") { + // The CurrentSchemaEntry marker resolves dynamically against USE SCHEMA, so a literal + // that happens to match the live current schema is intentionally not flagged here. + val entries = Seq( + literalEntry("spark_catalog", "default"), + CurrentSchemaEntry, + literalEntry("system", "builtin")) + assert(PathElement.validateNoStaticDuplicates(entries, caseSensitive = false) === entries) + } + + test("validateNoStaticDuplicates: identifier containing a dot is quoted in the error") { + val entries = Seq( + literalEntry("spark_catalog", "weird.schema"), + literalEntry("spark_catalog", "weird.schema")) + val e = intercept[AnalysisException] { + PathElement.validateNoStaticDuplicates(entries, caseSensitive = false) + } + assert(e.getMessageParameters.get("pathEntry") == "spark_catalog.`weird.schema`") + } + + test("validateNoStaticDuplicates: multi-level namespace duplicate is flagged") { + val entries = Seq( + literalEntry("cat", "db", "ns"), + literalEntry("cat", "db", "ns")) + val e = intercept[AnalysisException] { + PathElement.validateNoStaticDuplicates(entries, caseSensitive = false) + } + assert(e.getMessageParameters.get("pathEntry") == "cat.db.ns") + } } class DummyCatalog extends CatalogPlugin { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/PathElementSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/PathElementSuite.scala deleted file mode 100644 index 5c267151cc9bf..0000000000000 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/PathElementSuite.scala +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under 
one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.connector.catalog - -import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.connector.catalog.CatalogManager.{ - CurrentSchemaEntry, LiteralPathEntry -} - -/** - * Direct unit tests for [[PathElement.validateNoStaticDuplicates]]. The end-to-end - * `SetPathSuite` exercises this via SQL, but the duplicate-detection rules - * (literal-vs-literal, current_schema-vs-current_schema, case-sensitivity) are pure - * data and benefit from focused tests close to the implementation. 
- */ -class PathElementSuite extends SparkFunSuite { - - private def literal(parts: String*): LiteralPathEntry = LiteralPathEntry(parts.toSeq) - - test("validateNoStaticDuplicates: no duplicates returns the input unchanged") { - val entries = Seq( - literal("spark_catalog", "default"), - literal("system", "builtin"), - CurrentSchemaEntry) - assert(PathElement.validateNoStaticDuplicates(entries, caseSensitive = false) === entries) - } - - test("validateNoStaticDuplicates: duplicate literal under case-insensitive collation") { - val entries = Seq( - literal("spark_catalog", "default"), - literal("Spark_Catalog", "DEFAULT")) - val e = intercept[AnalysisException] { - PathElement.validateNoStaticDuplicates(entries, caseSensitive = false) - } - assert(e.getCondition == "DUPLICATE_SQL_PATH_ENTRY") - assert(e.getMessageParameters.get("pathEntry") == "Spark_Catalog.DEFAULT") - } - - test("validateNoStaticDuplicates: case-sensitive mode keeps differently cased entries") { - val entries = Seq( - literal("spark_catalog", "DEFAULT"), - literal("spark_catalog", "default")) - assert(PathElement.validateNoStaticDuplicates(entries, caseSensitive = true) === entries) - } - - test("validateNoStaticDuplicates: repeated CurrentSchemaEntry is rejected") { - val entries = Seq(CurrentSchemaEntry, CurrentSchemaEntry) - val e = intercept[AnalysisException] { - PathElement.validateNoStaticDuplicates(entries, caseSensitive = false) - } - assert(e.getCondition == "DUPLICATE_SQL_PATH_ENTRY") - assert(e.getMessageParameters.get("pathEntry") == "current_schema") - } - - test("validateNoStaticDuplicates: literal-vs-CurrentSchemaEntry collision is tolerated") { - // The CurrentSchemaEntry marker resolves dynamically against USE SCHEMA, so a literal - // that happens to match the live current schema is intentionally not flagged here. 
- val entries = Seq( - literal("spark_catalog", "default"), - CurrentSchemaEntry, - literal("system", "builtin")) - assert(PathElement.validateNoStaticDuplicates(entries, caseSensitive = false) === entries) - } - - test("validateNoStaticDuplicates: identifier containing a dot is quoted in the error") { - val entries = Seq( - literal("spark_catalog", "weird.schema"), - literal("spark_catalog", "weird.schema")) - val e = intercept[AnalysisException] { - PathElement.validateNoStaticDuplicates(entries, caseSensitive = false) - } - assert(e.getMessageParameters.get("pathEntry") == "spark_catalog.`weird.schema`") - } - - test("validateNoStaticDuplicates: multi-level namespace duplicate is flagged") { - val entries = Seq( - literal("cat", "db", "ns"), - literal("cat", "db", "ns")) - val e = intercept[AnalysisException] { - PathElement.validateNoStaticDuplicates(entries, caseSensitive = false) - } - assert(e.getMessageParameters.get("pathEntry") == "cat.db.ns") - } -} diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-path.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-path.sql.out index 4ab4d14965518..3a494d1cd3b74 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-path.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-path.sql.out @@ -19,6 +19,38 @@ Project [current_path() AS current_path()#x] +- OneRowRelation +-- !query +SET PATH = Spark_Catalog.Default, System.Builtin +-- !query analysis +SetPathCommand [SchemaInPath(List(Spark_Catalog, Default)), SchemaInPath(List(System, Builtin))] + + +-- !query +SELECT current_path() +-- !query analysis +Project [current_path() AS current_path()#x] ++- OneRowRelation + + +-- !query +SET PATH = spark_catalog.`sch.b`, system.builtin +-- !query analysis +SetPathCommand [SchemaInPath(List(spark_catalog, sch.b)), SchemaInPath(List(system, builtin))] + + +-- !query +SELECT current_path() +-- !query analysis +Project [current_path() AS 
current_path()#x] ++- OneRowRelation + + +-- !query +SET PATH = DEFAULT_PATH +-- !query analysis +SetPathCommand [DefaultPath] + + -- !query SET PATH = DEFAULT_PATH -- !query analysis @@ -64,6 +96,13 @@ Project [current_path() AS current_path()#x] +- OneRowRelation +-- !query +USE spark_catalog.default +-- !query analysis +SetCatalogAndNamespace ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [default] + + -- !query SET PATH = current_schema, system.builtin -- !query analysis @@ -90,6 +129,12 @@ Project [current_path() AS current_path()#x] +- OneRowRelation +-- !query +SET PATH = DEFAULT_PATH +-- !query analysis +SetPathCommand [DefaultPath] + + -- !query SET PATH = spark_catalog.default, system.builtin -- !query analysis @@ -97,12 +142,119 @@ SetPathCommand [SchemaInPath(List(spark_catalog, default)), SchemaInPath(List(sy -- !query -SELECT CURRENT_PATH = current_path() AS same +SELECT CURRENT_PATH = current_path() AS ansi_form_matches -- !query analysis -Project [(current_path() = current_path()) AS same#x] +Project [(current_path() = current_path()) AS ansi_form_matches#x] +- OneRowRelation +-- !query +SELECT current_path(1) +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION", + "sqlState" : "42605", + "messageParameters" : { + "actualNum" : "1", + "docroot" : "https://spark.apache.org/docs/latest", + "expectedNum" : "0", + "functionName" : "`current_path`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 22, + "fragment" : "current_path(1)" + } ] +} + + +-- !query +SET PATH = DEFAULT_PATH +-- !query analysis +SetPathCommand [DefaultPath] + + +-- !query +SET PATH = spark_catalog.default, spark_catalog.default +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DUPLICATE_SQL_PATH_ENTRY", + "sqlState" : "42732", + "messageParameters" : { + "pathEntry" : "spark_catalog.default" + } +} + + +-- !query +SET PATH = 
spark_catalog.DEFAULT, spark_catalog.default +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DUPLICATE_SQL_PATH_ENTRY", + "sqlState" : "42732", + "messageParameters" : { + "pathEntry" : "spark_catalog.default" + } +} + + +-- !query +SET PATH = DEFAULT_PATH, system.builtin +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DUPLICATE_SQL_PATH_ENTRY", + "sqlState" : "42732", + "messageParameters" : { + "pathEntry" : "system.builtin" + } +} + + +-- !query +SET PATH = SYSTEM_PATH, SYSTEM_PATH +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DUPLICATE_SQL_PATH_ENTRY", + "sqlState" : "42732", + "messageParameters" : { + "pathEntry" : "system.builtin" + } +} + + +-- !query +SET PATH = current_database, current_schema +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DUPLICATE_SQL_PATH_ENTRY", + "sqlState" : "42732", + "messageParameters" : { + "pathEntry" : "current_schema" + } +} + + +-- !query +SET PATH = my_schema_no_catalog +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "INVALID_SQL_PATH_SCHEMA_REFERENCE", + "sqlState" : "42601", + "messageParameters" : { + "qualifiedName" : "my_schema_no_catalog" + } +} + + -- !query CREATE SCHEMA sql_path_routines -- !query analysis @@ -137,131 +289,734 @@ SetPathCommand [DefaultPath] -- !query -DROP FUNCTION sql_path_routines.pick +CREATE FUNCTION sql_path_routines.pick_tvf() +RETURNS TABLE(val INT) +RETURN SELECT 7 AS val -- !query analysis -DropFunctionCommand spark_catalog.sql_path_routines.pick, false, false +CreateSQLFunctionCommand spark_catalog.sql_path_routines.pick_tvf, val INT, SELECT 7 AS val, true, false, false, false -- !query -DROP SCHEMA sql_path_routines +SET PATH = spark_catalog.sql_path_routines, spark_catalog.default, system.builtin -- !query analysis -DropNamespace false, false -+- ResolvedNamespace V2SessionCatalog(spark_catalog), 
[sql_path_routines] +SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_routines)), SchemaInPath(List(spark_catalog, default)), SchemaInPath(List(system, builtin))] -- !query -CREATE SCHEMA sql_path_relations +SELECT * FROM pick_tvf() -- !query analysis -CreateNamespace false -+- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_relations] +Project [val#x] ++- SQLFunctionNode spark_catalog.sql_path_routines.pick_tvf + +- SubqueryAlias pick_tvf + +- Project [cast(val#x as int) AS val#x] + +- Project [7 AS val#x] + +- OneRowRelation -- !query -CREATE TABLE sql_path_relations.tbl USING parquet AS SELECT 42 AS id +SET PATH = DEFAULT_PATH -- !query analysis -CreateDataSourceTableAsSelectCommand `spark_catalog`.`sql_path_relations`.`tbl`, ErrorIfExists, [id] - +- Project [42 AS id#x] - +- OneRowRelation +SetPathCommand [DefaultPath] -- !query -SET PATH = spark_catalog.sql_path_relations, spark_catalog.default, system.builtin +CREATE SCHEMA sql_path_routines_b -- !query analysis -SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_relations)), SchemaInPath(List(spark_catalog, default)), SchemaInPath(List(system, builtin))] +CreateNamespace false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_routines_b] -- !query -SELECT id FROM tbl +CREATE FUNCTION sql_path_routines_b.pick() RETURNS INT RETURN 11 -- !query analysis -Project [id#x] -+- SubqueryAlias spark_catalog.sql_path_relations.tbl - +- Relation spark_catalog.sql_path_relations.tbl[id#x] parquet +CreateSQLFunctionCommand spark_catalog.sql_path_routines_b.pick, INT, 11, false, false, false, false -- !query -SET PATH = DEFAULT_PATH +SET PATH = spark_catalog.sql_path_routines, spark_catalog.sql_path_routines_b, system.builtin -- !query analysis -SetPathCommand [DefaultPath] +SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_routines)), SchemaInPath(List(spark_catalog, sql_path_routines_b)), SchemaInPath(List(system, builtin))] -- !query -DROP TABLE sql_path_relations.tbl 
+SELECT pick() AS from_first_schema -- !query analysis -DropTable false, false -+- ResolvedIdentifier V2SessionCatalog(spark_catalog), sql_path_relations.tbl +Project [spark_catalog.sql_path_routines.pick() AS from_first_schema#x] ++- Project + +- OneRowRelation -- !query -DROP SCHEMA sql_path_relations +SET PATH = spark_catalog.sql_path_routines_b, spark_catalog.sql_path_routines, system.builtin -- !query analysis -DropNamespace false, false -+- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_relations] +SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_routines_b)), SchemaInPath(List(spark_catalog, sql_path_routines)), SchemaInPath(List(system, builtin))] -- !query -SET PATH = spark_catalog.default, spark_catalog.default +SELECT pick() AS from_first_schema -- !query analysis -org.apache.spark.sql.AnalysisException -{ - "errorClass" : "DUPLICATE_SQL_PATH_ENTRY", - "sqlState" : "42732", - "messageParameters" : { - "pathEntry" : "spark_catalog.default" - } -} +Project [spark_catalog.sql_path_routines_b.pick() AS from_first_schema#x] ++- Project + +- OneRowRelation -- !query -SET PATH = DEFAULT_PATH, system.builtin +SET PATH = DEFAULT_PATH -- !query analysis -org.apache.spark.sql.AnalysisException -{ - "errorClass" : "DUPLICATE_SQL_PATH_ENTRY", - "sqlState" : "42732", - "messageParameters" : { - "pathEntry" : "system.builtin" - } -} +SetPathCommand [DefaultPath] -- !query -SET PATH = my_schema_no_catalog +SET PATH = spark_catalog.default, system.builtin -- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "INVALID_SQL_PATH_SCHEMA_REFERENCE", - "sqlState" : "42601", - "messageParameters" : { - "qualifiedName" : "my_schema_no_catalog" - } -} +SetPathCommand [SchemaInPath(List(spark_catalog, default)), SchemaInPath(List(system, builtin))] -- !query -SELECT current_path(1) +SELECT pick() -- !query analysis org.apache.spark.sql.AnalysisException { - "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION", - "sqlState" 
: "42605", + "errorClass" : "UNRESOLVED_ROUTINE", + "sqlState" : "42883", "messageParameters" : { - "actualNum" : "1", - "docroot" : "https://spark.apache.org/docs/latest", - "expectedNum" : "0", - "functionName" : "`current_path`" + "routineName" : "`pick`", + "searchPath" : "[`spark_catalog`.`default`, `system`.`builtin`]" }, "queryContext" : [ { "objectType" : "", "objectName" : "", "startIndex" : 8, - "stopIndex" : 22, - "fragment" : "current_path(1)" + "stopIndex" : 13, + "fragment" : "pick()" } ] } -- !query -SET spark.sql.path.enabled=false +SET PATH = DEFAULT_PATH +-- !query analysis +SetPathCommand [DefaultPath] + + +-- !query +DROP FUNCTION sql_path_routines.pick +-- !query analysis +DropFunctionCommand spark_catalog.sql_path_routines.pick, false, false + + +-- !query +DROP FUNCTION sql_path_routines.pick_tvf +-- !query analysis +DropFunctionCommand spark_catalog.sql_path_routines.pick_tvf, false, false + + +-- !query +DROP FUNCTION sql_path_routines_b.pick +-- !query analysis +DropFunctionCommand spark_catalog.sql_path_routines_b.pick, false, false + + +-- !query +DROP SCHEMA sql_path_routines +-- !query analysis +DropNamespace false, false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_routines] + + +-- !query +DROP SCHEMA sql_path_routines_b +-- !query analysis +DropNamespace false, false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_routines_b] + + +-- !query +CREATE SCHEMA sql_path_relations_a +-- !query analysis +CreateNamespace false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_relations_a] + + +-- !query +CREATE SCHEMA sql_path_relations_b +-- !query analysis +CreateNamespace false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_relations_b] + + +-- !query +CREATE TABLE sql_path_relations_a.tbl USING parquet AS SELECT 1 AS id +-- !query analysis +CreateDataSourceTableAsSelectCommand `spark_catalog`.`sql_path_relations_a`.`tbl`, ErrorIfExists, [id] + +- Project [1 AS id#x] + 
+- OneRowRelation + + +-- !query +CREATE TABLE sql_path_relations_b.tbl USING parquet AS SELECT 2 AS id +-- !query analysis +CreateDataSourceTableAsSelectCommand `spark_catalog`.`sql_path_relations_b`.`tbl`, ErrorIfExists, [id] + +- Project [2 AS id#x] + +- OneRowRelation + + +-- !query +SET PATH = spark_catalog.sql_path_relations_a, spark_catalog.sql_path_relations_b, system.builtin +-- !query analysis +SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_relations_a)), SchemaInPath(List(spark_catalog, sql_path_relations_b)), SchemaInPath(List(system, builtin))] + + +-- !query +SELECT id FROM tbl AS from_first_schema +-- !query analysis +Project [id#x] ++- SubqueryAlias from_first_schema + +- SubqueryAlias spark_catalog.sql_path_relations_a.tbl + +- Relation spark_catalog.sql_path_relations_a.tbl[id#x] parquet + + +-- !query +SET PATH = spark_catalog.sql_path_relations_b, spark_catalog.sql_path_relations_a, system.builtin +-- !query analysis +SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_relations_b)), SchemaInPath(List(spark_catalog, sql_path_relations_a)), SchemaInPath(List(system, builtin))] + + +-- !query +SELECT id FROM tbl AS from_first_schema +-- !query analysis +Project [id#x] ++- SubqueryAlias from_first_schema + +- SubqueryAlias spark_catalog.sql_path_relations_b.tbl + +- Relation spark_catalog.sql_path_relations_b.tbl[id#x] parquet + + +-- !query +SET PATH = spark_catalog.default, system.builtin +-- !query analysis +SetPathCommand [SchemaInPath(List(spark_catalog, default)), SchemaInPath(List(system, builtin))] + + +-- !query +SELECT id FROM tbl +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", + "messageParameters" : { + "relationName" : "`tbl`", + "searchPath" : "[`spark_catalog`.`default`, `system`.`builtin`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 16, + "stopIndex" : 18, + "fragment" : 
"tbl" + } ] +} + + +-- !query +SET PATH = DEFAULT_PATH +-- !query analysis +SetPathCommand [DefaultPath] + + +-- !query +DROP TABLE sql_path_relations_a.tbl +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), sql_path_relations_a.tbl + + +-- !query +DROP TABLE sql_path_relations_b.tbl +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), sql_path_relations_b.tbl + + +-- !query +DROP SCHEMA sql_path_relations_a +-- !query analysis +DropNamespace false, false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_relations_a] + + +-- !query +DROP SCHEMA sql_path_relations_b +-- !query analysis +DropNamespace false, false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_relations_b] + + +-- !query +CREATE SCHEMA sql_path_views_a +-- !query analysis +CreateNamespace false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_views_a] + + +-- !query +CREATE SCHEMA sql_path_views_b +-- !query analysis +CreateNamespace false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_views_b] + + +-- !query +CREATE TABLE sql_path_views_a.frozen_t USING parquet AS SELECT 1 AS id +-- !query analysis +CreateDataSourceTableAsSelectCommand `spark_catalog`.`sql_path_views_a`.`frozen_t`, ErrorIfExists, [id] + +- Project [1 AS id#x] + +- OneRowRelation + + +-- !query +CREATE TABLE sql_path_views_b.frozen_t USING parquet AS SELECT 2 AS id +-- !query analysis +CreateDataSourceTableAsSelectCommand `spark_catalog`.`sql_path_views_b`.`frozen_t`, ErrorIfExists, [id] + +- Project [2 AS id#x] + +- OneRowRelation + + +-- !query +SET PATH = spark_catalog.sql_path_views_a, system.builtin +-- !query analysis +SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_views_a)), SchemaInPath(List(system, builtin))] + + +-- !query +CREATE VIEW default.v_path_frozen AS SELECT id FROM frozen_t +-- !query analysis +CreateViewCommand 
`spark_catalog`.`default`.`v_path_frozen`, SELECT id FROM frozen_t, false, false, PersistedView, COMPENSATION, true + +- Project [id#x] + +- SubqueryAlias spark_catalog.sql_path_views_a.frozen_t + +- Relation spark_catalog.sql_path_views_a.frozen_t[id#x] parquet + + +-- !query +SET PATH = spark_catalog.sql_path_views_b, system.builtin +-- !query analysis +SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_views_b)), SchemaInPath(List(system, builtin))] + + +-- !query +SELECT id FROM frozen_t AS bare_lookup_uses_live_path +-- !query analysis +Project [id#x] ++- SubqueryAlias bare_lookup_uses_live_path + +- SubqueryAlias spark_catalog.sql_path_views_b.frozen_t + +- Relation spark_catalog.sql_path_views_b.frozen_t[id#x] parquet + + +-- !query +SELECT id FROM default.v_path_frozen AS view_body_uses_frozen_path +-- !query analysis +Project [id#x] ++- SubqueryAlias view_body_uses_frozen_path + +- SubqueryAlias spark_catalog.default.v_path_frozen + +- View (`spark_catalog`.`default`.`v_path_frozen`, [id#x]) + +- Project [cast(id#x as int) AS id#x] + +- Project [id#x] + +- SubqueryAlias spark_catalog.sql_path_views_a.frozen_t + +- Relation spark_catalog.sql_path_views_a.frozen_t[id#x] parquet + + +-- !query +USE spark_catalog.sql_path_views_a +-- !query analysis +SetCatalogAndNamespace ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_views_a] + + +-- !query +CREATE VIEW sql_path_views_a.v_ctx AS +SELECT current_schema() AS cs, current_path() AS cp +-- !query analysis +CreateViewCommand `spark_catalog`.`sql_path_views_a`.`v_ctx`, SELECT current_schema() AS cs, current_path() AS cp, false, false, PersistedView, COMPENSATION, true + +- Project [current_schema() AS cs#x, current_path() AS cp#x] + +- OneRowRelation + + +-- !query +USE spark_catalog.sql_path_views_b +-- !query analysis +SetCatalogAndNamespace ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_views_b] + + +-- !query +SET PATH = DEFAULT_PATH +-- !query analysis +SetPathCommand 
[DefaultPath] + + +-- !query +SELECT cs, cp FROM sql_path_views_a.v_ctx +-- !query analysis +Project [cs#x, cp#x] ++- SubqueryAlias spark_catalog.sql_path_views_a.v_ctx + +- View (`spark_catalog`.`sql_path_views_a`.`v_ctx`, [cs#x, cp#x]) + +- Project [cast(cs#x as string) AS cs#x, cast(cp#x as string) AS cp#x] + +- Project [current_schema() AS cs#x, current_path() AS cp#x] + +- OneRowRelation + + +-- !query +USE spark_catalog.default +-- !query analysis +SetCatalogAndNamespace ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [default] + + +-- !query +SET PATH = DEFAULT_PATH +-- !query analysis +SetPathCommand [DefaultPath] + + +-- !query +DROP VIEW default.v_path_frozen +-- !query analysis +DropTableCommand `spark_catalog`.`default`.`v_path_frozen`, false, true, false + + +-- !query +DROP VIEW sql_path_views_a.v_ctx +-- !query analysis +DropTableCommand `spark_catalog`.`sql_path_views_a`.`v_ctx`, false, true, false + + +-- !query +DROP TABLE sql_path_views_a.frozen_t +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), sql_path_views_a.frozen_t + + +-- !query +DROP TABLE sql_path_views_b.frozen_t +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), sql_path_views_b.frozen_t + + +-- !query +DROP SCHEMA sql_path_views_a +-- !query analysis +DropNamespace false, false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_views_a] + + +-- !query +DROP SCHEMA sql_path_views_b +-- !query analysis +DropNamespace false, false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_views_b] + + +-- !query +CREATE SCHEMA sql_path_fn_a +-- !query analysis +CreateNamespace false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_fn_a] + + +-- !query +CREATE SCHEMA sql_path_fn_b +-- !query analysis +CreateNamespace false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_fn_b] + + +-- !query +CREATE TABLE sql_path_fn_a.frozen_t 
USING parquet AS SELECT 10 AS id +-- !query analysis +CreateDataSourceTableAsSelectCommand `spark_catalog`.`sql_path_fn_a`.`frozen_t`, ErrorIfExists, [id] + +- Project [10 AS id#x] + +- OneRowRelation + + +-- !query +CREATE TABLE sql_path_fn_b.frozen_t USING parquet AS SELECT 20 AS id +-- !query analysis +CreateDataSourceTableAsSelectCommand `spark_catalog`.`sql_path_fn_b`.`frozen_t`, ErrorIfExists, [id] + +- Project [20 AS id#x] + +- OneRowRelation + + +-- !query +SET PATH = spark_catalog.sql_path_fn_a, system.builtin +-- !query analysis +SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_fn_a)), SchemaInPath(List(system, builtin))] + + +-- !query +CREATE FUNCTION default.frozen_fn() +RETURNS INT +RETURN (SELECT MAX(id) FROM frozen_t) +-- !query analysis +CreateSQLFunctionCommand spark_catalog.default.frozen_fn, INT, (SELECT MAX(id) FROM frozen_t), false, false, false, false + + +-- !query +SET PATH = spark_catalog.sql_path_fn_b, system.builtin +-- !query analysis +SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_fn_b)), SchemaInPath(List(system, builtin))] + + +-- !query +SELECT MAX(id) FROM frozen_t AS bare_lookup_uses_live_path +-- !query analysis +Aggregate [max(id#x) AS max(id)#x] ++- SubqueryAlias bare_lookup_uses_live_path + +- SubqueryAlias spark_catalog.sql_path_fn_b.frozen_t + +- Relation spark_catalog.sql_path_fn_b.frozen_t[id#x] parquet + + +-- !query +SELECT default.frozen_fn() AS scalar_body_uses_frozen_path +-- !query analysis +Project [spark_catalog.default.frozen_fn() AS scalar_body_uses_frozen_path#x] +: +- Aggregate [max(id#x) AS max(id)#x] +: +- SubqueryAlias spark_catalog.sql_path_fn_a.frozen_t +: +- Relation spark_catalog.sql_path_fn_a.frozen_t[id#x] parquet ++- Project + +- OneRowRelation + + +-- !query +SET PATH = spark_catalog.sql_path_fn_a, system.builtin +-- !query analysis +SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_fn_a)), SchemaInPath(List(system, builtin))] + + +-- !query +CREATE FUNCTION 
default.frozen_tvf() +RETURNS TABLE(id INT) +RETURN SELECT MAX(id) AS id FROM frozen_t +-- !query analysis +CreateSQLFunctionCommand spark_catalog.default.frozen_tvf, id INT, SELECT MAX(id) AS id FROM frozen_t, true, false, false, false + + +-- !query +SET PATH = spark_catalog.sql_path_fn_b, system.builtin +-- !query analysis +SetPathCommand [SchemaInPath(List(spark_catalog, sql_path_fn_b)), SchemaInPath(List(system, builtin))] + + +-- !query +SELECT * FROM default.frozen_tvf() AS table_body_uses_frozen_path +-- !query analysis +Project [id#x] ++- SubqueryAlias table_body_uses_frozen_path + +- SQLFunctionNode spark_catalog.default.frozen_tvf + +- SubqueryAlias frozen_tvf + +- Project [cast(id#x as int) AS id#x] + +- Aggregate [max(id#x) AS id#x] + +- SubqueryAlias spark_catalog.sql_path_fn_a.frozen_t + +- Relation spark_catalog.sql_path_fn_a.frozen_t[id#x] parquet + + +-- !query +USE spark_catalog.sql_path_fn_a +-- !query analysis +SetCatalogAndNamespace ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_fn_a] + + +-- !query +CREATE FUNCTION sql_path_fn_a.f_ctx() +RETURNS STRING +RETURN concat(current_schema(), '::', current_path()) +-- !query analysis +CreateSQLFunctionCommand spark_catalog.sql_path_fn_a.f_ctx, STRING, concat(current_schema(), '::', current_path()), false, false, false, false + + +-- !query +USE spark_catalog.sql_path_fn_b +-- !query analysis +SetCatalogAndNamespace ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_fn_b] + + +-- !query +SET PATH = DEFAULT_PATH +-- !query analysis +SetPathCommand [DefaultPath] + + +-- !query +SELECT sql_path_fn_a.f_ctx() AS invoker_context +-- !query analysis +Project [spark_catalog.sql_path_fn_a.f_ctx() AS invoker_context#x] ++- Project + +- OneRowRelation + + +-- !query +USE spark_catalog.default +-- !query analysis +SetCatalogAndNamespace ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [default] + + +-- !query +SET PATH = DEFAULT_PATH +-- !query analysis +SetPathCommand 
[DefaultPath] + + +-- !query +DROP FUNCTION default.frozen_fn +-- !query analysis +DropFunctionCommand spark_catalog.default.frozen_fn, false, false + + +-- !query +DROP FUNCTION default.frozen_tvf +-- !query analysis +DropFunctionCommand spark_catalog.default.frozen_tvf, false, false + + +-- !query +DROP FUNCTION sql_path_fn_a.f_ctx +-- !query analysis +DropFunctionCommand spark_catalog.sql_path_fn_a.f_ctx, false, false + + +-- !query +DROP TABLE sql_path_fn_a.frozen_t +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), sql_path_fn_a.frozen_t + + +-- !query +DROP TABLE sql_path_fn_b.frozen_t +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), sql_path_fn_b.frozen_t + + +-- !query +DROP SCHEMA sql_path_fn_a +-- !query analysis +DropNamespace false, false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_fn_a] + + +-- !query +DROP SCHEMA sql_path_fn_b +-- !query analysis +DropNamespace false, false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_path_fn_b] + + +-- !query +SET spark.sql.defaultPath = system.session, system.builtin +-- !query analysis +SetCommand (spark.sql.defaultPath,Some(system.session, system.builtin)) + + +-- !query +SET PATH = system.builtin, system.session +-- !query analysis +SetPathCommand [SchemaInPath(List(system, builtin)), SchemaInPath(List(system, session))] + + +-- !query +SELECT current_path() AS explicit_set_path_wins_over_conf +-- !query analysis +Project [current_path() AS explicit_set_path_wins_over_conf#x] ++- OneRowRelation + + +-- !query +SET PATH = DEFAULT_PATH +-- !query analysis +SetPathCommand [DefaultPath] + + +-- !query +RESET spark.sql.defaultPath +-- !query analysis +ResetCommand spark.sql.defaultPath + + +-- !query +SET spark.sql.defaultPath = system.session, system.builtin, current_schema +-- !query analysis +SetCommand (spark.sql.defaultPath,Some(system.session, system.builtin, current_schema)) + 
+ +-- !query +USE spark_catalog.default +-- !query analysis +SetCatalogAndNamespace ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [default] + + +-- !query +SET PATH = DEFAULT_PATH +-- !query analysis +SetPathCommand [DefaultPath] + + +-- !query +SELECT current_path() AS default_path_expands_to_conf +-- !query analysis +Project [current_path() AS default_path_expands_to_conf#x] ++- OneRowRelation + + +-- !query +RESET spark.sql.defaultPath +-- !query analysis +ResetCommand spark.sql.defaultPath + + +-- !query +SET PATH = DEFAULT_PATH +-- !query analysis +SetPathCommand [DefaultPath] + + +-- !query +SET spark.sql.defaultPath = this is not a path +-- !query analysis +org.apache.spark.SparkIllegalArgumentException +{ + "errorClass" : "INVALID_CONF_VALUE.REQUIREMENT", + "sqlState" : "22022", + "messageParameters" : { + "confName" : "spark.sql.defaultPath", + "confRequirement" : "The value must be empty or a comma-separated SET PATH element list (same grammar as SET PATH, except PATH is not allowed).", + "confValue" : "this is not a path" + } +} + + +-- !query +SET spark.sql.defaultPath = PATH, system.builtin +-- !query analysis +org.apache.spark.SparkIllegalArgumentException +{ + "errorClass" : "INVALID_CONF_VALUE.REQUIREMENT", + "sqlState" : "22022", + "messageParameters" : { + "confName" : "spark.sql.defaultPath", + "confRequirement" : "The value must be empty or a comma-separated SET PATH element list (same grammar as SET PATH, except PATH is not allowed).", + "confValue" : "PATH, system.builtin" + } +} + + +-- !query +SET spark.sql.path.enabled = false -- !query analysis SetCommand (spark.sql.path.enabled,Some(false)) diff --git a/sql/core/src/test/resources/sql-tests/inputs/sql-path.sql b/sql/core/src/test/resources/sql-tests/inputs/sql-path.sql index 4e6ab68286b5f..e9d1d149e7fa3 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/sql-path.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/sql-path.sql @@ -1,80 +1,410 @@ --- SPARK-56853: SQL 
Standard PATH golden file coverage. --- Covers the SET PATH grammar, CURRENT_PATH() reflection, path-driven --- routine/relation resolution, and the most common static error conditions. +-- ============================================================================ +-- SQL Standard PATH golden coverage +-- ============================================================================ +-- +-- This file is the readable, SQL-level reference for what the PATH feature +-- does. It is the primary place to look up "how does SET PATH behave when +-- I write ..." before reaching for the Scala unit suites. Tests that need +-- features not expressible in pure SQL (multi-threaded execution, session +-- cloning, view-metadata inspection, Connect/PySpark plumbing) live in the +-- matching Scala / Python suites. +-- +-- Table of Contents +-- ----------------- +-- 1. Default path observability (no SET PATH issued) +-- 2. SET PATH grammar +-- 2.1 Literal schema entries; case preservation; backtick quoting +-- 2.2 DEFAULT_PATH shortcut +-- 2.3 SYSTEM_PATH shortcut +-- 2.4 PATH keyword (append to live path) +-- 2.5 current_schema / current_database shortcuts +-- 3. CURRENT_PATH() builtin +-- 3.1 ANSI no-parens form equals current_path() +-- 3.2 Argument-count validation +-- 4. Static error conditions at SET PATH +-- 4.1 Literal duplicate +-- 4.2 DEFAULT_PATH expansion duplicate +-- 4.3 SYSTEM_PATH expansion duplicate +-- 4.4 current_database vs current_schema cross-alias duplicate +-- 4.5 Single-part schema reference rejected +-- 5. Routine resolution via PATH +-- 5.1 Persistent scalar function follows PATH +-- 5.2 Persistent table function follows PATH +-- 5.3 First-match ordering across two schemas on PATH +-- 5.4 Unqualified miss when schema is not on PATH +-- 6. Relation resolution via PATH +-- 6.1 Table resolved via PATH; first-match ordering +-- 6.2 Unqualified miss when schema is not on PATH +-- 7. 
Persisted view frozen-path behavior +-- 7.1 View body resolves via creation-time PATH (not invoker PATH) +-- 7.2 current_schema / current_path in view body use invoker context +-- 8. SQL function frozen-path behavior +-- 8.1 Scalar function body resolves via creation-time PATH +-- 8.2 Table function body resolves via creation-time PATH +-- 8.3 current_schema / current_path in function body use invoker context +-- 9. DEFAULT_PATH conf (spark.sql.defaultPath) +-- 9.1 Explicit SET PATH overrides the conf +-- 9.2 SET PATH = DEFAULT_PATH expands to the conf value +-- 9.3 Invalid conf value rejected +-- 10. PATH disabled +-- 10.1 current_path() still resolves (regular builtin) +-- 10.2 SET PATH itself is rejected +-- ============================================================================ --SET spark.sql.path.enabled=true --- Default path (no SET PATH issued, no DEFAULT_PATH conf): the spark-builtin --- default ordering with current_schema in the catalog slot. + +-- ============================================================================ +-- 1. Default path observability (no SET PATH issued) +-- ============================================================================ + +-- The session was opened with PATH enabled and no `SET PATH` issued, so the +-- effective path is the spark-builtin default ordering with current_schema in +-- the catalog slot. SELECT current_path(); --- A literal SET PATH that pins both a user schema and system.builtin. + +-- ============================================================================ +-- 2. SET PATH grammar +-- ============================================================================ + +-- 2.1 Literal schema entries; case preservation; backtick quoting ------------- + SET PATH = spark_catalog.default, system.builtin; SELECT current_path(); --- DEFAULT_PATH restores the spark-builtin default ordering for the session. +-- Case is preserved exactly as typed. 
+SET PATH = Spark_Catalog.Default, System.Builtin; +SELECT current_path(); + +-- Backtick-quoted identifiers that contain dots round-trip with quoting. +SET PATH = spark_catalog.`sch.b`, system.builtin; +SELECT current_path(); + +-- Multi-level namespace (3+ parts) is accepted by the grammar. The stored entry +-- is verified at the Scala layer (SetPathSuite) because the session catalog +-- only supports single-part namespaces, so calling current_path() while a +-- multi-level entry is on the path would surface that catalog limitation +-- rather than the PATH grammar property under test here. + +SET PATH = DEFAULT_PATH; + + +-- 2.2 DEFAULT_PATH shortcut --------------------------------------------------- + SET PATH = DEFAULT_PATH; SELECT current_path(); --- SYSTEM_PATH expands to the two system entries in the default order. + +-- 2.3 SYSTEM_PATH shortcut ---------------------------------------------------- + SET PATH = SYSTEM_PATH; SELECT current_path(); --- The PATH keyword reuses the live path; a new entry can be appended. + +-- 2.4 PATH keyword (append to live path) -------------------------------------- + SET PATH = spark_catalog.default, system.builtin; SET PATH = PATH, system.session; SELECT current_path(); --- current_schema / current_database expand to the live USE SCHEMA. + +-- 2.5 current_schema / current_database shortcuts ----------------------------- + +USE spark_catalog.default; SET PATH = current_schema, system.builtin; SELECT current_path(); + +-- current_database is a SQL alias for current_schema. SET PATH = current_database, system.builtin; SELECT current_path(); --- ANSI keyword form (no parens) returns the same string as current_path(). +SET PATH = DEFAULT_PATH; + + +-- ============================================================================ +-- 3. 
CURRENT_PATH() builtin +-- ============================================================================ + +-- 3.1 ANSI no-parens form equals current_path() ------------------------------ + SET PATH = spark_catalog.default, system.builtin; -SELECT CURRENT_PATH = current_path() AS same; +SELECT CURRENT_PATH = current_path() AS ansi_form_matches; + + +-- 3.2 Argument-count validation ---------------------------------------------- + +SELECT current_path(1); + +SET PATH = DEFAULT_PATH; + + +-- ============================================================================ +-- 4. Static error conditions at SET PATH +-- ============================================================================ + +-- 4.1 Literal duplicate ------------------------------------------------------- + +SET PATH = spark_catalog.default, spark_catalog.default; + +-- Case-insensitive duplicate is still flagged. +SET PATH = spark_catalog.DEFAULT, spark_catalog.default; + + +-- 4.2 DEFAULT_PATH expansion duplicate ---------------------------------------- + +-- DEFAULT_PATH already contains system.builtin; listing it again is a duplicate +-- after expansion. +SET PATH = DEFAULT_PATH, system.builtin; + + +-- 4.3 SYSTEM_PATH expansion duplicate ----------------------------------------- + +SET PATH = SYSTEM_PATH, SYSTEM_PATH; + + +-- 4.4 current_database vs current_schema cross-alias duplicate ---------------- + +SET PATH = current_database, current_schema; + + +-- 4.5 Single-part schema reference rejected ----------------------------------- + +SET PATH = my_schema_no_catalog; + + +-- ============================================================================ +-- 5. Routine resolution via PATH +-- ============================================================================ + +-- 5.1 Persistent scalar function follows PATH --------------------------------- --- Routine resolution follows the path. 
CREATE SCHEMA sql_path_routines; CREATE FUNCTION sql_path_routines.pick() RETURNS INT RETURN 7; SET PATH = spark_catalog.sql_path_routines, spark_catalog.default, system.builtin; SELECT pick(); SET PATH = DEFAULT_PATH; + + +-- 5.2 Persistent table function follows PATH ---------------------------------- + +CREATE FUNCTION sql_path_routines.pick_tvf() +RETURNS TABLE(val INT) +RETURN SELECT 7 AS val; +SET PATH = spark_catalog.sql_path_routines, spark_catalog.default, system.builtin; +SELECT * FROM pick_tvf(); +SET PATH = DEFAULT_PATH; + + +-- 5.3 First-match ordering across two schemas on PATH ------------------------ + +CREATE SCHEMA sql_path_routines_b; +CREATE FUNCTION sql_path_routines_b.pick() RETURNS INT RETURN 11; + +SET PATH = spark_catalog.sql_path_routines, spark_catalog.sql_path_routines_b, system.builtin; +SELECT pick() AS from_first_schema; +SET PATH = spark_catalog.sql_path_routines_b, spark_catalog.sql_path_routines, system.builtin; +SELECT pick() AS from_first_schema; +SET PATH = DEFAULT_PATH; + + +-- 5.4 Unqualified miss when schema is not on PATH ----------------------------- + +SET PATH = spark_catalog.default, system.builtin; +SELECT pick(); + +-- Cleanup section 5. +SET PATH = DEFAULT_PATH; DROP FUNCTION sql_path_routines.pick; +DROP FUNCTION sql_path_routines.pick_tvf; +DROP FUNCTION sql_path_routines_b.pick; DROP SCHEMA sql_path_routines; +DROP SCHEMA sql_path_routines_b; + --- Relation resolution follows the path (first-match wins). -CREATE SCHEMA sql_path_relations; -CREATE TABLE sql_path_relations.tbl USING parquet AS SELECT 42 AS id; -SET PATH = spark_catalog.sql_path_relations, spark_catalog.default, system.builtin; +-- ============================================================================ +-- 6. 
Relation resolution via PATH +-- ============================================================================ + +CREATE SCHEMA sql_path_relations_a; +CREATE SCHEMA sql_path_relations_b; +CREATE TABLE sql_path_relations_a.tbl USING parquet AS SELECT 1 AS id; +CREATE TABLE sql_path_relations_b.tbl USING parquet AS SELECT 2 AS id; + +-- 6.1 First-match ordering ---------------------------------------------------- + +SET PATH = spark_catalog.sql_path_relations_a, spark_catalog.sql_path_relations_b, system.builtin; +SELECT id FROM tbl AS from_first_schema; +SET PATH = spark_catalog.sql_path_relations_b, spark_catalog.sql_path_relations_a, system.builtin; +SELECT id FROM tbl AS from_first_schema; + + +-- 6.2 Unqualified miss when schema is not on PATH ----------------------------- + +SET PATH = spark_catalog.default, system.builtin; SELECT id FROM tbl; + +-- Cleanup section 6. SET PATH = DEFAULT_PATH; -DROP TABLE sql_path_relations.tbl; -DROP SCHEMA sql_path_relations; +DROP TABLE sql_path_relations_a.tbl; +DROP TABLE sql_path_relations_b.tbl; +DROP SCHEMA sql_path_relations_a; +DROP SCHEMA sql_path_relations_b; --- Static error cases --------------------------------------------------------- --- Static duplicate literal at SET PATH time. -SET PATH = spark_catalog.default, spark_catalog.default; +-- ============================================================================ +-- 7. Persisted view frozen-path behavior +-- ============================================================================ --- DEFAULT_PATH already contains system.builtin; listing it again is a duplicate. -SET PATH = DEFAULT_PATH, system.builtin; +CREATE SCHEMA sql_path_views_a; +CREATE SCHEMA sql_path_views_b; +CREATE TABLE sql_path_views_a.frozen_t USING parquet AS SELECT 1 AS id; +CREATE TABLE sql_path_views_b.frozen_t USING parquet AS SELECT 2 AS id; --- Single-part name in SET PATH is not a valid qualified schema reference. 
-SET PATH = my_schema_no_catalog; +-- 7.1 View body resolves via creation-time PATH (not invoker PATH) ------------ --- current_path() takes no arguments. -SELECT current_path(1); +SET PATH = spark_catalog.sql_path_views_a, system.builtin; +CREATE VIEW default.v_path_frozen AS SELECT id FROM frozen_t; + +-- Flip the live PATH; the view body's unqualified `frozen_t` must still +-- resolve through the schema captured at CREATE VIEW (sql_path_views_a, id=1). +-- A bare query against `frozen_t` from the session follows the LIVE PATH and +-- returns the other table's row (id=2). +SET PATH = spark_catalog.sql_path_views_b, system.builtin; +SELECT id FROM frozen_t AS bare_lookup_uses_live_path; +SELECT id FROM default.v_path_frozen AS view_body_uses_frozen_path; + + +-- 7.2 current_schema / current_path in view body use invoker context ---------- + +USE spark_catalog.sql_path_views_a; +CREATE VIEW sql_path_views_a.v_ctx AS +SELECT current_schema() AS cs, current_path() AS cp; + +USE spark_catalog.sql_path_views_b; +SET PATH = DEFAULT_PATH; +-- The view body re-evaluates current_schema() / current_path() on every +-- invocation against the INVOKER's context, not the creator's. The result +-- here must reflect sql_path_views_b (the invoker), not sql_path_views_a +-- (the creator's schema at CREATE VIEW). +SELECT cs, cp FROM sql_path_views_a.v_ctx; + +-- Cleanup section 7. +USE spark_catalog.default; +SET PATH = DEFAULT_PATH; +DROP VIEW default.v_path_frozen; +DROP VIEW sql_path_views_a.v_ctx; +DROP TABLE sql_path_views_a.frozen_t; +DROP TABLE sql_path_views_b.frozen_t; +DROP SCHEMA sql_path_views_a; +DROP SCHEMA sql_path_views_b; + + +-- ============================================================================ +-- 8. 
SQL function frozen-path behavior +-- ============================================================================ + +CREATE SCHEMA sql_path_fn_a; +CREATE SCHEMA sql_path_fn_b; +CREATE TABLE sql_path_fn_a.frozen_t USING parquet AS SELECT 10 AS id; +CREATE TABLE sql_path_fn_b.frozen_t USING parquet AS SELECT 20 AS id; + +-- 8.1 Scalar function body resolves via creation-time PATH -------------------- --- PATH disabled -------------------------------------------------------------- --- Flip the feature flag inline so the disabled behavior is exercised in the --- same golden run. +SET PATH = spark_catalog.sql_path_fn_a, system.builtin; +CREATE FUNCTION default.frozen_fn() +RETURNS INT +RETURN (SELECT MAX(id) FROM frozen_t); -SET spark.sql.path.enabled=false; +SET PATH = spark_catalog.sql_path_fn_b, system.builtin; +SELECT MAX(id) FROM frozen_t AS bare_lookup_uses_live_path; +SELECT default.frozen_fn() AS scalar_body_uses_frozen_path; + + +-- 8.2 Table function body resolves via creation-time PATH --------------------- + +SET PATH = spark_catalog.sql_path_fn_a, system.builtin; +CREATE FUNCTION default.frozen_tvf() +RETURNS TABLE(id INT) +RETURN SELECT MAX(id) AS id FROM frozen_t; + +SET PATH = spark_catalog.sql_path_fn_b, system.builtin; +SELECT * FROM default.frozen_tvf() AS table_body_uses_frozen_path; + + +-- 8.3 current_schema / current_path in function body use invoker context ----- + +USE spark_catalog.sql_path_fn_a; +CREATE FUNCTION sql_path_fn_a.f_ctx() +RETURNS STRING +RETURN concat(current_schema(), '::', current_path()); + +USE spark_catalog.sql_path_fn_b; +SET PATH = DEFAULT_PATH; +-- Like 7.2: current_schema() / current_path() in a SQL function body bind to +-- the INVOKER's context, not the creator's. +SELECT sql_path_fn_a.f_ctx() AS invoker_context; + +-- Cleanup section 8. 
+USE spark_catalog.default; +SET PATH = DEFAULT_PATH; +DROP FUNCTION default.frozen_fn; +DROP FUNCTION default.frozen_tvf; +DROP FUNCTION sql_path_fn_a.f_ctx; +DROP TABLE sql_path_fn_a.frozen_t; +DROP TABLE sql_path_fn_b.frozen_t; +DROP SCHEMA sql_path_fn_a; +DROP SCHEMA sql_path_fn_b; + + +-- ============================================================================ +-- 9. DEFAULT_PATH conf (spark.sql.defaultPath) +-- ============================================================================ +-- +-- The conf's RHS is captured as a raw string by the SQL `SET key = value` +-- form; keywords like `current_schema` and shortcut tokens like `SYSTEM_PATH` +-- must be written WITHOUT backticks so the conf's SET-PATH-grammar validator +-- recognizes them as path tokens rather than 1-part quoted identifiers. + +-- 9.1 Explicit SET PATH overrides the conf ------------------------------------ + +SET spark.sql.defaultPath = system.session, system.builtin; +SET PATH = system.builtin, system.session; +SELECT current_path() AS explicit_set_path_wins_over_conf; +SET PATH = DEFAULT_PATH; +RESET spark.sql.defaultPath; + + +-- 9.2 SET PATH = DEFAULT_PATH expands to the conf value ----------------------- + +SET spark.sql.defaultPath = system.session, system.builtin, current_schema; +USE spark_catalog.default; +SET PATH = DEFAULT_PATH; +SELECT current_path() AS default_path_expands_to_conf; +RESET spark.sql.defaultPath; +SET PATH = DEFAULT_PATH; + + +-- 9.3 Invalid conf value rejected at SET time --------------------------------- + +SET spark.sql.defaultPath = this is not a path; + +-- The PATH keyword is not allowed in the conf value (it would create a cycle). +SET spark.sql.defaultPath = PATH, system.builtin; + + +-- ============================================================================ +-- 10. 
PATH disabled +-- ============================================================================ + +SET spark.sql.path.enabled = false; + + +-- 10.1 current_path() still resolves (regular builtin) ------------------------ --- current_path() is still resolvable (it is a regular builtin). SELECT current_path() IS NOT NULL AS has_path; --- SET PATH itself is rejected with UNSUPPORTED_FEATURE.SET_PATH_WHEN_DISABLED. + +-- 10.2 SET PATH itself is rejected -------------------------------------------- + SET PATH = spark_catalog.default; diff --git a/sql/core/src/test/resources/sql-tests/results/sql-path.sql.out b/sql/core/src/test/resources/sql-tests/results/sql-path.sql.out index 7904272b6c6c1..52d01ccb80bad 100644 --- a/sql/core/src/test/resources/sql-tests/results/sql-path.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/sql-path.sql.out @@ -23,6 +23,46 @@ struct spark_catalog.default,system.builtin +-- !query +SET PATH = Spark_Catalog.Default, System.Builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT current_path() +-- !query schema +struct +-- !query output +Spark_Catalog.Default,System.Builtin + + +-- !query +SET PATH = spark_catalog.`sch.b`, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT current_path() +-- !query schema +struct +-- !query output +spark_catalog.`sch.b`,system.builtin + + +-- !query +SET PATH = DEFAULT_PATH +-- !query schema +struct<> +-- !query output + + + -- !query SET PATH = DEFAULT_PATH -- !query schema @@ -79,6 +119,14 @@ struct spark_catalog.default,system.builtin,system.session +-- !query +USE spark_catalog.default +-- !query schema +struct<> +-- !query output + + + -- !query SET PATH = current_schema, system.builtin -- !query schema @@ -111,6 +159,14 @@ struct spark_catalog.default,system.builtin +-- !query +SET PATH = DEFAULT_PATH +-- !query schema +struct<> +-- !query output + + + -- !query SET PATH = spark_catalog.default, system.builtin -- !query schema @@ 
-120,13 +176,136 @@ struct<> -- !query -SELECT CURRENT_PATH = current_path() AS same +SELECT CURRENT_PATH = current_path() AS ansi_form_matches -- !query schema -struct +struct -- !query output true +-- !query +SELECT current_path(1) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION", + "sqlState" : "42605", + "messageParameters" : { + "actualNum" : "1", + "docroot" : "https://spark.apache.org/docs/latest", + "expectedNum" : "0", + "functionName" : "`current_path`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 22, + "fragment" : "current_path(1)" + } ] +} + + +-- !query +SET PATH = DEFAULT_PATH +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = spark_catalog.default, spark_catalog.default +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DUPLICATE_SQL_PATH_ENTRY", + "sqlState" : "42732", + "messageParameters" : { + "pathEntry" : "spark_catalog.default" + } +} + + +-- !query +SET PATH = spark_catalog.DEFAULT, spark_catalog.default +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DUPLICATE_SQL_PATH_ENTRY", + "sqlState" : "42732", + "messageParameters" : { + "pathEntry" : "spark_catalog.default" + } +} + + +-- !query +SET PATH = DEFAULT_PATH, system.builtin +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DUPLICATE_SQL_PATH_ENTRY", + "sqlState" : "42732", + "messageParameters" : { + "pathEntry" : "system.builtin" + } +} + + +-- !query +SET PATH = SYSTEM_PATH, SYSTEM_PATH +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DUPLICATE_SQL_PATH_ENTRY", + "sqlState" : "42732", + "messageParameters" : { + "pathEntry" : "system.builtin" + } +} + + +-- !query +SET PATH = 
current_database, current_schema +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DUPLICATE_SQL_PATH_ENTRY", + "sqlState" : "42732", + "messageParameters" : { + "pathEntry" : "current_schema" + } +} + + +-- !query +SET PATH = my_schema_no_catalog +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "INVALID_SQL_PATH_SCHEMA_REFERENCE", + "sqlState" : "42601", + "messageParameters" : { + "qualifiedName" : "my_schema_no_catalog" + } +} + + -- !query CREATE SCHEMA sql_path_routines -- !query schema @@ -168,7 +347,9 @@ struct<> -- !query -DROP FUNCTION sql_path_routines.pick +CREATE FUNCTION sql_path_routines.pick_tvf() +RETURNS TABLE(val INT) +RETURN SELECT 7 AS val -- !query schema struct<> -- !query output @@ -176,7 +357,7 @@ struct<> -- !query -DROP SCHEMA sql_path_routines +SET PATH = spark_catalog.sql_path_routines, spark_catalog.default, system.builtin -- !query schema struct<> -- !query output @@ -184,15 +365,15 @@ struct<> -- !query -CREATE SCHEMA sql_path_relations +SELECT * FROM pick_tvf() -- !query schema -struct<> +struct -- !query output - +7 -- !query -CREATE TABLE sql_path_relations.tbl USING parquet AS SELECT 42 AS id +SET PATH = DEFAULT_PATH -- !query schema struct<> -- !query output @@ -200,7 +381,7 @@ struct<> -- !query -SET PATH = spark_catalog.sql_path_relations, spark_catalog.default, system.builtin +CREATE SCHEMA sql_path_routines_b -- !query schema struct<> -- !query output @@ -208,15 +389,15 @@ struct<> -- !query -SELECT id FROM tbl +CREATE FUNCTION sql_path_routines_b.pick() RETURNS INT RETURN 11 -- !query schema -struct +struct<> -- !query output -42 + -- !query -SET PATH = DEFAULT_PATH +SET PATH = spark_catalog.sql_path_routines, spark_catalog.sql_path_routines_b, system.builtin -- !query schema struct<> -- !query output @@ -224,15 +405,15 @@ struct<> -- !query -DROP TABLE sql_path_relations.tbl +SELECT pick() AS 
from_first_schema -- !query schema -struct<> +struct -- !query output - +7 -- !query -DROP SCHEMA sql_path_relations +SET PATH = spark_catalog.sql_path_routines_b, spark_catalog.sql_path_routines, system.builtin -- !query schema struct<> -- !query output @@ -240,77 +421,758 @@ struct<> -- !query -SET PATH = spark_catalog.default, spark_catalog.default +SELECT pick() AS from_first_schema -- !query schema -struct<> +struct -- !query output -org.apache.spark.sql.AnalysisException -{ - "errorClass" : "DUPLICATE_SQL_PATH_ENTRY", - "sqlState" : "42732", - "messageParameters" : { - "pathEntry" : "spark_catalog.default" - } -} +11 -- !query -SET PATH = DEFAULT_PATH, system.builtin +SET PATH = DEFAULT_PATH -- !query schema struct<> -- !query output -org.apache.spark.sql.AnalysisException -{ - "errorClass" : "DUPLICATE_SQL_PATH_ENTRY", - "sqlState" : "42732", - "messageParameters" : { - "pathEntry" : "system.builtin" - } -} + -- !query -SET PATH = my_schema_no_catalog +SET PATH = spark_catalog.default, system.builtin -- !query schema struct<> -- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "INVALID_SQL_PATH_SCHEMA_REFERENCE", - "sqlState" : "42601", - "messageParameters" : { - "qualifiedName" : "my_schema_no_catalog" - } -} + -- !query -SELECT current_path(1) +SELECT pick() -- !query schema struct<> -- !query output org.apache.spark.sql.AnalysisException { - "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION", - "sqlState" : "42605", + "errorClass" : "UNRESOLVED_ROUTINE", + "sqlState" : "42883", "messageParameters" : { - "actualNum" : "1", - "docroot" : "https://spark.apache.org/docs/latest", - "expectedNum" : "0", - "functionName" : "`current_path`" + "routineName" : "`pick`", + "searchPath" : "[`spark_catalog`.`default`, `system`.`builtin`]" }, "queryContext" : [ { "objectType" : "", "objectName" : "", "startIndex" : 8, - "stopIndex" : 22, - "fragment" : "current_path(1)" + "stopIndex" : 13, + "fragment" : "pick()" } ] } -- !query 
-SET spark.sql.path.enabled=false +SET PATH = DEFAULT_PATH +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP FUNCTION sql_path_routines.pick +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP FUNCTION sql_path_routines.pick_tvf +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP FUNCTION sql_path_routines_b.pick +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP SCHEMA sql_path_routines +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP SCHEMA sql_path_routines_b +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE SCHEMA sql_path_relations_a +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE SCHEMA sql_path_relations_b +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE sql_path_relations_a.tbl USING parquet AS SELECT 1 AS id +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE sql_path_relations_b.tbl USING parquet AS SELECT 2 AS id +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = spark_catalog.sql_path_relations_a, spark_catalog.sql_path_relations_b, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT id FROM tbl AS from_first_schema +-- !query schema +struct +-- !query output +1 + + +-- !query +SET PATH = spark_catalog.sql_path_relations_b, spark_catalog.sql_path_relations_a, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT id FROM tbl AS from_first_schema +-- !query schema +struct +-- !query output +2 + + +-- !query +SET PATH = spark_catalog.default, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT id FROM tbl +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", + "messageParameters" : { + "relationName" : "`tbl`", + "searchPath" : 
"[`spark_catalog`.`default`, `system`.`builtin`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 16, + "stopIndex" : 18, + "fragment" : "tbl" + } ] +} + + +-- !query +SET PATH = DEFAULT_PATH +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE sql_path_relations_a.tbl +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE sql_path_relations_b.tbl +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP SCHEMA sql_path_relations_a +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP SCHEMA sql_path_relations_b +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE SCHEMA sql_path_views_a +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE SCHEMA sql_path_views_b +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE sql_path_views_a.frozen_t USING parquet AS SELECT 1 AS id +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE sql_path_views_b.frozen_t USING parquet AS SELECT 2 AS id +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = spark_catalog.sql_path_views_a, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE VIEW default.v_path_frozen AS SELECT id FROM frozen_t +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = spark_catalog.sql_path_views_b, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT id FROM frozen_t AS bare_lookup_uses_live_path +-- !query schema +struct +-- !query output +2 + + +-- !query +SELECT id FROM default.v_path_frozen AS view_body_uses_frozen_path +-- !query schema +struct +-- !query output +1 + + +-- !query +USE spark_catalog.sql_path_views_a +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE VIEW sql_path_views_a.v_ctx AS +SELECT current_schema() AS cs, current_path() AS cp +-- !query schema +struct<> +-- !query 
output + + + +-- !query +USE spark_catalog.sql_path_views_b +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = DEFAULT_PATH +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT cs, cp FROM sql_path_views_a.v_ctx +-- !query schema +struct +-- !query output +sql_path_views_b system.builtin,system.session,spark_catalog.sql_path_views_b + + +-- !query +USE spark_catalog.default +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = DEFAULT_PATH +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP VIEW default.v_path_frozen +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP VIEW sql_path_views_a.v_ctx +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE sql_path_views_a.frozen_t +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE sql_path_views_b.frozen_t +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP SCHEMA sql_path_views_a +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP SCHEMA sql_path_views_b +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE SCHEMA sql_path_fn_a +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE SCHEMA sql_path_fn_b +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE sql_path_fn_a.frozen_t USING parquet AS SELECT 10 AS id +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE sql_path_fn_b.frozen_t USING parquet AS SELECT 20 AS id +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = spark_catalog.sql_path_fn_a, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE FUNCTION default.frozen_fn() +RETURNS INT +RETURN (SELECT MAX(id) FROM frozen_t) +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = spark_catalog.sql_path_fn_b, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT 
MAX(id) FROM frozen_t AS bare_lookup_uses_live_path +-- !query schema +struct<max(id):int> +-- !query output +20 + + +-- !query +SELECT default.frozen_fn() AS scalar_body_uses_frozen_path +-- !query schema +struct<scalar_body_uses_frozen_path:int> +-- !query output +10 + + +-- !query +SET PATH = spark_catalog.sql_path_fn_a, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE FUNCTION default.frozen_tvf() +RETURNS TABLE(id INT) +RETURN SELECT MAX(id) AS id FROM frozen_t +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = spark_catalog.sql_path_fn_b, system.builtin +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM default.frozen_tvf() AS table_body_uses_frozen_path +-- !query schema +struct<id:int> +-- !query output +10 + + +-- !query +USE spark_catalog.sql_path_fn_a +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE FUNCTION sql_path_fn_a.f_ctx() +RETURNS STRING +RETURN concat(current_schema(), '::', current_path()) +-- !query schema +struct<> +-- !query output + + + +-- !query +USE spark_catalog.sql_path_fn_b +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = DEFAULT_PATH +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT sql_path_fn_a.f_ctx() AS invoker_context +-- !query schema +struct<invoker_context:string> +-- !query output +sql_path_fn_b::system.builtin,system.session,spark_catalog.sql_path_fn_b + + +-- !query +USE spark_catalog.default +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = DEFAULT_PATH +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP FUNCTION default.frozen_fn +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP FUNCTION default.frozen_tvf +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP FUNCTION sql_path_fn_a.f_ctx +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE sql_path_fn_a.frozen_t +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE 
sql_path_fn_b.frozen_t +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP SCHEMA sql_path_fn_a +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP SCHEMA sql_path_fn_b +-- !query schema +struct<> +-- !query output + + + +-- !query +SET spark.sql.defaultPath = system.session, system.builtin +-- !query schema +struct<key:string,value:string> +-- !query output +spark.sql.defaultPath system.session, system.builtin + + +-- !query +SET PATH = system.builtin, system.session +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT current_path() AS explicit_set_path_wins_over_conf +-- !query schema +struct<explicit_set_path_wins_over_conf:string> +-- !query output +system.builtin,system.session + + +-- !query +SET PATH = DEFAULT_PATH +-- !query schema +struct<> +-- !query output + + + +-- !query +RESET spark.sql.defaultPath +-- !query schema +struct<> +-- !query output + + + +-- !query +SET spark.sql.defaultPath = system.session, system.builtin, current_schema +-- !query schema +struct<key:string,value:string> +-- !query output +spark.sql.defaultPath system.session, system.builtin, current_schema + + +-- !query +USE spark_catalog.default +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = DEFAULT_PATH +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT current_path() AS default_path_expands_to_conf +-- !query schema +struct<default_path_expands_to_conf:string> +-- !query output +system.session,system.builtin,spark_catalog.default + + +-- !query +RESET spark.sql.defaultPath +-- !query schema +struct<> +-- !query output + + + +-- !query +SET PATH = DEFAULT_PATH +-- !query schema +struct<> +-- !query output + + + +-- !query +SET spark.sql.defaultPath = this is not a path +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkIllegalArgumentException +{ + "errorClass" : "INVALID_CONF_VALUE.REQUIREMENT", + "sqlState" : "22022", + "messageParameters" : { + "confName" : "spark.sql.defaultPath", + "confRequirement" : "The value must be empty or a comma-separated SET PATH element list (same grammar as 
SET PATH, except PATH is not allowed).", + "confValue" : "this is not a path" + } +} + + +-- !query +SET spark.sql.defaultPath = PATH, system.builtin +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkIllegalArgumentException +{ + "errorClass" : "INVALID_CONF_VALUE.REQUIREMENT", + "sqlState" : "22022", + "messageParameters" : { + "confName" : "spark.sql.defaultPath", + "confRequirement" : "The value must be empty or a comma-separated SET PATH element list (same grammar as SET PATH, except PATH is not allowed).", + "confValue" : "PATH, system.builtin" + } +} + + +-- !query +SET spark.sql.path.enabled = false -- !query schema struct -- !query output diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SetPathSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SetPathSuite.scala index af6976d797e5a..7ed601b083894 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SetPathSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SetPathSuite.scala @@ -503,7 +503,7 @@ class SetPathSuite extends SharedSparkSession { } } - // --- cloneSession() propagation matrix (SPARK-56853) ---------------------- + // --- cloneSession() propagation matrix -------------------------------------- // The cloned session is built via `BaseSessionStateBuilder` from a parent // `SessionState`. Per-component hand-offs on clone: // - `SessionCatalog.copyStateTo` copies `currentDb` and `tempViews`, @@ -887,7 +887,7 @@ class SetPathSuite extends SharedSparkSession { test("path-driven COUNT(*) rewrite gate: temp count shadowing builtin under SET PATH " + "(session-first) suppresses the * -> 1 rewrite") { - // SPARK-56853: `Analyzer.matchesFunctionName` consults + // `Analyzer.matchesFunctionName` consults // `FunctionResolution.isSessionBeforeBuiltinInPath` to decide whether COUNT(*) is the // builtin (eligible for the COUNT(*) -> COUNT(1) shortcut) or a user-defined override. 
// Default `sessionFunctionResolutionOrder` is "second", so creating a temp count while @@ -916,7 +916,7 @@ class SetPathSuite extends SharedSparkSession { } test("path-driven COUNT(*) rewrite gate: rewrite still applies for unrelated builtins") { - // SPARK-56853: the gate fires ONLY when a temp function with the same unqualified + // The gate fires ONLY when a temp function with the same unqualified // name as the builtin exists. A temp with a different name must not affect the // COUNT(*) -> COUNT(1) shortcut even when session is searched before builtin. withPathEnabled { @@ -934,7 +934,7 @@ class SetPathSuite extends SharedSparkSession { } test("PATH enabled: concurrent SET PATH and unqualified lookups do not deadlock") { - // SPARK-56853: SessionCatalog.lookupBuiltinOrTempFunction is intentionally NOT + // SessionCatalog.lookupBuiltinOrTempFunction is intentionally NOT // synchronized on SessionCatalog because the path-driven kinds provider acquires // CatalogManager.synchronized, and another thread holding that lock can call back // into SessionCatalog (e.g. via setCurrentNamespace). This test hammers both sides diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/SqlPathV2CatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/SqlPathV2CatalogSuite.scala index 8533de55019a1..0131e0a64be45 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/SqlPathV2CatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/SqlPathV2CatalogSuite.scala @@ -26,8 +26,7 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession /** - * SPARK-56853: end-to-end coverage of [[SQLConf.PATH_ENABLED]] resolution through - * non-session V2 catalogs. + * End-to-end coverage of [[SQLConf.PATH_ENABLED]] resolution through non-session V2 catalogs. * * Other path tests live in `SetPathSuite` (session catalog) and `ProcedureSuite` * (procedures via CALL). 
This suite specifically exercises: diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index bf1a91e344a7c..e1912fa4e5654 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -1453,14 +1453,19 @@ abstract class SQLViewSuite extends QueryTest { } } - test("SPARK-56853: stored view path is ignored when PATH is disabled at read time") { - // A view created with PATH enabled persists its frozen resolution path in metadata. - // If the reader's session has `spark.sql.path.enabled=false`, the pinned entries are - // intentionally dropped (`CatalogManager.resolutionPathEntriesForAnalysis`); the view - // body falls back to its catalog/namespace. Verify both directions: + test("stored view path is ignored when PATH is disabled at read time") { + // A view created with PATH enabled persists two things in metadata: the frozen + // resolution path AND the creator session's current catalog+namespace at CREATE + // VIEW time (the view's `viewCatalogAndNamespace` property). If the reader's + // session has `spark.sql.path.enabled=false`, the pinned entries are intentionally + // dropped (`CatalogManager.resolutionPathEntriesForAnalysis`); the view body's + // unqualified references fall back to that captured catalog+namespace, which is + // the creator's USE state at CREATE time -- NOT the schema the view physically + // lives in (the two coincide below only because the test runs + // `USE spark_catalog.compat_view_b` before CREATE VIEW). Verify both directions: // - fully-qualified bodies keep working (qualification doesn't depend on PATH), - // - unqualified bodies that relied on the frozen path now resolve through the - // view's home schema (and fail when the unqualified name isn't there). 
+ // - unqualified bodies that relied on the frozen path now resolve via the + // captured viewCatalogAndNamespace. withDatabase("compat_view_a", "compat_view_b") { sql("CREATE DATABASE compat_view_a") sql("CREATE DATABASE compat_view_b") @@ -1508,10 +1513,12 @@ abstract class SQLViewSuite extends QueryTest { } } - test("SPARK-56853: stored view path with no fallback target fails clearly when PATH is off") { - // Same setup as above but the view's home schema does NOT contain the unqualified - // name; under PATH disabled the analyzer cannot fall back anywhere, so the lookup - // must raise TABLE_OR_VIEW_NOT_FOUND against the view's catalog/namespace. + test("stored view path with no fallback target fails clearly when PATH is off") { + // Same shape as the previous test, but the captured `viewCatalogAndNamespace` + // (the creator's USE state at CREATE VIEW time -- set here via + // `USE spark_catalog.compat_home_only`) does NOT contain the unqualified name. + // Under PATH disabled the analyzer cannot fall back anywhere, so the lookup + // must raise TABLE_OR_VIEW_NOT_FOUND against that captured catalog+namespace. withDatabase("compat_home_only", "compat_referenced") { sql("CREATE DATABASE compat_home_only") sql("CREATE DATABASE compat_referenced") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterViewSchemaBindingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterViewSchemaBindingSuite.scala index a78b0842e87b7..6b11748565291 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterViewSchemaBindingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterViewSchemaBindingSuite.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.internal.SQLConf class AlterViewSchemaBindingSuite extends command.AlterViewSchemaBindingSuiteBase with ViewCommandSuiteBase { - test("SPARK-56853: ALTER VIEW ... 
WITH SCHEMA preserves the frozen SQL path") { + test("ALTER VIEW ... WITH SCHEMA preserves the frozen SQL path") { // `generateViewProperties(captureNewPath = false)` is the documented behavior for // ALTER VIEW WITH SCHEMA: the view's body resolution path must stay pinned to the // create-time PATH, not the caller's current PATH. This test creates the view under From ba5876e9816431f81152f758dc14e5b5dbe60540 Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Thu, 14 May 2026 09:53:32 -0700 Subject: [PATCH 3/3] [SPARK-56853][SQL][TESTS][FOLLOWUP] Address review comments from @cloud-fan - SqlPathV2CatalogSuite: the V2-function first-match test was using two `StrLen` impls that both return `s.length`, so swapping path order did not change the row and the test only proved "neither catalog raised NOT_FOUND". Add a tiny distinguishable `StrLenTimes100` ScalarFunction next to the suite and register it on `pathcat2` so the result is `3` vs `300` depending on which catalog supplies the function -- now symmetric with the parallel table test (id=10 vs id=20). - SQLViewSuite: restore the `SPARK-56853:` prefix on the two new tests to match the file-local convention (all surrounding tests, including the SPARK-56639 feature tests right next to them, use the JIRA prefix). The PR description's own `-z SPARK-56853` command now matches. - SetPathSuite: the single-pass-resolver counterpart of the COUNT(*) rewrite gate IS actionable -- `FunctionResolverUtils.isUnqualifiedCountShadowedByTemp` is wired into `isNonDistinctCount` / `handleStarInArguments`; it is only the `singlePassResolver.enabled` conf default that keeps the path dormant. 
Add a parallel test that enables `spark.sql.analyzer.singlePassResolver.enabled` for the SELECT (CREATE TEMPORARY FUNCTION and SET PATH stay on the fixed-point analyzer because the single-pass analyzer does not yet support those operators) and asserts the analyzed-plan output type: BIGINT when the rewrite fires (builtin count) vs INT when the gate suppresses it and the temp count(INT) RETURN x + 100 wins. Bypassing `checkAnswer` avoids the `DeserializeToObject` operator the single-pass analyzer also does not yet support, while still exercising the analyzer-side gate. --- .../org/apache/spark/sql/SetPathSuite.scala | 44 +++++++++++++++++++ .../sql/connector/SqlPathV2CatalogSuite.scala | 27 +++++++++--- .../spark/sql/execution/SQLViewSuite.scala | 4 +- 3 files changed, 67 insertions(+), 8 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SetPathSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SetPathSuite.scala index 7ed601b083894..245398a4694ec 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SetPathSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SetPathSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql import org.apache.spark.SparkIllegalArgumentException import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{IntegerType, LongType} /** * Tests for SET PATH command and session path management. @@ -933,6 +934,49 @@ class SetPathSuite extends SharedSparkSession { } } + test("path-driven COUNT(*) rewrite gate: single-pass resolver suppresses the rewrite " + + "under SET PATH (session-first)") { + // The single-pass resolver mirrors the fixed-point gate via + // `FunctionResolverUtils.isUnqualifiedCountShadowedByTemp`, which is wired into + // `isNonDistinctCount` and consulted by `handleStarInArguments`. 
+ // + // Setup (`CREATE TEMPORARY FUNCTION`, `SET PATH`) and execution (Dataset collect via + // checkAnswer, which inserts a `DeserializeToObject` node the single-pass analyzer + // does not yet support) are run under the fixed-point analyzer; only the actual + // count(*) analysis is run under the single-pass analyzer, and we assert against the + // analyzed plan's output schema. The builtin count returns BIGINT (rewrite applied); + // the temp count(INT) returns INT (rewrite suppressed and the star expansion routes + // through the temp), so the schema's first-field dataType tells us which branch fired. + withPathEnabled { + sql("CREATE TEMPORARY FUNCTION count(x INT) RETURNS INT RETURN x + 100") + try { + val countStarSql = "SELECT count(*) FROM VALUES (1) AS t(a)" + + // PATH builtin-first: the single-pass gate reports + // `isUnqualifiedCountShadowedByTemp = false`, the shortcut fires, and the analyzed + // output is the BIGINT builtin count. + withSQLConf(SQLConf.ANALYZER_SINGLE_PASS_RESOLVER_ENABLED.key -> "true") { + val tpe = spark.sql(countStarSql).queryExecution.analyzed.schema.head.dataType + assert(tpe == LongType, + s"Expected BIGINT (builtin count rewrite); got: $tpe") + } + + sql("SET PATH = system.session, system.builtin") + + // PATH session-first: the gate reports true, the rewrite is suppressed, the star + // expands against `a`, and the temp count(INT) wins; analyzed output is INT. 
+ withSQLConf(SQLConf.ANALYZER_SINGLE_PASS_RESOLVER_ENABLED.key -> "true") { + val tpe = spark.sql(countStarSql).queryExecution.analyzed.schema.head.dataType + assert(tpe == IntegerType, + s"Expected INT (temp count; rewrite suppressed); got: $tpe") + } + } finally { + sql("SET PATH = DEFAULT_PATH") + sql("DROP TEMPORARY FUNCTION IF EXISTS count") + } + } + } + test("PATH enabled: concurrent SET PATH and unqualified lookups do not deadlock") { // SessionCatalog.lookupBuiltinOrTempFunction is intentionally NOT // synchronized on SessionCatalog because the path-driven kinds provider acquires diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/SqlPathV2CatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/SqlPathV2CatalogSuite.scala index 0131e0a64be45..9e365c720266a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/SqlPathV2CatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/SqlPathV2CatalogSuite.scala @@ -20,10 +20,12 @@ package org.apache.spark.sql.connector import java.util.Collections import org.apache.spark.sql.{AnalysisException, Row} +import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.connector.catalog.{Identifier, InMemoryCatalog, SupportsNamespaces} -import org.apache.spark.sql.connector.catalog.functions.UnboundFunction +import org.apache.spark.sql.connector.catalog.functions.{ScalarFunction, UnboundFunction} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{DataType, IntegerType, StringType} /** * End-to-end coverage of [[SQLConf.PATH_ENABLED]] resolution through non-session V2 catalogs. 
@@ -119,19 +121,20 @@ class SqlPathV2CatalogSuite extends SharedSparkSession { test("V2 catalogs on SET PATH: unqualified function follows first match") { withSQLConf(SQLConf.PATH_ENABLED.key -> "true") { - // Two V2 catalogs each register a `strlen` function; resolution must follow path order. + // Two V2 catalogs each register a `strlen` function under the same name but with + // distinguishable return values: pathcat returns the true length, pathcat2 returns + // the length times 100. The result distinguishes which catalog supplied the + // function for the same argument, so swapping the path order must change the row. createV2Namespace("pathcat", "fns") createV2Namespace("pathcat2", "fns") addV2Function("pathcat", "fns", "strlen", StrLen(StrLenDefault)) - addV2Function("pathcat2", "fns", "strlen", StrLen(StrLenMagic)) + addV2Function("pathcat2", "fns", "strlen", StrLen(StrLenTimes100)) try { sql("SET PATH = pathcat.fns, pathcat2.fns, system.builtin") - // Both backing impls return the same numeric length, so a correct result here - // also implies neither catalog raised "not found" -- the path drove resolution. checkAnswer(sql("SELECT strlen('abc')"), Row(3)) sql("SET PATH = pathcat2.fns, pathcat.fns, system.builtin") - checkAnswer(sql("SELECT strlen('hello')"), Row(5)) + checkAnswer(sql("SELECT strlen('abc')"), Row(300)) } finally { sql("SET PATH = DEFAULT_PATH") v2Catalog("pathcat").clearFunctions() @@ -140,3 +143,15 @@ class SqlPathV2CatalogSuite extends SharedSparkSession { } } } + +/** + * A small distinguishable companion to `StrLenDefault` (in `DataSourceV2FunctionSuite.scala`): + * returns `s.length * 100` so V2-function resolution tests across catalogs can verify which + * catalog supplied the function from the result row alone. 
+ */ +case object StrLenTimes100 extends ScalarFunction[Int] { + override def inputTypes(): Array[DataType] = Array(StringType) + override def resultType(): DataType = IntegerType + override def name(): String = "strlen_times_100" + override def produceResult(input: InternalRow): Int = input.getString(0).length * 100 +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index e1912fa4e5654..3fb54d7c43d58 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -1453,7 +1453,7 @@ abstract class SQLViewSuite extends QueryTest { } } - test("stored view path is ignored when PATH is disabled at read time") { + test("SPARK-56853: stored view path is ignored when PATH is disabled at read time") { // A view created with PATH enabled persists two things in metadata: the frozen // resolution path AND the creator session's current catalog+namespace at CREATE // VIEW time (the view's `viewCatalogAndNamespace` property). If the reader's @@ -1513,7 +1513,7 @@ abstract class SQLViewSuite extends QueryTest { } } - test("stored view path with no fallback target fails clearly when PATH is off") { + test("SPARK-56853: stored view path with no fallback target fails clearly when PATH is off") { // Same shape as the previous test, but the captured `viewCatalogAndNamespace` // (the creator's USE state at CREATE VIEW time -- set here via // `USE spark_catalog.compat_home_only`) does NOT contain the unqualified name.