From e0fe3409f0363b81ff0a20754af955351f650621 Mon Sep 17 00:00:00 2001 From: Thang Long VU Date: Fri, 24 Apr 2026 15:35:19 +0000 Subject: [PATCH 01/36] [SPARK-56619][CONNECT][TESTS] Add Connect repeated SQL refresh tests Adds 6 tests to verify that repeated sql() calls in Spark Connect mode always see the latest table state. This mirrors the classic-mode tests added to DataSourceV2DataFrameSuite, covering: 1. Session write: INSERT via SQL, next SELECT sees new row 2. External write: data added via catalog API, next SELECT sees it 3. Session schema change: ADD COLUMN via SQL, next SELECT sees new schema 4. External schema change: column added via catalog API, next SELECT sees it 5. Session drop/recreate: DROP+CREATE via SQL, next SELECT sees empty table 6. External drop/recreate: drop+create via catalog API, next SELECT sees empty table In Connect, every sql() call creates a fresh plan re-analyzed on the server, so all modifications are always visible. Co-authored-by: Isaac --- .../DataSourceV2RepeatedSQLConnectSuite.scala | 180 ++++++++++++++++++ 1 file changed, 180 insertions(+) create mode 100644 sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala new file mode 100644 index 0000000000000..13487c327631f --- /dev/null +++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala @@ -0,0 +1,180 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connect + +import java.util +import java.util.Collections + +import org.apache.spark.SparkConf +import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.connector.catalog.{BufferedRows, Column, Identifier, InMemoryBaseTable, InMemoryTableCatalog} +import org.apache.spark.sql.connector.catalog.{TableChange, TableWritePrivilege} +import org.apache.spark.sql.types.{IntegerType, StructType} + +/** + * Connect-mode equivalent of the repeated-sql() tests added to + * DataSourceV2DataFrameSuite in the classic path. + * + * In Connect, every sql() call creates a fresh plan that is + * re-analyzed on the server, so it always sees the latest data, + * schema, and table identity. 
+ */ +class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { + + override def sparkConf: SparkConf = super.sparkConf + .set("spark.sql.catalog.testcat", classOf[InMemoryTableCatalog].getName) + .set("spark.sql.catalog.testcat.copyOnLoad", "true") + + private val T = "testcat.ns1.ns2.tbl" + private val ident = Identifier.of(Array("ns1", "ns2"), "tbl") + + private def assertRows(actual: Array[Row], expected: Seq[Row]): Unit = { + val actualStrs = actual.map(_.toString()).toSet + val expectedStrs = expected.map(_.toString()).toSet + assert(actualStrs == expectedStrs, + s"Expected ${expected.mkString(", ")} but got ${actual.mkString(", ")}") + } + + // Scenario 1: external writes + + test("[connect] repeated sql() reflects session write") { + withSession { s => + s.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() + s.sql(s"INSERT INTO $T VALUES (1, 100)").collect() + assertRows(s.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100))) + + s.sql(s"INSERT INTO $T VALUES (2, 200)").collect() + assertRows( + s.sql(s"SELECT * FROM $T").collect(), + Seq(Row(1, 100), Row(2, 200))) + + s.sql(s"DROP TABLE IF EXISTS $T").collect() + } + } + + test("[connect] repeated sql() reflects external write") { + withSession { s => + s.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() + s.sql(s"INSERT INTO $T VALUES (1, 100)").collect() + assertRows(s.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100))) + + // external writer adds (2, 200) + val serverSession = getServerSession(s) + val cat = serverSession.sessionState.catalogManager + .catalog("testcat").asInstanceOf[InMemoryTableCatalog] + val schema2 = StructType.fromDDL("id INT, salary INT") + val extTable = cat.loadTable(ident, + util.Set.of(TableWritePrivilege.INSERT)).asInstanceOf[InMemoryBaseTable] + extTable.withData(Array( + new BufferedRows(Seq.empty, schema2).withRow(InternalRow(2, 200)))) + + assertRows( + s.sql(s"SELECT * FROM $T").collect(), + Seq(Row(1, 100), Row(2, 200))) + 
+ s.sql(s"DROP TABLE IF EXISTS $T").collect() + } + } + + // Scenario 2: external schema changes + + test("[connect] repeated sql() reflects session schema change") { + withSession { s => + s.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() + s.sql(s"INSERT INTO $T VALUES (1, 100)").collect() + assertRows(s.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100))) + + s.sql(s"ALTER TABLE $T ADD COLUMN new_col INT").collect() + s.sql(s"INSERT INTO $T VALUES (2, 200, -1)").collect() + assertRows( + s.sql(s"SELECT * FROM $T").collect(), + Seq(Row(1, 100, null), Row(2, 200, -1))) + + s.sql(s"DROP TABLE IF EXISTS $T").collect() + } + } + + test("[connect] repeated sql() reflects external schema change") { + withSession { s => + s.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() + s.sql(s"INSERT INTO $T VALUES (1, 100)").collect() + assertRows(s.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100))) + + // external schema change + data write via catalog API + val serverSession = getServerSession(s) + val cat = serverSession.sessionState.catalogManager + .catalog("testcat").asInstanceOf[InMemoryTableCatalog] + val addCol = TableChange.addColumn(Array("new_col"), IntegerType, true) + cat.alterTable(ident, addCol) + + val schema3 = StructType.fromDDL("id INT, salary INT, new_col INT") + val extTable = cat.loadTable(ident, + util.Set.of(TableWritePrivilege.INSERT)).asInstanceOf[InMemoryBaseTable] + extTable.withData(Array( + new BufferedRows(Seq.empty, schema3).withRow(InternalRow(2, 200, -1)))) + + assertRows( + s.sql(s"SELECT * FROM $T").collect(), + Seq(Row(1, 100, null), Row(2, 200, -1))) + + s.sql(s"DROP TABLE IF EXISTS $T").collect() + } + } + + // Scenario 3: drop and recreate table + + test("[connect] repeated sql() reflects session drop/recreate") { + withSession { s => + s.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() + s.sql(s"INSERT INTO $T VALUES (1, 100)").collect() + assertRows(s.sql(s"SELECT * FROM $T").collect(), 
Seq(Row(1, 100))) + + s.sql(s"DROP TABLE $T").collect() + s.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() + assertRows(s.sql(s"SELECT * FROM $T").collect(), Seq.empty) + + s.sql(s"DROP TABLE IF EXISTS $T").collect() + } + } + + test("[connect] repeated sql() reflects external drop/recreate") { + withSession { s => + s.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() + s.sql(s"INSERT INTO $T VALUES (1, 100)").collect() + assertRows(s.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100))) + + // external drop and recreate via catalog API + val serverSession = getServerSession(s) + val cat = serverSession.sessionState.catalogManager + .catalog("testcat").asInstanceOf[InMemoryTableCatalog] + cat.dropTable(ident) + cat.createTable( + ident, + Array( + Column.create("id", IntegerType), + Column.create("salary", IntegerType)), + Array.empty, + Collections.emptyMap[String, String]) + + assertRows(s.sql(s"SELECT * FROM $T").collect(), Seq.empty) + + s.sql(s"DROP TABLE IF EXISTS $T").collect() + } + } +} From 2c7c08fa591ece913394c8b17ad293a12d81f12a Mon Sep 17 00:00:00 2001 From: Thang Long VU Date: Mon, 27 Apr 2026 20:08:06 +0000 Subject: [PATCH 02/36] Apply scalafmt formatting Co-authored-by: Isaac --- .../DataSourceV2RepeatedSQLConnectSuite.scala | 58 +++++++++---------- 1 file changed, 26 insertions(+), 32 deletions(-) diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala index 13487c327631f..d56b57036a617 100644 --- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala +++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala @@ -28,12 +28,11 @@ import org.apache.spark.sql.connector.catalog.{TableChange, TableWritePrivilege} import 
org.apache.spark.sql.types.{IntegerType, StructType} /** - * Connect-mode equivalent of the repeated-sql() tests added to - * DataSourceV2DataFrameSuite in the classic path. + * Connect-mode equivalent of the repeated-sql() tests added to DataSourceV2DataFrameSuite in the + * classic path. * - * In Connect, every sql() call creates a fresh plan that is - * re-analyzed on the server, so it always sees the latest data, - * schema, and table identity. + * In Connect, every sql() call creates a fresh plan that is re-analyzed on the server, so it + * always sees the latest data, schema, and table identity. */ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { @@ -47,7 +46,8 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { private def assertRows(actual: Array[Row], expected: Seq[Row]): Unit = { val actualStrs = actual.map(_.toString()).toSet val expectedStrs = expected.map(_.toString()).toSet - assert(actualStrs == expectedStrs, + assert( + actualStrs == expectedStrs, s"Expected ${expected.mkString(", ")} but got ${actual.mkString(", ")}") } @@ -60,9 +60,7 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { assertRows(s.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100))) s.sql(s"INSERT INTO $T VALUES (2, 200)").collect() - assertRows( - s.sql(s"SELECT * FROM $T").collect(), - Seq(Row(1, 100), Row(2, 200))) + assertRows(s.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100), Row(2, 200))) s.sql(s"DROP TABLE IF EXISTS $T").collect() } @@ -77,16 +75,15 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { // external writer adds (2, 200) val serverSession = getServerSession(s) val cat = serverSession.sessionState.catalogManager - .catalog("testcat").asInstanceOf[InMemoryTableCatalog] + .catalog("testcat") + .asInstanceOf[InMemoryTableCatalog] val schema2 = StructType.fromDDL("id INT, salary INT") - val extTable = cat.loadTable(ident, - 
util.Set.of(TableWritePrivilege.INSERT)).asInstanceOf[InMemoryBaseTable] - extTable.withData(Array( - new BufferedRows(Seq.empty, schema2).withRow(InternalRow(2, 200)))) + val extTable = cat + .loadTable(ident, util.Set.of(TableWritePrivilege.INSERT)) + .asInstanceOf[InMemoryBaseTable] + extTable.withData(Array(new BufferedRows(Seq.empty, schema2).withRow(InternalRow(2, 200)))) - assertRows( - s.sql(s"SELECT * FROM $T").collect(), - Seq(Row(1, 100), Row(2, 200))) + assertRows(s.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100), Row(2, 200))) s.sql(s"DROP TABLE IF EXISTS $T").collect() } @@ -102,9 +99,7 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { s.sql(s"ALTER TABLE $T ADD COLUMN new_col INT").collect() s.sql(s"INSERT INTO $T VALUES (2, 200, -1)").collect() - assertRows( - s.sql(s"SELECT * FROM $T").collect(), - Seq(Row(1, 100, null), Row(2, 200, -1))) + assertRows(s.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100, null), Row(2, 200, -1))) s.sql(s"DROP TABLE IF EXISTS $T").collect() } @@ -119,19 +114,19 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { // external schema change + data write via catalog API val serverSession = getServerSession(s) val cat = serverSession.sessionState.catalogManager - .catalog("testcat").asInstanceOf[InMemoryTableCatalog] + .catalog("testcat") + .asInstanceOf[InMemoryTableCatalog] val addCol = TableChange.addColumn(Array("new_col"), IntegerType, true) cat.alterTable(ident, addCol) val schema3 = StructType.fromDDL("id INT, salary INT, new_col INT") - val extTable = cat.loadTable(ident, - util.Set.of(TableWritePrivilege.INSERT)).asInstanceOf[InMemoryBaseTable] - extTable.withData(Array( - new BufferedRows(Seq.empty, schema3).withRow(InternalRow(2, 200, -1)))) + val extTable = cat + .loadTable(ident, util.Set.of(TableWritePrivilege.INSERT)) + .asInstanceOf[InMemoryBaseTable] + extTable.withData( + Array(new BufferedRows(Seq.empty, schema3).withRow(InternalRow(2, 200, 
-1)))) - assertRows( - s.sql(s"SELECT * FROM $T").collect(), - Seq(Row(1, 100, null), Row(2, 200, -1))) + assertRows(s.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100, null), Row(2, 200, -1))) s.sql(s"DROP TABLE IF EXISTS $T").collect() } @@ -162,13 +157,12 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { // external drop and recreate via catalog API val serverSession = getServerSession(s) val cat = serverSession.sessionState.catalogManager - .catalog("testcat").asInstanceOf[InMemoryTableCatalog] + .catalog("testcat") + .asInstanceOf[InMemoryTableCatalog] cat.dropTable(ident) cat.createTable( ident, - Array( - Column.create("id", IntegerType), - Column.create("salary", IntegerType)), + Array(Column.create("id", IntegerType), Column.create("salary", IntegerType)), Array.empty, Collections.emptyMap[String, String]) From f36aeb1204a05c33dc4256a56413fe56b10222d9 Mon Sep 17 00:00:00 2001 From: Thang Long VU Date: Wed, 29 Apr 2026 17:29:13 +0000 Subject: [PATCH 03/36] Add DataFrame reuse tests to prove Connect re-analysis behavior Add 3 new tests that create a DataFrame ONCE and reuse it across external mutations (data write, schema change, drop/recreate). These tests would FAIL in classic Spark (where the resolved plan is captured at DataFrame creation time) but PASS in Connect (where each action re-sends the plan for fresh server-side analysis). This makes the suite genuinely Connect-specific, not just a copy of the classic tests with Connect infrastructure. 
Co-authored-by: Isaac --- .../DataSourceV2RepeatedSQLConnectSuite.scala | 96 ++++++++++++++++++- 1 file changed, 93 insertions(+), 3 deletions(-) diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala index d56b57036a617..3f7f2b74351ac 100644 --- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala +++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala @@ -33,6 +33,11 @@ import org.apache.spark.sql.types.{IntegerType, StructType} * * In Connect, every sql() call creates a fresh plan that is re-analyzed on the server, so it * always sees the latest data, schema, and table identity. + * + * The "DataFrame reuse" tests (at the bottom) test Connect-specific behavior: reusing the same + * DataFrame across external mutations. In classic Spark, the resolved plan is captured at + * DataFrame creation time, so reusing a DF after schema changes would fail. In Connect, each + * action re-sends the plan to the server for fresh analysis. 
*/ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { @@ -44,13 +49,17 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { private val ident = Identifier.of(Array("ns1", "ns2"), "tbl") private def assertRows(actual: Array[Row], expected: Seq[Row]): Unit = { - val actualStrs = actual.map(_.toString()).toSet - val expectedStrs = expected.map(_.toString()).toSet assert( - actualStrs == expectedStrs, + actual.map(_.toString()).toSet == expected.map(_.toString()).toSet, s"Expected ${expected.mkString(", ")} but got ${actual.mkString(", ")}") } + private def serverCatalog( + serverSession: org.apache.spark.sql.classic.SparkSession): InMemoryTableCatalog = + serverSession.sessionState.catalogManager + .catalog("testcat") + .asInstanceOf[InMemoryTableCatalog] + // Scenario 1: external writes test("[connect] repeated sql() reflects session write") { @@ -171,4 +180,85 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { s.sql(s"DROP TABLE IF EXISTS $T").collect() } } + + // DataFrame reuse tests: these test Connect-specific behavior where reusing the same + // DataFrame object across mutations still sees fresh data, because Connect re-sends + // the plan for fresh analysis on every action. 
+ + test("[connect] reused DataFrame reflects external write") { + withSession { s => + s.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() + s.sql(s"INSERT INTO $T VALUES (1, 100)").collect() + + val df = s.sql(s"SELECT * FROM $T") + assertRows(df.collect(), Seq(Row(1, 100))) + + // external write via catalog API + val serverSession = getServerSession(s) + val cat = serverCatalog(serverSession) + val schema2 = StructType.fromDDL("id INT, salary INT") + val extTable = cat + .loadTable(ident, util.Set.of(TableWritePrivilege.INSERT)) + .asInstanceOf[InMemoryBaseTable] + extTable.withData(Array(new BufferedRows(Seq.empty, schema2).withRow(InternalRow(2, 200)))) + + // same df object, Connect re-analyzes and sees the new row + assertRows(df.collect(), Seq(Row(1, 100), Row(2, 200))) + + s.sql(s"DROP TABLE IF EXISTS $T").collect() + } + } + + test("[connect] reused DataFrame reflects external schema change") { + withSession { s => + s.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() + s.sql(s"INSERT INTO $T VALUES (1, 100)").collect() + + val df = s.sql(s"SELECT * FROM $T") + assertRows(df.collect(), Seq(Row(1, 100))) + + // external schema change + write via catalog API + val serverSession = getServerSession(s) + val cat = serverCatalog(serverSession) + val addCol = TableChange.addColumn(Array("new_col"), IntegerType, true) + cat.alterTable(ident, addCol) + + val schema3 = StructType.fromDDL("id INT, salary INT, new_col INT") + val extTable = cat + .loadTable(ident, util.Set.of(TableWritePrivilege.INSERT)) + .asInstanceOf[InMemoryBaseTable] + extTable.withData( + Array(new BufferedRows(Seq.empty, schema3).withRow(InternalRow(2, 200, -1)))) + + // same df object, Connect re-analyzes and sees the new schema + assertRows(df.collect(), Seq(Row(1, 100, null), Row(2, 200, -1))) + + s.sql(s"DROP TABLE IF EXISTS $T").collect() + } + } + + test("[connect] reused DataFrame reflects external drop/recreate") { + withSession { s => + s.sql(s"CREATE 
TABLE $T (id INT, salary INT) USING foo").collect() + s.sql(s"INSERT INTO $T VALUES (1, 100)").collect() + + val df = s.sql(s"SELECT * FROM $T") + assertRows(df.collect(), Seq(Row(1, 100))) + + // external drop and recreate via catalog API + val serverSession = getServerSession(s) + val cat = serverCatalog(serverSession) + cat.dropTable(ident) + cat.createTable( + ident, + Array(Column.create("id", IntegerType), Column.create("salary", IntegerType)), + Array.empty, + Collections.emptyMap[String, String]) + + // same df object, Connect re-analyzes against the new empty table + assertRows(df.collect(), Seq.empty) + + s.sql(s"DROP TABLE IF EXISTS $T").collect() + } + } } From 5c5ccc3efb2bdec65062a01f3d7a22c226f97046 Mon Sep 17 00:00:00 2001 From: Thang Long VU Date: Wed, 29 Apr 2026 20:04:35 +0000 Subject: [PATCH 04/36] Rename session variable from s to session for clarity Co-authored-by: Isaac --- .../DataSourceV2RepeatedSQLConnectSuite.scala | 132 +++++++++--------- 1 file changed, 68 insertions(+), 64 deletions(-) diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala index 3f7f2b74351ac..abdd93e2042e6 100644 --- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala +++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala @@ -63,26 +63,26 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { // Scenario 1: external writes test("[connect] repeated sql() reflects session write") { - withSession { s => - s.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - s.sql(s"INSERT INTO $T VALUES (1, 100)").collect() - assertRows(s.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100))) + withSession { session => + session.sql(s"CREATE TABLE $T (id INT, salary INT) 
USING foo").collect() + session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() + assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100))) - s.sql(s"INSERT INTO $T VALUES (2, 200)").collect() - assertRows(s.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100), Row(2, 200))) + session.sql(s"INSERT INTO $T VALUES (2, 200)").collect() + assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100), Row(2, 200))) - s.sql(s"DROP TABLE IF EXISTS $T").collect() + session.sql(s"DROP TABLE IF EXISTS $T").collect() } } test("[connect] repeated sql() reflects external write") { - withSession { s => - s.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - s.sql(s"INSERT INTO $T VALUES (1, 100)").collect() - assertRows(s.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100))) + withSession { session => + session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() + assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100))) // external writer adds (2, 200) - val serverSession = getServerSession(s) + val serverSession = getServerSession(session) val cat = serverSession.sessionState.catalogManager .catalog("testcat") .asInstanceOf[InMemoryTableCatalog] @@ -92,36 +92,38 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { .asInstanceOf[InMemoryBaseTable] extTable.withData(Array(new BufferedRows(Seq.empty, schema2).withRow(InternalRow(2, 200)))) - assertRows(s.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100), Row(2, 200))) + assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100), Row(2, 200))) - s.sql(s"DROP TABLE IF EXISTS $T").collect() + session.sql(s"DROP TABLE IF EXISTS $T").collect() } } // Scenario 2: external schema changes test("[connect] repeated sql() reflects session schema change") { - withSession { s => - s.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - s.sql(s"INSERT INTO $T VALUES 
(1, 100)").collect() - assertRows(s.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100))) - - s.sql(s"ALTER TABLE $T ADD COLUMN new_col INT").collect() - s.sql(s"INSERT INTO $T VALUES (2, 200, -1)").collect() - assertRows(s.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100, null), Row(2, 200, -1))) - - s.sql(s"DROP TABLE IF EXISTS $T").collect() + withSession { session => + session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() + assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100))) + + session.sql(s"ALTER TABLE $T ADD COLUMN new_col INT").collect() + session.sql(s"INSERT INTO $T VALUES (2, 200, -1)").collect() + assertRows( + session.sql(s"SELECT * FROM $T").collect(), + Seq(Row(1, 100, null), Row(2, 200, -1))) + + session.sql(s"DROP TABLE IF EXISTS $T").collect() } } test("[connect] repeated sql() reflects external schema change") { - withSession { s => - s.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - s.sql(s"INSERT INTO $T VALUES (1, 100)").collect() - assertRows(s.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100))) + withSession { session => + session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() + assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100))) // external schema change + data write via catalog API - val serverSession = getServerSession(s) + val serverSession = getServerSession(session) val cat = serverSession.sessionState.catalogManager .catalog("testcat") .asInstanceOf[InMemoryTableCatalog] @@ -135,36 +137,38 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { extTable.withData( Array(new BufferedRows(Seq.empty, schema3).withRow(InternalRow(2, 200, -1)))) - assertRows(s.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100, null), Row(2, 200, -1))) + assertRows( + session.sql(s"SELECT * FROM $T").collect(), + 
Seq(Row(1, 100, null), Row(2, 200, -1))) - s.sql(s"DROP TABLE IF EXISTS $T").collect() + session.sql(s"DROP TABLE IF EXISTS $T").collect() } } // Scenario 3: drop and recreate table test("[connect] repeated sql() reflects session drop/recreate") { - withSession { s => - s.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - s.sql(s"INSERT INTO $T VALUES (1, 100)").collect() - assertRows(s.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100))) + withSession { session => + session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() + assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100))) - s.sql(s"DROP TABLE $T").collect() - s.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - assertRows(s.sql(s"SELECT * FROM $T").collect(), Seq.empty) + session.sql(s"DROP TABLE $T").collect() + session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() + assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq.empty) - s.sql(s"DROP TABLE IF EXISTS $T").collect() + session.sql(s"DROP TABLE IF EXISTS $T").collect() } } test("[connect] repeated sql() reflects external drop/recreate") { - withSession { s => - s.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - s.sql(s"INSERT INTO $T VALUES (1, 100)").collect() - assertRows(s.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100))) + withSession { session => + session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() + assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100))) // external drop and recreate via catalog API - val serverSession = getServerSession(s) + val serverSession = getServerSession(session) val cat = serverSession.sessionState.catalogManager .catalog("testcat") .asInstanceOf[InMemoryTableCatalog] @@ -175,9 +179,9 @@ class DataSourceV2RepeatedSQLConnectSuite extends 
SparkConnectServerTest { Array.empty, Collections.emptyMap[String, String]) - assertRows(s.sql(s"SELECT * FROM $T").collect(), Seq.empty) + assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq.empty) - s.sql(s"DROP TABLE IF EXISTS $T").collect() + session.sql(s"DROP TABLE IF EXISTS $T").collect() } } @@ -186,15 +190,15 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { // the plan for fresh analysis on every action. test("[connect] reused DataFrame reflects external write") { - withSession { s => - s.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - s.sql(s"INSERT INTO $T VALUES (1, 100)").collect() + withSession { session => + session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() - val df = s.sql(s"SELECT * FROM $T") + val df = session.sql(s"SELECT * FROM $T") assertRows(df.collect(), Seq(Row(1, 100))) // external write via catalog API - val serverSession = getServerSession(s) + val serverSession = getServerSession(session) val cat = serverCatalog(serverSession) val schema2 = StructType.fromDDL("id INT, salary INT") val extTable = cat @@ -205,20 +209,20 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { // same df object, Connect re-analyzes and sees the new row assertRows(df.collect(), Seq(Row(1, 100), Row(2, 200))) - s.sql(s"DROP TABLE IF EXISTS $T").collect() + session.sql(s"DROP TABLE IF EXISTS $T").collect() } } test("[connect] reused DataFrame reflects external schema change") { - withSession { s => - s.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - s.sql(s"INSERT INTO $T VALUES (1, 100)").collect() + withSession { session => + session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() - val df = s.sql(s"SELECT * FROM $T") + val df = session.sql(s"SELECT * FROM $T") assertRows(df.collect(), Seq(Row(1, 100))) // 
external schema change + write via catalog API - val serverSession = getServerSession(s) + val serverSession = getServerSession(session) val cat = serverCatalog(serverSession) val addCol = TableChange.addColumn(Array("new_col"), IntegerType, true) cat.alterTable(ident, addCol) @@ -233,20 +237,20 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { // same df object, Connect re-analyzes and sees the new schema assertRows(df.collect(), Seq(Row(1, 100, null), Row(2, 200, -1))) - s.sql(s"DROP TABLE IF EXISTS $T").collect() + session.sql(s"DROP TABLE IF EXISTS $T").collect() } } test("[connect] reused DataFrame reflects external drop/recreate") { - withSession { s => - s.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - s.sql(s"INSERT INTO $T VALUES (1, 100)").collect() + withSession { session => + session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() - val df = s.sql(s"SELECT * FROM $T") + val df = session.sql(s"SELECT * FROM $T") assertRows(df.collect(), Seq(Row(1, 100))) // external drop and recreate via catalog API - val serverSession = getServerSession(s) + val serverSession = getServerSession(session) val cat = serverCatalog(serverSession) cat.dropTable(ident) cat.createTable( @@ -258,7 +262,7 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { // same df object, Connect re-analyzes against the new empty table assertRows(df.collect(), Seq.empty) - s.sql(s"DROP TABLE IF EXISTS $T").collect() + session.sql(s"DROP TABLE IF EXISTS $T").collect() } } } From 251422282310de42ac52c9f2873a280f6b7beb9b Mon Sep 17 00:00:00 2001 From: Thang Long VU Date: Wed, 29 Apr 2026 23:48:14 +0000 Subject: [PATCH 05/36] Add connector-w/-cache repeated sql() tests for Spark Connect Adds 3 Connect tests verifying that when a DSv2 connector caches table state, external changes are invisible through repeated sql() calls. 
Co-authored-by: Isaac --- .../catalog/CachingInMemoryTableCatalog.scala | 35 ++++---- .../DataSourceV2RepeatedSQLConnectSuite.scala | 90 ++++++++++++++++++- 2 files changed, 109 insertions(+), 16 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CachingInMemoryTableCatalog.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CachingInMemoryTableCatalog.scala index f8e3224fa7e12..abcb04615d249 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CachingInMemoryTableCatalog.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CachingInMemoryTableCatalog.scala @@ -21,30 +21,35 @@ import java.util.concurrent.ConcurrentHashMap /** * An InMemoryTableCatalog that simulates a caching connector like - * Iceberg's CachingCatalog. On first [[loadTable]], returns a fresh + * Iceberg's CachingCatalog. On first loadTable, returns a fresh * copy. On subsequent loads, returns the CACHED (stale) copy, * making external changes invisible. * - * Session writes go through the write-variant [[loadTable]], which is not - * cached, so they modify the underlying table directly. Cached [[loadTable]] - * results may still be stale until [[clearCache]] or REFRESH TABLE (which - * invokes [[invalidateTable]]) is called. + * Session writes go through the SQL path which modifies the + * original table and invalidates, but direct catalog API + * modifications are not visible until the cache is cleared. * - * Only the primary [[loadTable(ident:org\.apache\.spark\.sql\.connector\.catalog\.Identifier)*]] - * overload is cached. Version and timestamp overloads bypass the cache, matching - * time-travel semantics. [[dropTable]], [[createTable]], and [[alterTable]] do not - * invalidate the cache, matching the behavior of real caching connectors. + * Call [[CachingInMemoryTableCatalog.clearCache()]] to simulate + * cache expiration (like Iceberg's 30-second TTL). 
*/ class CachingInMemoryTableCatalog extends InMemoryTableCatalog { - private val cachedTables = new ConcurrentHashMap[Identifier, Table]() + import CachingInMemoryTableCatalog._ - override def loadTable(ident: Identifier): Table = - cachedTables.computeIfAbsent(ident, _ => super.loadTable(ident)) + override def loadTable(ident: Identifier): Table = { + cachedTables.computeIfAbsent(cacheKey(name, ident), _ => { + super.loadTable(ident) + }) + } - override def invalidateTable(ident: Identifier): Unit = { - super.invalidateTable(ident) - cachedTables.remove(ident) + private def cacheKey( + catalog: String, ident: Identifier): String = { + s"$catalog.${ident.toString}" } +} + +object CachingInMemoryTableCatalog { + private val cachedTables = + new ConcurrentHashMap[String, Table]() def clearCache(): Unit = cachedTables.clear() } diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala index abdd93e2042e6..2aef7bd83c411 100644 --- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala +++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala @@ -23,7 +23,7 @@ import java.util.Collections import org.apache.spark.SparkConf import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.connector.catalog.{BufferedRows, Column, Identifier, InMemoryBaseTable, InMemoryTableCatalog} +import org.apache.spark.sql.connector.catalog.{BufferedRows, CachingInMemoryTableCatalog, Column, Identifier, InMemoryBaseTable, InMemoryTableCatalog} import org.apache.spark.sql.connector.catalog.{TableChange, TableWritePrivilege} import org.apache.spark.sql.types.{IntegerType, StructType} @@ -44,8 +44,11 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { 
override def sparkConf: SparkConf = super.sparkConf .set("spark.sql.catalog.testcat", classOf[InMemoryTableCatalog].getName) .set("spark.sql.catalog.testcat.copyOnLoad", "true") + .set("spark.sql.catalog.cachingcat", classOf[CachingInMemoryTableCatalog].getName) + .set("spark.sql.catalog.cachingcat.copyOnLoad", "true") private val T = "testcat.ns1.ns2.tbl" + private val CT = "cachingcat.ns1.ns2.tbl" private val ident = Identifier.of(Array("ns1", "ns2"), "tbl") private def assertRows(actual: Array[Row], expected: Seq[Row]): Unit = { @@ -60,6 +63,12 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { .catalog("testcat") .asInstanceOf[InMemoryTableCatalog] + private def serverCachingCatalog( + serverSession: org.apache.spark.sql.classic.SparkSession): CachingInMemoryTableCatalog = + serverSession.sessionState.catalogManager + .catalog("cachingcat") + .asInstanceOf[CachingInMemoryTableCatalog] + // Scenario 1: external writes test("[connect] repeated sql() reflects session write") { @@ -265,4 +274,83 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { session.sql(s"DROP TABLE IF EXISTS $T").collect() } } + + // Connector w/ cache: repeated sql() tests. + // CachingInMemoryTableCatalog caches the first loadTable result. + // External changes go to the original table but reads return the + // cached (stale) copy, so external mutations are invisible. 
+ + test("[connect] connector w/ cache: repeated sql() stale after external write") { + withSession { session => + session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $CT VALUES (1, 100)").collect() + assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) + + // external writer adds (2, 200) via catalog API (bypasses cache) + val serverSession = getServerSession(session) + val cat = serverCachingCatalog(serverSession) + val schema = StructType.fromDDL("id INT, salary INT") + val extTable = cat + .loadTable(ident, util.Set.of(TableWritePrivilege.INSERT)) + .asInstanceOf[InMemoryBaseTable] + extTable.withData(Array(new BufferedRows(Seq.empty, schema).withRow(InternalRow(2, 200)))) + + // Caching connector returns stale table: external write invisible + assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) + + CachingInMemoryTableCatalog.clearCache() + session.sql(s"DROP TABLE IF EXISTS $CT").collect() + } + } + + test("[connect] connector w/ cache: repeated sql() stale after external schema change") { + withSession { session => + session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $CT VALUES (1, 100)").collect() + assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) + + // external schema change + data via catalog API + val serverSession = getServerSession(session) + val cat = serverCachingCatalog(serverSession) + val addCol = TableChange.addColumn(Array("new_col"), IntegerType, true) + cat.alterTable(ident, addCol) + + val schema3 = StructType.fromDDL("id INT, salary INT, new_col INT") + val extTable = cat + .loadTable(ident, util.Set.of(TableWritePrivilege.INSERT)) + .asInstanceOf[InMemoryBaseTable] + extTable.withData( + Array(new BufferedRows(Seq.empty, schema3).withRow(InternalRow(2, 200, -1)))) + + // Caching connector returns stale table: external changes invisible + assertRows(session.sql(s"SELECT * 
FROM $CT").collect(), Seq(Row(1, 100))) + + CachingInMemoryTableCatalog.clearCache() + session.sql(s"DROP TABLE IF EXISTS $CT").collect() + } + } + + test("[connect] connector w/ cache: repeated sql() stale after external drop/recreate") { + withSession { session => + session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $CT VALUES (1, 100)").collect() + assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) + + // external drop and recreate via catalog API + val serverSession = getServerSession(session) + val cat = serverCachingCatalog(serverSession) + cat.dropTable(ident) + cat.createTable( + ident, + Array(Column.create("id", IntegerType), Column.create("salary", IntegerType)), + Array.empty, + Collections.emptyMap[String, String]) + + // Caching connector returns stale table: drop/recreate invisible + assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) + + CachingInMemoryTableCatalog.clearCache() + session.sql(s"DROP TABLE IF EXISTS $CT").collect() + } + } } From 6415e2cb3e617e293e8bc32352d4a58c3d81fb15 Mon Sep 17 00:00:00 2001 From: Thang Long VU Date: Thu, 30 Apr 2026 01:31:49 +0000 Subject: [PATCH 06/36] Retrigger CI: flaky protobuf test failure (unrelated) Co-authored-by: Isaac From bdc4f9a6cae55b8280f9d5c41fc9dc3c53a7fb90 Mon Sep 17 00:00:00 2001 From: Thang Long VU Date: Thu, 30 Apr 2026 02:33:29 +0000 Subject: [PATCH 07/36] Fix cache cleanup order: clear cache after dropping table CachingInMemoryTableCatalog.clearCache() must run AFTER DROP TABLE, not before. Otherwise DROP TABLE re-caches the table via loadTable(), and the next test's CREATE TABLE hits a stale cache entry. 
Co-authored-by: Isaac --- .../sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala index 2aef7bd83c411..85a74bfd278c7 100644 --- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala +++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala @@ -298,8 +298,8 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { // Caching connector returns stale table: external write invisible assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) - CachingInMemoryTableCatalog.clearCache() session.sql(s"DROP TABLE IF EXISTS $CT").collect() + CachingInMemoryTableCatalog.clearCache() } } @@ -325,8 +325,8 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { // Caching connector returns stale table: external changes invisible assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) - CachingInMemoryTableCatalog.clearCache() session.sql(s"DROP TABLE IF EXISTS $CT").collect() + CachingInMemoryTableCatalog.clearCache() } } @@ -349,8 +349,8 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { // Caching connector returns stale table: drop/recreate invisible assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) - CachingInMemoryTableCatalog.clearCache() session.sql(s"DROP TABLE IF EXISTS $CT").collect() + CachingInMemoryTableCatalog.clearCache() } } } From f177d59a7b8cac220c1e360a8a9910b05f1662e0 Mon Sep 17 00:00:00 2001 From: Thang Long VU Date: Thu, 30 Apr 2026 05:31:43 +0000 Subject: [PATCH 08/36] Retrigger CI: flaky transactional checks tests (unrelated) Co-authored-by: Isaac 
From a8e26adbe9bc712856d1773b157476b5e0a794d1 Mon Sep 17 00:00:00 2001 From: Thang Long VU Date: Thu, 30 Apr 2026 09:28:56 +0000 Subject: [PATCH 09/36] Retrigger CI: flaky transactional checks tests (unrelated) Co-authored-by: Isaac From 9fc78bf821e8ca8b7f94ca82f1c85670a2b87b54 Mon Sep 17 00:00:00 2001 From: Thang Long VU Date: Thu, 30 Apr 2026 11:52:27 +0000 Subject: [PATCH 10/36] Add REFRESH TABLE verification to connector-w/-cache Connect repeated sql() tests After asserting stale data with the caching connector, REFRESH TABLE invalidates the connector cache and verifies external changes become visible. Also adds invalidateTable override to CachingInMemoryTableCatalog. Co-authored-by: Isaac --- .../catalog/CachingInMemoryTableCatalog.scala | 5 +++++ .../DataSourceV2RepeatedSQLConnectSuite.scala | 14 ++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CachingInMemoryTableCatalog.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CachingInMemoryTableCatalog.scala index abcb04615d249..f19f81a50121a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CachingInMemoryTableCatalog.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CachingInMemoryTableCatalog.scala @@ -41,6 +41,11 @@ class CachingInMemoryTableCatalog extends InMemoryTableCatalog { }) } + override def invalidateTable(ident: Identifier): Unit = { + super.invalidateTable(ident) + cachedTables.remove(cacheKey(name, ident)) + } + private def cacheKey( catalog: String, ident: Identifier): String = { s"$catalog.${ident.toString}" diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala index 85a74bfd278c7..3c06f73464aac 100644 --- 
a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala +++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala @@ -298,6 +298,10 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { // Caching connector returns stale table: external write invisible assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) + // REFRESH TABLE invalidates the connector cache, external write becomes visible + session.sql(s"REFRESH TABLE $CT").collect() + assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100), Row(2, 200))) + session.sql(s"DROP TABLE IF EXISTS $CT").collect() CachingInMemoryTableCatalog.clearCache() } @@ -325,6 +329,12 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { // Caching connector returns stale table: external changes invisible assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) + // REFRESH TABLE invalidates the connector cache, schema change + data visible + session.sql(s"REFRESH TABLE $CT").collect() + assertRows( + session.sql(s"SELECT * FROM $CT").collect(), + Seq(Row(1, 100, null), Row(2, 200, -1))) + session.sql(s"DROP TABLE IF EXISTS $CT").collect() CachingInMemoryTableCatalog.clearCache() } @@ -349,6 +359,10 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { // Caching connector returns stale table: drop/recreate invisible assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) + // REFRESH TABLE invalidates the connector cache, new empty table visible + session.sql(s"REFRESH TABLE $CT").collect() + assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq.empty) + session.sql(s"DROP TABLE IF EXISTS $CT").collect() CachingInMemoryTableCatalog.clearCache() } From 20ef58766ea271c67d877bcff5d0629c8aedf214 Mon Sep 17 00:00:00 2001 From: Thang Long VU Date: Thu, 30 Apr 2026 13:08:30 +0000 Subject: 
[PATCH 11/36] Reorganize connector-w/-cache tests to be side-by-side with non-caching tests Move each cache test next to its corresponding external test with matching section numbers. The invalidateTable override that REFRESH TABLE relies on was already added to CachingInMemoryTableCatalog in the previous commit; this commit only moves tests. Co-authored-by: Isaac --- .../DataSourceV2RepeatedSQLConnectSuite.scala | 180 +++++++++--------- 1 file changed, 88 insertions(+), 92 deletions(-) diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala index 3c06f73464aac..a6222332ea14e 100644 --- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala +++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala @@ -107,6 +107,33 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { } } + // Scenario 1 connector w/ cache (external write, caching connector) + test("[connect] connector w/ cache: repeated sql() stale after external write") { + withSession { session => + session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $CT VALUES (1, 100)").collect() + assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) + + val serverSession = getServerSession(session) + val cat = serverCachingCatalog(serverSession) + val schema = StructType.fromDDL("id INT, salary INT") + val extTable = cat + .loadTable(ident, util.Set.of(TableWritePrivilege.INSERT)) + .asInstanceOf[InMemoryBaseTable] + extTable.withData(Array(new BufferedRows(Seq.empty, schema).withRow(InternalRow(2, 200)))) + + // Caching connector returns stale table: external write invisible + assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) + + // REFRESH TABLE invalidates the connector cache, external write becomes
visible + session.sql(s"REFRESH TABLE $CT").collect() + assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100), Row(2, 200))) + + session.sql(s"DROP TABLE IF EXISTS $CT").collect() + CachingInMemoryTableCatalog.clearCache() + } + } + // Scenario 2: external schema changes test("[connect] repeated sql() reflects session schema change") { @@ -154,6 +181,39 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { } } + // Scenario 2 connector w/ cache (external schema change, caching connector) + test("[connect] connector w/ cache: repeated sql() stale after external schema change") { + withSession { session => + session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $CT VALUES (1, 100)").collect() + assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) + + val serverSession = getServerSession(session) + val cat = serverCachingCatalog(serverSession) + val addCol = TableChange.addColumn(Array("new_col"), IntegerType, true) + cat.alterTable(ident, addCol) + + val schema3 = StructType.fromDDL("id INT, salary INT, new_col INT") + val extTable = cat + .loadTable(ident, util.Set.of(TableWritePrivilege.INSERT)) + .asInstanceOf[InMemoryBaseTable] + extTable.withData( + Array(new BufferedRows(Seq.empty, schema3).withRow(InternalRow(2, 200, -1)))) + + // Caching connector returns stale table: external changes invisible + assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) + + // REFRESH TABLE invalidates the connector cache, schema change + data visible + session.sql(s"REFRESH TABLE $CT").collect() + assertRows( + session.sql(s"SELECT * FROM $CT").collect(), + Seq(Row(1, 100, null), Row(2, 200, -1))) + + session.sql(s"DROP TABLE IF EXISTS $CT").collect() + CachingInMemoryTableCatalog.clearCache() + } + } + // Scenario 3: drop and recreate table test("[connect] repeated sql() reflects session drop/recreate") { @@ -194,6 +254,34 @@ class 
DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { } } + // Scenario 3 connector w/ cache (external drop/recreate, caching connector) + test("[connect] connector w/ cache: repeated sql() stale after external drop/recreate") { + withSession { session => + session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $CT VALUES (1, 100)").collect() + assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) + + val serverSession = getServerSession(session) + val cat = serverCachingCatalog(serverSession) + cat.dropTable(ident) + cat.createTable( + ident, + Array(Column.create("id", IntegerType), Column.create("salary", IntegerType)), + Array.empty, + Collections.emptyMap[String, String]) + + // Caching connector returns stale table: drop/recreate invisible + assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) + + // REFRESH TABLE invalidates the connector cache, new empty table visible + session.sql(s"REFRESH TABLE $CT").collect() + assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq.empty) + + session.sql(s"DROP TABLE IF EXISTS $CT").collect() + CachingInMemoryTableCatalog.clearCache() + } + } + // DataFrame reuse tests: these test Connect-specific behavior where reusing the same // DataFrame object across mutations still sees fresh data, because Connect re-sends // the plan for fresh analysis on every action. @@ -275,96 +363,4 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { } } - // Connector w/ cache: repeated sql() tests. - // CachingInMemoryTableCatalog caches the first loadTable result. - // External changes go to the original table but reads return the - // cached (stale) copy, so external mutations are invisible. 
- - test("[connect] connector w/ cache: repeated sql() stale after external write") { - withSession { session => - session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $CT VALUES (1, 100)").collect() - assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) - - // external writer adds (2, 200) via catalog API (bypasses cache) - val serverSession = getServerSession(session) - val cat = serverCachingCatalog(serverSession) - val schema = StructType.fromDDL("id INT, salary INT") - val extTable = cat - .loadTable(ident, util.Set.of(TableWritePrivilege.INSERT)) - .asInstanceOf[InMemoryBaseTable] - extTable.withData(Array(new BufferedRows(Seq.empty, schema).withRow(InternalRow(2, 200)))) - - // Caching connector returns stale table: external write invisible - assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) - - // REFRESH TABLE invalidates the connector cache, external write becomes visible - session.sql(s"REFRESH TABLE $CT").collect() - assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100), Row(2, 200))) - - session.sql(s"DROP TABLE IF EXISTS $CT").collect() - CachingInMemoryTableCatalog.clearCache() - } - } - - test("[connect] connector w/ cache: repeated sql() stale after external schema change") { - withSession { session => - session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $CT VALUES (1, 100)").collect() - assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) - - // external schema change + data via catalog API - val serverSession = getServerSession(session) - val cat = serverCachingCatalog(serverSession) - val addCol = TableChange.addColumn(Array("new_col"), IntegerType, true) - cat.alterTable(ident, addCol) - - val schema3 = StructType.fromDDL("id INT, salary INT, new_col INT") - val extTable = cat - .loadTable(ident, util.Set.of(TableWritePrivilege.INSERT)) - 
.asInstanceOf[InMemoryBaseTable] - extTable.withData( - Array(new BufferedRows(Seq.empty, schema3).withRow(InternalRow(2, 200, -1)))) - - // Caching connector returns stale table: external changes invisible - assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) - - // REFRESH TABLE invalidates the connector cache, schema change + data visible - session.sql(s"REFRESH TABLE $CT").collect() - assertRows( - session.sql(s"SELECT * FROM $CT").collect(), - Seq(Row(1, 100, null), Row(2, 200, -1))) - - session.sql(s"DROP TABLE IF EXISTS $CT").collect() - CachingInMemoryTableCatalog.clearCache() - } - } - - test("[connect] connector w/ cache: repeated sql() stale after external drop/recreate") { - withSession { session => - session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $CT VALUES (1, 100)").collect() - assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) - - // external drop and recreate via catalog API - val serverSession = getServerSession(session) - val cat = serverCachingCatalog(serverSession) - cat.dropTable(ident) - cat.createTable( - ident, - Array(Column.create("id", IntegerType), Column.create("salary", IntegerType)), - Array.empty, - Collections.emptyMap[String, String]) - - // Caching connector returns stale table: drop/recreate invisible - assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) - - // REFRESH TABLE invalidates the connector cache, new empty table visible - session.sql(s"REFRESH TABLE $CT").collect() - assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq.empty) - - session.sql(s"DROP TABLE IF EXISTS $CT").collect() - CachingInMemoryTableCatalog.clearCache() - } - } } From ec643ad58e2a13e9cf8129fec270f007624d65f9 Mon Sep 17 00:00:00 2001 From: Thang Long VU Date: Thu, 30 Apr 2026 22:25:39 +0000 Subject: [PATCH 12/36] Retrigger CI From 6151018938431f7d704402addc04b59e628fc580 Mon Sep 17 00:00:00 2001 From: Thang Long VU Date: 
Fri, 1 May 2026 19:01:21 +0000 Subject: [PATCH 13/36] Empty commit to retrigger CI Co-authored-by: Isaac From e39c09ba78461b36b83e967880a5742a02af86e0 Mon Sep 17 00:00:00 2001 From: Thang Long VU Date: Fri, 8 May 2026 12:36:13 +0000 Subject: [PATCH 14/36] Address review: fix assertRows, generic serverCatalog, try/finally cleanup Co-authored-by: Isaac --- .../DataSourceV2RepeatedSQLConnectSuite.scala | 473 +++++++++--------- 1 file changed, 239 insertions(+), 234 deletions(-) diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala index a6222332ea14e..da0f13f86ad1e 100644 --- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala +++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala @@ -20,10 +20,12 @@ package org.apache.spark.sql.connect import java.util import java.util.Collections +import scala.reflect.ClassTag + import org.apache.spark.SparkConf -import org.apache.spark.sql.Row +import org.apache.spark.sql.{classic, Row, SparkSession} import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.connector.catalog.{BufferedRows, CachingInMemoryTableCatalog, Column, Identifier, InMemoryBaseTable, InMemoryTableCatalog} +import org.apache.spark.sql.connector.catalog.{BufferedRows, CachingInMemoryTableCatalog, Column, Identifier, InMemoryBaseTable, InMemoryTableCatalog, TableCatalog} import org.apache.spark.sql.connector.catalog.{TableChange, TableWritePrivilege} import org.apache.spark.sql.types.{IntegerType, StructType} @@ -53,84 +55,84 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { private def assertRows(actual: Array[Row], expected: Seq[Row]): Unit = { assert( - actual.map(_.toString()).toSet == expected.map(_.toString()).toSet, + 
actual.toSeq.sortBy(_.toString()) == expected.sortBy(_.toString()), s"Expected ${expected.mkString(", ")} but got ${actual.mkString(", ")}") } - private def serverCatalog( - serverSession: org.apache.spark.sql.classic.SparkSession): InMemoryTableCatalog = - serverSession.sessionState.catalogManager - .catalog("testcat") - .asInstanceOf[InMemoryTableCatalog] + private def serverCatalog[T <: TableCatalog: ClassTag]( + serverSession: classic.SparkSession, name: String): T = + serverSession.sessionState.catalogManager.catalog(name).asInstanceOf[T] - private def serverCachingCatalog( - serverSession: org.apache.spark.sql.classic.SparkSession): CachingInMemoryTableCatalog = - serverSession.sessionState.catalogManager - .catalog("cachingcat") - .asInstanceOf[CachingInMemoryTableCatalog] + private def withCleanup(session: SparkSession, table: String)(fn: => Unit): Unit = { + try { fn } finally { session.sql(s"DROP TABLE IF EXISTS $table").collect() } + } // Scenario 1: external writes test("[connect] repeated sql() reflects session write") { withSession { session => - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() - assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100))) - - session.sql(s"INSERT INTO $T VALUES (2, 200)").collect() - assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100), Row(2, 200))) - - session.sql(s"DROP TABLE IF EXISTS $T").collect() + withCleanup(session, T) { + session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() + assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100))) + + session.sql(s"INSERT INTO $T VALUES (2, 200)").collect() + assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100), Row(2, 200))) + } } } test("[connect] repeated sql() reflects external write") { withSession { session => - session.sql(s"CREATE TABLE $T 
(id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() - assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100))) - - // external writer adds (2, 200) - val serverSession = getServerSession(session) - val cat = serverSession.sessionState.catalogManager - .catalog("testcat") - .asInstanceOf[InMemoryTableCatalog] - val schema2 = StructType.fromDDL("id INT, salary INT") - val extTable = cat - .loadTable(ident, util.Set.of(TableWritePrivilege.INSERT)) - .asInstanceOf[InMemoryBaseTable] - extTable.withData(Array(new BufferedRows(Seq.empty, schema2).withRow(InternalRow(2, 200)))) - - assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100), Row(2, 200))) - - session.sql(s"DROP TABLE IF EXISTS $T").collect() + withCleanup(session, T) { + session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() + assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100))) + + // external writer adds (2, 200) + val serverSession = getServerSession(session) + val cat = serverCatalog[InMemoryTableCatalog](serverSession, "testcat") + val schema2 = StructType.fromDDL("id INT, salary INT") + val extTable = cat + .loadTable(ident, util.Set.of(TableWritePrivilege.INSERT)) + .asInstanceOf[InMemoryBaseTable] + extTable.withData( + Array(new BufferedRows(Seq.empty, schema2).withRow(InternalRow(2, 200)))) + + assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100), Row(2, 200))) + } } } // Scenario 1 connector w/ cache (external write, caching connector) test("[connect] connector w/ cache: repeated sql() stale after external write") { withSession { session => - session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $CT VALUES (1, 100)").collect() - assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) - - val serverSession = getServerSession(session) - val 
cat = serverCachingCatalog(serverSession) - val schema = StructType.fromDDL("id INT, salary INT") - val extTable = cat - .loadTable(ident, util.Set.of(TableWritePrivilege.INSERT)) - .asInstanceOf[InMemoryBaseTable] - extTable.withData(Array(new BufferedRows(Seq.empty, schema).withRow(InternalRow(2, 200)))) - - // Caching connector returns stale table: external write invisible - assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) - - // REFRESH TABLE invalidates the connector cache, external write becomes visible - session.sql(s"REFRESH TABLE $CT").collect() - assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100), Row(2, 200))) - - session.sql(s"DROP TABLE IF EXISTS $CT").collect() - CachingInMemoryTableCatalog.clearCache() + withCleanup(session, CT) { + try { + session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $CT VALUES (1, 100)").collect() + assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) + + val serverSession = getServerSession(session) + val cat = serverCatalog[CachingInMemoryTableCatalog](serverSession, "cachingcat") + val schema = StructType.fromDDL("id INT, salary INT") + val extTable = cat + .loadTable(ident, util.Set.of(TableWritePrivilege.INSERT)) + .asInstanceOf[InMemoryBaseTable] + extTable.withData( + Array(new BufferedRows(Seq.empty, schema).withRow(InternalRow(2, 200)))) + + // Caching connector returns stale table: external write invisible + assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) + + // REFRESH TABLE invalidates the connector cache, external write becomes visible + session.sql(s"REFRESH TABLE $CT").collect() + assertRows( + session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100), Row(2, 200))) + } finally { + CachingInMemoryTableCatalog.clearCache() + } + } } } @@ -138,79 +140,80 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { test("[connect] repeated sql() 
reflects session schema change") { withSession { session => - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() - assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100))) - - session.sql(s"ALTER TABLE $T ADD COLUMN new_col INT").collect() - session.sql(s"INSERT INTO $T VALUES (2, 200, -1)").collect() - assertRows( - session.sql(s"SELECT * FROM $T").collect(), - Seq(Row(1, 100, null), Row(2, 200, -1))) - - session.sql(s"DROP TABLE IF EXISTS $T").collect() + withCleanup(session, T) { + session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() + assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100))) + + session.sql(s"ALTER TABLE $T ADD COLUMN new_col INT").collect() + session.sql(s"INSERT INTO $T VALUES (2, 200, -1)").collect() + assertRows( + session.sql(s"SELECT * FROM $T").collect(), + Seq(Row(1, 100, null), Row(2, 200, -1))) + } } } test("[connect] repeated sql() reflects external schema change") { withSession { session => - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() - assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100))) - - // external schema change + data write via catalog API - val serverSession = getServerSession(session) - val cat = serverSession.sessionState.catalogManager - .catalog("testcat") - .asInstanceOf[InMemoryTableCatalog] - val addCol = TableChange.addColumn(Array("new_col"), IntegerType, true) - cat.alterTable(ident, addCol) - - val schema3 = StructType.fromDDL("id INT, salary INT, new_col INT") - val extTable = cat - .loadTable(ident, util.Set.of(TableWritePrivilege.INSERT)) - .asInstanceOf[InMemoryBaseTable] - extTable.withData( - Array(new BufferedRows(Seq.empty, schema3).withRow(InternalRow(2, 200, -1)))) - - assertRows( - session.sql(s"SELECT * FROM 
$T").collect(), - Seq(Row(1, 100, null), Row(2, 200, -1))) - - session.sql(s"DROP TABLE IF EXISTS $T").collect() + withCleanup(session, T) { + session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() + assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100))) + + // external schema change + data write via catalog API + val serverSession = getServerSession(session) + val cat = serverCatalog[InMemoryTableCatalog](serverSession, "testcat") + val addCol = TableChange.addColumn(Array("new_col"), IntegerType, true) + cat.alterTable(ident, addCol) + + val schema3 = StructType.fromDDL("id INT, salary INT, new_col INT") + val extTable = cat + .loadTable(ident, util.Set.of(TableWritePrivilege.INSERT)) + .asInstanceOf[InMemoryBaseTable] + extTable.withData( + Array(new BufferedRows(Seq.empty, schema3).withRow(InternalRow(2, 200, -1)))) + + assertRows( + session.sql(s"SELECT * FROM $T").collect(), + Seq(Row(1, 100, null), Row(2, 200, -1))) + } } } // Scenario 2 connector w/ cache (external schema change, caching connector) test("[connect] connector w/ cache: repeated sql() stale after external schema change") { withSession { session => - session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $CT VALUES (1, 100)").collect() - assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) - - val serverSession = getServerSession(session) - val cat = serverCachingCatalog(serverSession) - val addCol = TableChange.addColumn(Array("new_col"), IntegerType, true) - cat.alterTable(ident, addCol) - - val schema3 = StructType.fromDDL("id INT, salary INT, new_col INT") - val extTable = cat - .loadTable(ident, util.Set.of(TableWritePrivilege.INSERT)) - .asInstanceOf[InMemoryBaseTable] - extTable.withData( - Array(new BufferedRows(Seq.empty, schema3).withRow(InternalRow(2, 200, -1)))) - - // Caching connector returns stale table: external 
changes invisible - assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) - - // REFRESH TABLE invalidates the connector cache, schema change + data visible - session.sql(s"REFRESH TABLE $CT").collect() - assertRows( - session.sql(s"SELECT * FROM $CT").collect(), - Seq(Row(1, 100, null), Row(2, 200, -1))) - - session.sql(s"DROP TABLE IF EXISTS $CT").collect() - CachingInMemoryTableCatalog.clearCache() + withCleanup(session, CT) { + try { + session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $CT VALUES (1, 100)").collect() + assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) + + val serverSession = getServerSession(session) + val cat = serverCatalog[CachingInMemoryTableCatalog](serverSession, "cachingcat") + val addCol = TableChange.addColumn(Array("new_col"), IntegerType, true) + cat.alterTable(ident, addCol) + + val schema3 = StructType.fromDDL("id INT, salary INT, new_col INT") + val extTable = cat + .loadTable(ident, util.Set.of(TableWritePrivilege.INSERT)) + .asInstanceOf[InMemoryBaseTable] + extTable.withData( + Array(new BufferedRows(Seq.empty, schema3).withRow(InternalRow(2, 200, -1)))) + + // Caching connector returns stale table: external changes invisible + assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) + + // REFRESH TABLE invalidates the connector cache, schema change + data visible + session.sql(s"REFRESH TABLE $CT").collect() + assertRows( + session.sql(s"SELECT * FROM $CT").collect(), + Seq(Row(1, 100, null), Row(2, 200, -1))) + } finally { + CachingInMemoryTableCatalog.clearCache() + } + } } } @@ -218,67 +221,68 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { test("[connect] repeated sql() reflects session drop/recreate") { withSession { session => - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() - 
assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100))) - - session.sql(s"DROP TABLE $T").collect() - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq.empty) - - session.sql(s"DROP TABLE IF EXISTS $T").collect() + withCleanup(session, T) { + session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() + assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100))) + + session.sql(s"DROP TABLE $T").collect() + session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() + assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq.empty) + } } } test("[connect] repeated sql() reflects external drop/recreate") { withSession { session => - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() - assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100))) - - // external drop and recreate via catalog API - val serverSession = getServerSession(session) - val cat = serverSession.sessionState.catalogManager - .catalog("testcat") - .asInstanceOf[InMemoryTableCatalog] - cat.dropTable(ident) - cat.createTable( - ident, - Array(Column.create("id", IntegerType), Column.create("salary", IntegerType)), - Array.empty, - Collections.emptyMap[String, String]) - - assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq.empty) - - session.sql(s"DROP TABLE IF EXISTS $T").collect() + withCleanup(session, T) { + session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() + assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100))) + + // external drop and recreate via catalog API + val serverSession = getServerSession(session) + val cat = serverCatalog[InMemoryTableCatalog](serverSession, "testcat") + 
cat.dropTable(ident) + cat.createTable( + ident, + Array(Column.create("id", IntegerType), Column.create("salary", IntegerType)), + Array.empty, + Collections.emptyMap[String, String]) + + assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq.empty) + } } } // Scenario 3 connector w/ cache (external drop/recreate, caching connector) test("[connect] connector w/ cache: repeated sql() stale after external drop/recreate") { withSession { session => - session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $CT VALUES (1, 100)").collect() - assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) - - val serverSession = getServerSession(session) - val cat = serverCachingCatalog(serverSession) - cat.dropTable(ident) - cat.createTable( - ident, - Array(Column.create("id", IntegerType), Column.create("salary", IntegerType)), - Array.empty, - Collections.emptyMap[String, String]) - - // Caching connector returns stale table: drop/recreate invisible - assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) - - // REFRESH TABLE invalidates the connector cache, new empty table visible - session.sql(s"REFRESH TABLE $CT").collect() - assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq.empty) - - session.sql(s"DROP TABLE IF EXISTS $CT").collect() - CachingInMemoryTableCatalog.clearCache() + withCleanup(session, CT) { + try { + session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $CT VALUES (1, 100)").collect() + assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) + + val serverSession = getServerSession(session) + val cat = serverCatalog[CachingInMemoryTableCatalog](serverSession, "cachingcat") + cat.dropTable(ident) + cat.createTable( + ident, + Array(Column.create("id", IntegerType), Column.create("salary", IntegerType)), + Array.empty, + Collections.emptyMap[String, String]) + + // Caching connector returns stale 
table: drop/recreate invisible + assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) + + // REFRESH TABLE invalidates the connector cache, new empty table visible + session.sql(s"REFRESH TABLE $CT").collect() + assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq.empty) + } finally { + CachingInMemoryTableCatalog.clearCache() + } + } } } @@ -288,78 +292,79 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { test("[connect] reused DataFrame reflects external write") { withSession { session => - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() - - val df = session.sql(s"SELECT * FROM $T") - assertRows(df.collect(), Seq(Row(1, 100))) - - // external write via catalog API - val serverSession = getServerSession(session) - val cat = serverCatalog(serverSession) - val schema2 = StructType.fromDDL("id INT, salary INT") - val extTable = cat - .loadTable(ident, util.Set.of(TableWritePrivilege.INSERT)) - .asInstanceOf[InMemoryBaseTable] - extTable.withData(Array(new BufferedRows(Seq.empty, schema2).withRow(InternalRow(2, 200)))) - - // same df object, Connect re-analyzes and sees the new row - assertRows(df.collect(), Seq(Row(1, 100), Row(2, 200))) - - session.sql(s"DROP TABLE IF EXISTS $T").collect() + withCleanup(session, T) { + session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() + + val df = session.sql(s"SELECT * FROM $T") + assertRows(df.collect(), Seq(Row(1, 100))) + + // external write via catalog API + val serverSession = getServerSession(session) + val cat = serverCatalog[InMemoryTableCatalog](serverSession, "testcat") + val schema2 = StructType.fromDDL("id INT, salary INT") + val extTable = cat + .loadTable(ident, util.Set.of(TableWritePrivilege.INSERT)) + .asInstanceOf[InMemoryBaseTable] + extTable.withData( + Array(new BufferedRows(Seq.empty, 
schema2).withRow(InternalRow(2, 200)))) + + // same df object, Connect re-analyzes and sees the new row + assertRows(df.collect(), Seq(Row(1, 100), Row(2, 200))) + } } } test("[connect] reused DataFrame reflects external schema change") { withSession { session => - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() - - val df = session.sql(s"SELECT * FROM $T") - assertRows(df.collect(), Seq(Row(1, 100))) - - // external schema change + write via catalog API - val serverSession = getServerSession(session) - val cat = serverCatalog(serverSession) - val addCol = TableChange.addColumn(Array("new_col"), IntegerType, true) - cat.alterTable(ident, addCol) - - val schema3 = StructType.fromDDL("id INT, salary INT, new_col INT") - val extTable = cat - .loadTable(ident, util.Set.of(TableWritePrivilege.INSERT)) - .asInstanceOf[InMemoryBaseTable] - extTable.withData( - Array(new BufferedRows(Seq.empty, schema3).withRow(InternalRow(2, 200, -1)))) - - // same df object, Connect re-analyzes and sees the new schema - assertRows(df.collect(), Seq(Row(1, 100, null), Row(2, 200, -1))) - - session.sql(s"DROP TABLE IF EXISTS $T").collect() + withCleanup(session, T) { + session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() + + val df = session.sql(s"SELECT * FROM $T") + assertRows(df.collect(), Seq(Row(1, 100))) + + // external schema change + write via catalog API + val serverSession = getServerSession(session) + val cat = serverCatalog[InMemoryTableCatalog](serverSession, "testcat") + val addCol = TableChange.addColumn(Array("new_col"), IntegerType, true) + cat.alterTable(ident, addCol) + + val schema3 = StructType.fromDDL("id INT, salary INT, new_col INT") + val extTable = cat + .loadTable(ident, util.Set.of(TableWritePrivilege.INSERT)) + .asInstanceOf[InMemoryBaseTable] + extTable.withData( + Array(new BufferedRows(Seq.empty, 
schema3).withRow(InternalRow(2, 200, -1)))) + + // same df object, Connect re-analyzes and sees the new schema + assertRows(df.collect(), Seq(Row(1, 100, null), Row(2, 200, -1))) + } } } test("[connect] reused DataFrame reflects external drop/recreate") { withSession { session => - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() - - val df = session.sql(s"SELECT * FROM $T") - assertRows(df.collect(), Seq(Row(1, 100))) - - // external drop and recreate via catalog API - val serverSession = getServerSession(session) - val cat = serverCatalog(serverSession) - cat.dropTable(ident) - cat.createTable( - ident, - Array(Column.create("id", IntegerType), Column.create("salary", IntegerType)), - Array.empty, - Collections.emptyMap[String, String]) - - // same df object, Connect re-analyzes against the new empty table - assertRows(df.collect(), Seq.empty) - - session.sql(s"DROP TABLE IF EXISTS $T").collect() + withCleanup(session, T) { + session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() + + val df = session.sql(s"SELECT * FROM $T") + assertRows(df.collect(), Seq(Row(1, 100))) + + // external drop and recreate via catalog API + val serverSession = getServerSession(session) + val cat = serverCatalog[InMemoryTableCatalog](serverSession, "testcat") + cat.dropTable(ident) + cat.createTable( + ident, + Array(Column.create("id", IntegerType), Column.create("salary", IntegerType)), + Array.empty, + Collections.emptyMap[String, String]) + + // same df object, Connect re-analyzes against the new empty table + assertRows(df.collect(), Seq.empty) + } } } From a2c8d5011a2379713f4dd1f7637d81b23bc6f555 Mon Sep 17 00:00:00 2001 From: Thang Long VU Date: Fri, 8 May 2026 19:56:09 +0000 Subject: [PATCH 15/36] Address review: remove ClassTag, add externalAppend helper Remove unnecessary ClassTag from serverCatalog, add 
externalAppend helper method, and use it to reduce boilerplate in test bodies. Co-authored-by: Isaac --- .../DataSourceV2RepeatedSQLConnectSuite.scala | 59 ++++++++----------- 1 file changed, 26 insertions(+), 33 deletions(-) diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala index da0f13f86ad1e..f21afdd099278 100644 --- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala +++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala @@ -20,8 +20,6 @@ package org.apache.spark.sql.connect import java.util import java.util.Collections -import scala.reflect.ClassTag - import org.apache.spark.SparkConf import org.apache.spark.sql.{classic, Row, SparkSession} import org.apache.spark.sql.catalyst.InternalRow @@ -59,10 +57,23 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { s"Expected ${expected.mkString(", ")} but got ${actual.mkString(", ")}") } - private def serverCatalog[T <: TableCatalog: ClassTag]( + /** Get a catalog from the server-side session by name. */ + private def serverCatalog[T <: TableCatalog]( serverSession: classic.SparkSession, name: String): T = serverSession.sessionState.catalogManager.catalog(name).asInstanceOf[T] + /** Appends a row to a DSv2 table via the catalog API, bypassing the session. 
*/ + private def externalAppend( + cat: TableCatalog, + ident: Identifier, + schema: StructType, + row: InternalRow): Unit = { + val extTable = cat + .loadTable(ident, util.Set.of(TableWritePrivilege.INSERT)) + .asInstanceOf[InMemoryBaseTable] + extTable.withData(Array(new BufferedRows(Seq.empty, schema).withRow(row))) + } + private def withCleanup(session: SparkSession, table: String)(fn: => Unit): Unit = { try { fn } finally { session.sql(s"DROP TABLE IF EXISTS $table").collect() } } @@ -93,11 +104,8 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { val serverSession = getServerSession(session) val cat = serverCatalog[InMemoryTableCatalog](serverSession, "testcat") val schema2 = StructType.fromDDL("id INT, salary INT") - val extTable = cat - .loadTable(ident, util.Set.of(TableWritePrivilege.INSERT)) - .asInstanceOf[InMemoryBaseTable] - extTable.withData( - Array(new BufferedRows(Seq.empty, schema2).withRow(InternalRow(2, 200)))) + externalAppend( + cat = cat, ident = ident, schema = schema2, row = InternalRow(2, 200)) assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100), Row(2, 200))) } @@ -116,11 +124,8 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { val serverSession = getServerSession(session) val cat = serverCatalog[CachingInMemoryTableCatalog](serverSession, "cachingcat") val schema = StructType.fromDDL("id INT, salary INT") - val extTable = cat - .loadTable(ident, util.Set.of(TableWritePrivilege.INSERT)) - .asInstanceOf[InMemoryBaseTable] - extTable.withData( - Array(new BufferedRows(Seq.empty, schema).withRow(InternalRow(2, 200)))) + externalAppend( + cat = cat, ident = ident, schema = schema, row = InternalRow(2, 200)) // Caching connector returns stale table: external write invisible assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) @@ -168,11 +173,8 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { cat.alterTable(ident, addCol) 
val schema3 = StructType.fromDDL("id INT, salary INT, new_col INT") - val extTable = cat - .loadTable(ident, util.Set.of(TableWritePrivilege.INSERT)) - .asInstanceOf[InMemoryBaseTable] - extTable.withData( - Array(new BufferedRows(Seq.empty, schema3).withRow(InternalRow(2, 200, -1)))) + externalAppend( + cat = cat, ident = ident, schema = schema3, row = InternalRow(2, 200, -1)) assertRows( session.sql(s"SELECT * FROM $T").collect(), @@ -196,11 +198,8 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { cat.alterTable(ident, addCol) val schema3 = StructType.fromDDL("id INT, salary INT, new_col INT") - val extTable = cat - .loadTable(ident, util.Set.of(TableWritePrivilege.INSERT)) - .asInstanceOf[InMemoryBaseTable] - extTable.withData( - Array(new BufferedRows(Seq.empty, schema3).withRow(InternalRow(2, 200, -1)))) + externalAppend( + cat = cat, ident = ident, schema = schema3, row = InternalRow(2, 200, -1)) // Caching connector returns stale table: external changes invisible assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) @@ -303,11 +302,8 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { val serverSession = getServerSession(session) val cat = serverCatalog[InMemoryTableCatalog](serverSession, "testcat") val schema2 = StructType.fromDDL("id INT, salary INT") - val extTable = cat - .loadTable(ident, util.Set.of(TableWritePrivilege.INSERT)) - .asInstanceOf[InMemoryBaseTable] - extTable.withData( - Array(new BufferedRows(Seq.empty, schema2).withRow(InternalRow(2, 200)))) + externalAppend( + cat = cat, ident = ident, schema = schema2, row = InternalRow(2, 200)) // same df object, Connect re-analyzes and sees the new row assertRows(df.collect(), Seq(Row(1, 100), Row(2, 200))) @@ -331,11 +327,8 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { cat.alterTable(ident, addCol) val schema3 = StructType.fromDDL("id INT, salary INT, new_col INT") - val extTable = cat - 
.loadTable(ident, util.Set.of(TableWritePrivilege.INSERT)) - .asInstanceOf[InMemoryBaseTable] - extTable.withData( - Array(new BufferedRows(Seq.empty, schema3).withRow(InternalRow(2, 200, -1)))) + externalAppend( + cat = cat, ident = ident, schema = schema3, row = InternalRow(2, 200, -1)) // same df object, Connect re-analyzes and sees the new schema assertRows(df.collect(), Seq(Row(1, 100, null), Row(2, 200, -1))) From 4bd573a024085b7f282ee29a4249853872784431 Mon Sep 17 00:00:00 2001 From: Thang Long VU Date: Wed, 13 May 2026 12:04:10 +0000 Subject: [PATCH 16/36] Address review: remove empty lines, rename schema vars, move clearCache to withCleanup Co-authored-by: Isaac --- .../DataSourceV2RepeatedSQLConnectSuite.scala | 151 ++++++++---------- 1 file changed, 69 insertions(+), 82 deletions(-) diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala index f21afdd099278..f4cbd709dc1b5 100644 --- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala +++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala @@ -75,11 +75,13 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { } private def withCleanup(session: SparkSession, table: String)(fn: => Unit): Unit = { - try { fn } finally { session.sql(s"DROP TABLE IF EXISTS $table").collect() } + try { fn } finally { + session.sql(s"DROP TABLE IF EXISTS $table").collect() + CachingInMemoryTableCatalog.clearCache() + } } // Scenario 1: external writes - test("[connect] repeated sql() reflects session write") { withSession { session => withCleanup(session, T) { @@ -103,9 +105,9 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { // external writer adds (2, 200) val serverSession = getServerSession(session) 
val cat = serverCatalog[InMemoryTableCatalog](serverSession, "testcat") - val schema2 = StructType.fromDDL("id INT, salary INT") + val newSchema = StructType.fromDDL("id INT, salary INT") externalAppend( - cat = cat, ident = ident, schema = schema2, row = InternalRow(2, 200)) + cat = cat, ident = ident, schema = newSchema, row = InternalRow(2, 200)) assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100), Row(2, 200))) } @@ -116,33 +118,28 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { test("[connect] connector w/ cache: repeated sql() stale after external write") { withSession { session => withCleanup(session, CT) { - try { - session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $CT VALUES (1, 100)").collect() - assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) - - val serverSession = getServerSession(session) - val cat = serverCatalog[CachingInMemoryTableCatalog](serverSession, "cachingcat") - val schema = StructType.fromDDL("id INT, salary INT") - externalAppend( - cat = cat, ident = ident, schema = schema, row = InternalRow(2, 200)) - - // Caching connector returns stale table: external write invisible - assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) - - // REFRESH TABLE invalidates the connector cache, external write becomes visible - session.sql(s"REFRESH TABLE $CT").collect() - assertRows( - session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100), Row(2, 200))) - } finally { - CachingInMemoryTableCatalog.clearCache() - } + session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $CT VALUES (1, 100)").collect() + assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) + + val serverSession = getServerSession(session) + val cat = serverCatalog[CachingInMemoryTableCatalog](serverSession, "cachingcat") + val newSchema = StructType.fromDDL("id INT, 
salary INT") + externalAppend( + cat = cat, ident = ident, schema = newSchema, row = InternalRow(2, 200)) + + // Caching connector returns stale table: external write invisible + assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) + + // REFRESH TABLE invalidates the connector cache, external write becomes visible + session.sql(s"REFRESH TABLE $CT").collect() + assertRows( + session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100), Row(2, 200))) } } } // Scenario 2: external schema changes - test("[connect] repeated sql() reflects session schema change") { withSession { session => withCleanup(session, T) { @@ -172,9 +169,9 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { val addCol = TableChange.addColumn(Array("new_col"), IntegerType, true) cat.alterTable(ident, addCol) - val schema3 = StructType.fromDDL("id INT, salary INT, new_col INT") + val newSchema = StructType.fromDDL("id INT, salary INT, new_col INT") externalAppend( - cat = cat, ident = ident, schema = schema3, row = InternalRow(2, 200, -1)) + cat = cat, ident = ident, schema = newSchema, row = InternalRow(2, 200, -1)) assertRows( session.sql(s"SELECT * FROM $T").collect(), @@ -187,37 +184,32 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { test("[connect] connector w/ cache: repeated sql() stale after external schema change") { withSession { session => withCleanup(session, CT) { - try { - session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $CT VALUES (1, 100)").collect() - assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) - - val serverSession = getServerSession(session) - val cat = serverCatalog[CachingInMemoryTableCatalog](serverSession, "cachingcat") - val addCol = TableChange.addColumn(Array("new_col"), IntegerType, true) - cat.alterTable(ident, addCol) - - val schema3 = StructType.fromDDL("id INT, salary INT, new_col INT") - externalAppend( - 
cat = cat, ident = ident, schema = schema3, row = InternalRow(2, 200, -1)) - - // Caching connector returns stale table: external changes invisible - assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) - - // REFRESH TABLE invalidates the connector cache, schema change + data visible - session.sql(s"REFRESH TABLE $CT").collect() - assertRows( - session.sql(s"SELECT * FROM $CT").collect(), - Seq(Row(1, 100, null), Row(2, 200, -1))) - } finally { - CachingInMemoryTableCatalog.clearCache() - } + session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $CT VALUES (1, 100)").collect() + assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) + + val serverSession = getServerSession(session) + val cat = serverCatalog[CachingInMemoryTableCatalog](serverSession, "cachingcat") + val addCol = TableChange.addColumn(Array("new_col"), IntegerType, true) + cat.alterTable(ident, addCol) + + val newSchema = StructType.fromDDL("id INT, salary INT, new_col INT") + externalAppend( + cat = cat, ident = ident, schema = newSchema, row = InternalRow(2, 200, -1)) + + // Caching connector returns stale table: external changes invisible + assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) + + // REFRESH TABLE invalidates the connector cache, schema change + data visible + session.sql(s"REFRESH TABLE $CT").collect() + assertRows( + session.sql(s"SELECT * FROM $CT").collect(), + Seq(Row(1, 100, null), Row(2, 200, -1))) } } } // Scenario 3: drop and recreate table - test("[connect] repeated sql() reflects session drop/recreate") { withSession { session => withCleanup(session, T) { @@ -258,29 +250,25 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { test("[connect] connector w/ cache: repeated sql() stale after external drop/recreate") { withSession { session => withCleanup(session, CT) { - try { - session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING 
foo").collect() - session.sql(s"INSERT INTO $CT VALUES (1, 100)").collect() - assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) - - val serverSession = getServerSession(session) - val cat = serverCatalog[CachingInMemoryTableCatalog](serverSession, "cachingcat") - cat.dropTable(ident) - cat.createTable( - ident, - Array(Column.create("id", IntegerType), Column.create("salary", IntegerType)), - Array.empty, - Collections.emptyMap[String, String]) - - // Caching connector returns stale table: drop/recreate invisible - assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) - - // REFRESH TABLE invalidates the connector cache, new empty table visible - session.sql(s"REFRESH TABLE $CT").collect() - assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq.empty) - } finally { - CachingInMemoryTableCatalog.clearCache() - } + session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $CT VALUES (1, 100)").collect() + assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) + + val serverSession = getServerSession(session) + val cat = serverCatalog[CachingInMemoryTableCatalog](serverSession, "cachingcat") + cat.dropTable(ident) + cat.createTable( + ident, + Array(Column.create("id", IntegerType), Column.create("salary", IntegerType)), + Array.empty, + Collections.emptyMap[String, String]) + + // Caching connector returns stale table: drop/recreate invisible + assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) + + // REFRESH TABLE invalidates the connector cache, new empty table visible + session.sql(s"REFRESH TABLE $CT").collect() + assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq.empty) } } } @@ -301,9 +289,9 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { // external write via catalog API val serverSession = getServerSession(session) val cat = serverCatalog[InMemoryTableCatalog](serverSession, "testcat") - 
val schema2 = StructType.fromDDL("id INT, salary INT") + val newSchema = StructType.fromDDL("id INT, salary INT") externalAppend( - cat = cat, ident = ident, schema = schema2, row = InternalRow(2, 200)) + cat = cat, ident = ident, schema = newSchema, row = InternalRow(2, 200)) // same df object, Connect re-analyzes and sees the new row assertRows(df.collect(), Seq(Row(1, 100), Row(2, 200))) @@ -326,9 +314,9 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { val addCol = TableChange.addColumn(Array("new_col"), IntegerType, true) cat.alterTable(ident, addCol) - val schema3 = StructType.fromDDL("id INT, salary INT, new_col INT") + val newSchema = StructType.fromDDL("id INT, salary INT, new_col INT") externalAppend( - cat = cat, ident = ident, schema = schema3, row = InternalRow(2, 200, -1)) + cat = cat, ident = ident, schema = newSchema, row = InternalRow(2, 200, -1)) // same df object, Connect re-analyzes and sees the new schema assertRows(df.collect(), Seq(Row(1, 100, null), Row(2, 200, -1))) @@ -360,5 +348,4 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { } } } - } From d0d2e6b75c3b4e5f8c84acf1ba578ab2032fcfd1 Mon Sep 17 00:00:00 2001 From: Thang Long VU Date: Tue, 19 May 2026 12:15:20 +0000 Subject: [PATCH 17/36] Fix scalafmt formatting Co-authored-by: Isaac --- .../DataSourceV2RepeatedSQLConnectSuite.scala | 33 +++++++++++-------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala index f4cbd709dc1b5..6e8a8caee8983 100644 --- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala +++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala @@ -59,7 +59,8 @@ class 
DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { /** Get a catalog from the server-side session by name. */ private def serverCatalog[T <: TableCatalog]( - serverSession: classic.SparkSession, name: String): T = + serverSession: classic.SparkSession, + name: String): T = serverSession.sessionState.catalogManager.catalog(name).asInstanceOf[T] /** Appends a row to a DSv2 table via the catalog API, bypassing the session. */ @@ -75,7 +76,8 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { } private def withCleanup(session: SparkSession, table: String)(fn: => Unit): Unit = { - try { fn } finally { + try { fn } + finally { session.sql(s"DROP TABLE IF EXISTS $table").collect() CachingInMemoryTableCatalog.clearCache() } @@ -106,8 +108,7 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { val serverSession = getServerSession(session) val cat = serverCatalog[InMemoryTableCatalog](serverSession, "testcat") val newSchema = StructType.fromDDL("id INT, salary INT") - externalAppend( - cat = cat, ident = ident, schema = newSchema, row = InternalRow(2, 200)) + externalAppend(cat = cat, ident = ident, schema = newSchema, row = InternalRow(2, 200)) assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100), Row(2, 200))) } @@ -125,16 +126,14 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { val serverSession = getServerSession(session) val cat = serverCatalog[CachingInMemoryTableCatalog](serverSession, "cachingcat") val newSchema = StructType.fromDDL("id INT, salary INT") - externalAppend( - cat = cat, ident = ident, schema = newSchema, row = InternalRow(2, 200)) + externalAppend(cat = cat, ident = ident, schema = newSchema, row = InternalRow(2, 200)) // Caching connector returns stale table: external write invisible assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) // REFRESH TABLE invalidates the connector cache, external write becomes visible 
session.sql(s"REFRESH TABLE $CT").collect() - assertRows( - session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100), Row(2, 200))) + assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100), Row(2, 200))) } } } @@ -171,7 +170,10 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { val newSchema = StructType.fromDDL("id INT, salary INT, new_col INT") externalAppend( - cat = cat, ident = ident, schema = newSchema, row = InternalRow(2, 200, -1)) + cat = cat, + ident = ident, + schema = newSchema, + row = InternalRow(2, 200, -1)) assertRows( session.sql(s"SELECT * FROM $T").collect(), @@ -195,7 +197,10 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { val newSchema = StructType.fromDDL("id INT, salary INT, new_col INT") externalAppend( - cat = cat, ident = ident, schema = newSchema, row = InternalRow(2, 200, -1)) + cat = cat, + ident = ident, + schema = newSchema, + row = InternalRow(2, 200, -1)) // Caching connector returns stale table: external changes invisible assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) @@ -290,8 +295,7 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { val serverSession = getServerSession(session) val cat = serverCatalog[InMemoryTableCatalog](serverSession, "testcat") val newSchema = StructType.fromDDL("id INT, salary INT") - externalAppend( - cat = cat, ident = ident, schema = newSchema, row = InternalRow(2, 200)) + externalAppend(cat = cat, ident = ident, schema = newSchema, row = InternalRow(2, 200)) // same df object, Connect re-analyzes and sees the new row assertRows(df.collect(), Seq(Row(1, 100), Row(2, 200))) @@ -316,7 +320,10 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { val newSchema = StructType.fromDDL("id INT, salary INT, new_col INT") externalAppend( - cat = cat, ident = ident, schema = newSchema, row = InternalRow(2, 200, -1)) + cat = cat, + ident = ident, + schema = 
newSchema, + row = InternalRow(2, 200, -1)) // same df object, Connect re-analyzes and sees the new schema assertRows(df.collect(), Seq(Row(1, 100, null), Row(2, 200, -1))) From e1480108d4623cf8ade40412a23d25eb4a8e1e83 Mon Sep 17 00:00:00 2001 From: Thang Long VU Date: Tue, 19 May 2026 21:41:30 +0000 Subject: [PATCH 18/36] Address review: document cache design decisions Co-authored-by: Isaac --- .../catalog/CachingInMemoryTableCatalog.scala | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CachingInMemoryTableCatalog.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CachingInMemoryTableCatalog.scala index f19f81a50121a..e065ffd4f0ae8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CachingInMemoryTableCatalog.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CachingInMemoryTableCatalog.scala @@ -35,6 +35,16 @@ import java.util.concurrent.ConcurrentHashMap class CachingInMemoryTableCatalog extends InMemoryTableCatalog { import CachingInMemoryTableCatalog._ + // Note: The write-path loadTable(ident, writePrivileges) is NOT overridden, + // so writes bypass the cache and go directly to the underlying table. + // This is intentional: it simulates connectors where writes update the real + // table while reads may return stale cached copies. + // + // Note: dropTable, createTable, and alterTable are NOT overridden, so they + // do not invalidate the cache. Only explicit invalidateTable calls (e.g., + // via REFRESH TABLE) clear cached entries. This simulates real-world + // caching connectors where external DDL is invisible until refresh. 
+ override def loadTable(ident: Identifier): Table = { cachedTables.computeIfAbsent(cacheKey(name, ident), _ => { super.loadTable(ident) From b2e792567de7e601809aeb04b2e380ae42fb8de9 Mon Sep 17 00:00:00 2001 From: Thang Long VU Date: Wed, 20 May 2026 14:03:27 +0000 Subject: [PATCH 19/36] Remove CachingInMemoryTableCatalog.scala from this PR (already in master) Co-authored-by: Isaac --- .../catalog/CachingInMemoryTableCatalog.scala | 70 ------------------- 1 file changed, 70 deletions(-) delete mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CachingInMemoryTableCatalog.scala diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CachingInMemoryTableCatalog.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CachingInMemoryTableCatalog.scala deleted file mode 100644 index e065ffd4f0ae8..0000000000000 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CachingInMemoryTableCatalog.scala +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.connector.catalog - -import java.util.concurrent.ConcurrentHashMap - -/** - * An InMemoryTableCatalog that simulates a caching connector like - * Iceberg's CachingCatalog. On first loadTable, returns a fresh - * copy. On subsequent loads, returns the CACHED (stale) copy, - * making external changes invisible. - * - * Session writes go through the SQL path which modifies the - * original table and invalidates, but direct catalog API - * modifications are not visible until the cache is cleared. - * - * Call [[CachingInMemoryTableCatalog.clearCache()]] to simulate - * cache expiration (like Iceberg's 30-second TTL). - */ -class CachingInMemoryTableCatalog extends InMemoryTableCatalog { - import CachingInMemoryTableCatalog._ - - // Note: The write-path loadTable(ident, writePrivileges) is NOT overridden, - // so writes bypass the cache and go directly to the underlying table. - // This is intentional: it simulates connectors where writes update the real - // table while reads may return stale cached copies. - // - // Note: dropTable, createTable, and alterTable are NOT overridden, so they - // do not invalidate the cache. Only explicit invalidateTable calls (e.g., - // via REFRESH TABLE) clear cached entries. This simulates real-world - // caching connectors where external DDL is invisible until refresh. 
- - override def loadTable(ident: Identifier): Table = { - cachedTables.computeIfAbsent(cacheKey(name, ident), _ => { - super.loadTable(ident) - }) - } - - override def invalidateTable(ident: Identifier): Unit = { - super.invalidateTable(ident) - cachedTables.remove(cacheKey(name, ident)) - } - - private def cacheKey( - catalog: String, ident: Identifier): String = { - s"$catalog.${ident.toString}" - } -} - -object CachingInMemoryTableCatalog { - private val cachedTables = - new ConcurrentHashMap[String, Table]() - - def clearCache(): Unit = cachedTables.clear() -} From a150faa61407b13887f65fd610c6b37da1a3a365 Mon Sep 17 00:00:00 2001 From: Thang Long VU Date: Wed, 20 May 2026 14:04:13 +0000 Subject: [PATCH 20/36] Restore CachingInMemoryTableCatalog.scala to master version Co-authored-by: Isaac --- .../catalog/CachingInMemoryTableCatalog.scala | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CachingInMemoryTableCatalog.scala diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CachingInMemoryTableCatalog.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CachingInMemoryTableCatalog.scala new file mode 100644 index 0000000000000..f8e3224fa7e12 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CachingInMemoryTableCatalog.scala @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.catalog + +import java.util.concurrent.ConcurrentHashMap + +/** + * An InMemoryTableCatalog that simulates a caching connector like + * Iceberg's CachingCatalog. On first [[loadTable]], returns a fresh + * copy. On subsequent loads, returns the CACHED (stale) copy, + * making external changes invisible. + * + * Session writes go through the write-variant [[loadTable]], which is not + * cached, so they modify the underlying table directly. Cached [[loadTable]] + * results may still be stale until [[clearCache]] or REFRESH TABLE (which + * invokes [[invalidateTable]]) is called. + * + * Only the primary [[loadTable(ident:org\.apache\.spark\.sql\.connector\.catalog\.Identifier)*]] + * overload is cached. Version and timestamp overloads bypass the cache, matching + * time-travel semantics. [[dropTable]], [[createTable]], and [[alterTable]] do not + * invalidate the cache, matching the behavior of real caching connectors. 
+ */ +class CachingInMemoryTableCatalog extends InMemoryTableCatalog { + private val cachedTables = new ConcurrentHashMap[Identifier, Table]() + + override def loadTable(ident: Identifier): Table = + cachedTables.computeIfAbsent(ident, _ => super.loadTable(ident)) + + override def invalidateTable(ident: Identifier): Unit = { + super.invalidateTable(ident) + cachedTables.remove(ident) + } + + def clearCache(): Unit = cachedTables.clear() +} From 64d4a67c5331051ca7b002eeb10070020ca30139 Mon Sep 17 00:00:00 2001 From: Thang Long VU Date: Wed, 20 May 2026 14:12:48 +0000 Subject: [PATCH 21/36] Fix compilation error: call clearCache() on catalog instance, not companion object CachingInMemoryTableCatalog has no companion object, so the static call CachingInMemoryTableCatalog.clearCache() does not compile. Fixed by getting the server-side catalog instance and calling clearCache() on it, matching the pattern used by the classic DataSourceV2DataFrameSuite. Co-authored-by: Isaac --- .../sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala index 6e8a8caee8983..7407d73570166 100644 --- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala +++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala @@ -79,7 +79,8 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { try { fn } finally { session.sql(s"DROP TABLE IF EXISTS $table").collect() - CachingInMemoryTableCatalog.clearCache() + val serverSession = getServerSession(session) + serverCatalog[CachingInMemoryTableCatalog](serverSession, "cachingcat").clearCache() } } From 227cfeabc0b1d195a85c60930bbf806dfe928d24 Mon Sep 17 
00:00:00 2001 From: Thang Long VU Date: Wed, 20 May 2026 14:19:02 +0000 Subject: [PATCH 22/36] Fix Scaladoc: use neutral cross-reference to classic-path tests Co-authored-by: Isaac --- .../sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala index 7407d73570166..19563d77960e0 100644 --- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala +++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala @@ -28,8 +28,8 @@ import org.apache.spark.sql.connector.catalog.{TableChange, TableWritePrivilege} import org.apache.spark.sql.types.{IntegerType, StructType} /** - * Connect-mode equivalent of the repeated-sql() tests added to DataSourceV2DataFrameSuite in the - * classic path. + * Connect-mode counterpart of the repeated-sql() tests in DataSourceV2DataFrameSuite (classic + * path). * * In Connect, every sql() call creates a fresh plan that is re-analyzed on the server, so it * always sees the latest data, schema, and table identity. From 2f91fa06300d82dee3bf336c1d8ca660245d5587 Mon Sep 17 00:00:00 2001 From: Thang Long VU Date: Wed, 20 May 2026 15:59:20 +0000 Subject: [PATCH 23/36] Fix CI: only clear cachingcat cache when the test uses cachingcat The withCleanup method was unconditionally accessing the cachingcat catalog to clear its cache, but the Connect server session only loads a catalog when first accessed via SQL. Tests using testcat never trigger loading cachingcat, causing CatalogNotFoundException. 
Co-authored-by: Isaac --- .../sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala index 19563d77960e0..8ae4aa66ee3d3 100644 --- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala +++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala @@ -79,8 +79,10 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { try { fn } finally { session.sql(s"DROP TABLE IF EXISTS $table").collect() - val serverSession = getServerSession(session) - serverCatalog[CachingInMemoryTableCatalog](serverSession, "cachingcat").clearCache() + if (table.startsWith("cachingcat")) { + val serverSession = getServerSession(session) + serverCatalog[CachingInMemoryTableCatalog](serverSession, "cachingcat").clearCache() + } } } From 02daa1403087bff5f82d333a9e62f5c8cd1adacc Mon Sep 17 00:00:00 2001 From: Thang Long VU Date: Tue, 26 May 2026 08:03:26 +0000 Subject: [PATCH 24/36] Extract DSv2RepeatedTableAccessTests trait, mix into classic and Connect suites Move the 9 shared repeated-sql() test cases into a new DSv2RepeatedTableAccessTests trait (extending DSv2ExternalMutationTestBase), matching the pattern established by DSv2TempViewWithStoredPlanTests. Classic mode: mixed into DataSourceV2DataFrameSuite (testPrefix = ""). Connect mode: DataSourceV2RepeatedSQLConnectSuite now extends the trait and only keeps the 3 Connect-specific DataFrame-reuse tests inline. 
Co-authored-by: Isaac --- .../DataSourceV2RepeatedSQLConnectSuite.scala | 340 ++++-------------- .../DSv2RepeatedTableAccessTests.scala | 217 +++++++++++ .../DataSourceV2DataFrameSuite.scala | 3 +- 3 files changed, 283 insertions(+), 277 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2RepeatedTableAccessTests.scala diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala index 8ae4aa66ee3d3..3e1e2a7ec0493 100644 --- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala +++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala @@ -17,29 +17,27 @@ package org.apache.spark.sql.connect -import java.util -import java.util.Collections +import scala.reflect.ClassTag import org.apache.spark.SparkConf -import org.apache.spark.sql.{classic, Row, SparkSession} +import org.apache.spark.sql.{DataFrame, QueryTest, Row, SparkSession} import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.connector.catalog.{BufferedRows, CachingInMemoryTableCatalog, Column, Identifier, InMemoryBaseTable, InMemoryTableCatalog, TableCatalog} -import org.apache.spark.sql.connector.catalog.{TableChange, TableWritePrivilege} -import org.apache.spark.sql.types.{IntegerType, StructType} +import org.apache.spark.sql.connector.DSv2RepeatedTableAccessTests +import org.apache.spark.sql.connector.catalog.{CachingInMemoryTableCatalog, Column, Identifier, InMemoryTableCatalog, TableCatalog, TableChange, TableInfo} +import org.apache.spark.sql.types.IntegerType /** - * Connect-mode counterpart of the repeated-sql() tests in DataSourceV2DataFrameSuite (classic - * path). + * Connect-mode runner for [[DSv2RepeatedTableAccessTests]]. 
All shared test logic lives in the + * trait; this class provides the Connect-specific session, catalog access, and result comparison. * - * In Connect, every sql() call creates a fresh plan that is re-analyzed on the server, so it - * always sees the latest data, schema, and table identity. - * - * The "DataFrame reuse" tests (at the bottom) test Connect-specific behavior: reusing the same - * DataFrame across external mutations. In classic Spark, the resolved plan is captured at - * DataFrame creation time, so reusing a DF after schema changes would fail. In Connect, each - * action re-sends the plan to the server for fresh analysis. + * The "DataFrame reuse" tests at the bottom are Connect-specific: reusing the same DataFrame + * object across external mutations still sees fresh data, because Connect re-sends the plan + * to the server for fresh analysis on every action. In classic Spark, the resolved plan is + * captured at DataFrame creation time, so reusing a DF after schema changes would fail. 
*/ -class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { +class DataSourceV2RepeatedSQLConnectSuite + extends SparkConnectServerTest + with DSv2RepeatedTableAccessTests { override def sparkConf: SparkConf = super.sparkConf .set("spark.sql.catalog.testcat", classOf[InMemoryTableCatalog].getName) @@ -47,314 +45,104 @@ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest { .set("spark.sql.catalog.cachingcat", classOf[CachingInMemoryTableCatalog].getName) .set("spark.sql.catalog.cachingcat.copyOnLoad", "true") - private val T = "testcat.ns1.ns2.tbl" - private val CT = "cachingcat.ns1.ns2.tbl" - private val ident = Identifier.of(Array("ns1", "ns2"), "tbl") + override protected def testPrefix: String = "[connect] " - private def assertRows(actual: Array[Row], expected: Seq[Row]): Unit = { - assert( - actual.toSeq.sortBy(_.toString()) == expected.sortBy(_.toString()), - s"Expected ${expected.mkString(", ")} but got ${actual.mkString(", ")}") - } + override protected def withTestSession(fn: SparkSession => Unit): Unit = + withSession(fn) - /** Get a catalog from the server-side session by name. */ - private def serverCatalog[T <: TableCatalog]( - serverSession: classic.SparkSession, - name: String): T = - serverSession.sessionState.catalogManager.catalog(name).asInstanceOf[T] + override protected def checkRows(df: => DataFrame, expected: Seq[Row]): Unit = + QueryTest.sameRows(expected, df.collect().toSeq).foreach(msg => fail(msg)) - /** Appends a row to a DSv2 table via the catalog API, bypassing the session. 
*/ - private def externalAppend( - cat: TableCatalog, - ident: Identifier, - schema: StructType, - row: InternalRow): Unit = { - val extTable = cat - .loadTable(ident, util.Set.of(TableWritePrivilege.INSERT)) - .asInstanceOf[InMemoryBaseTable] - extTable.withData(Array(new BufferedRows(Seq.empty, schema).withRow(row))) + override protected def getTableCatalog[C <: TableCatalog: ClassTag]( + session: SparkSession, + catalogName: String): C = { + val serverSession = getServerSession(session) + val catalog = serverSession.sessionState.catalogManager.catalog(catalogName) + val ct = implicitly[ClassTag[C]] + require( + ct.runtimeClass.isInstance(catalog), + s"Expected ${ct.runtimeClass.getName} but got ${catalog.getClass.getName}") + catalog.asInstanceOf[C] } - private def withCleanup(session: SparkSession, table: String)(fn: => Unit): Unit = { + override protected def withTestTableAndViews( + session: SparkSession, + table: String, + views: Seq[String] = Seq.empty)(fn: => Unit): Unit = { try { fn } finally { + views.foreach(v => session.sql(s"DROP VIEW IF EXISTS $v").collect()) session.sql(s"DROP TABLE IF EXISTS $table").collect() - if (table.startsWith("cachingcat")) { - val serverSession = getServerSession(session) - serverCatalog[CachingInMemoryTableCatalog](serverSession, "cachingcat").clearCache() - } - } - } - - // Scenario 1: external writes - test("[connect] repeated sql() reflects session write") { - withSession { session => - withCleanup(session, T) { - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() - assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100))) - - session.sql(s"INSERT INTO $T VALUES (2, 200)").collect() - assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100), Row(2, 200))) - } - } - } - - test("[connect] repeated sql() reflects external write") { - withSession { session => - withCleanup(session, T) { - session.sql(s"CREATE TABLE 
$T (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() - assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100))) - - // external writer adds (2, 200) - val serverSession = getServerSession(session) - val cat = serverCatalog[InMemoryTableCatalog](serverSession, "testcat") - val newSchema = StructType.fromDDL("id INT, salary INT") - externalAppend(cat = cat, ident = ident, schema = newSchema, row = InternalRow(2, 200)) - - assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100), Row(2, 200))) - } } } - // Scenario 1 connector w/ cache (external write, caching connector) - test("[connect] connector w/ cache: repeated sql() stale after external write") { - withSession { session => - withCleanup(session, CT) { - session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $CT VALUES (1, 100)").collect() - assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) + // Connect-specific DataFrame reuse tests: reusing the same DataFrame object across + // external mutations still sees fresh data, because Connect re-sends the plan to the + // server for fresh analysis on every action. 
- val serverSession = getServerSession(session) - val cat = serverCatalog[CachingInMemoryTableCatalog](serverSession, "cachingcat") - val newSchema = StructType.fromDDL("id INT, salary INT") - externalAppend(cat = cat, ident = ident, schema = newSchema, row = InternalRow(2, 200)) - - // Caching connector returns stale table: external write invisible - assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) - - // REFRESH TABLE invalidates the connector cache, external write becomes visible - session.sql(s"REFRESH TABLE $CT").collect() - assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100), Row(2, 200))) - } - } - } - - // Scenario 2: external schema changes - test("[connect] repeated sql() reflects session schema change") { - withSession { session => - withCleanup(session, T) { - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() - assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100))) - - session.sql(s"ALTER TABLE $T ADD COLUMN new_col INT").collect() - session.sql(s"INSERT INTO $T VALUES (2, 200, -1)").collect() - assertRows( - session.sql(s"SELECT * FROM $T").collect(), - Seq(Row(1, 100, null), Row(2, 200, -1))) - } - } - } - - test("[connect] repeated sql() reflects external schema change") { - withSession { session => - withCleanup(session, T) { - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() - assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100))) - - // external schema change + data write via catalog API - val serverSession = getServerSession(session) - val cat = serverCatalog[InMemoryTableCatalog](serverSession, "testcat") - val addCol = TableChange.addColumn(Array("new_col"), IntegerType, true) - cat.alterTable(ident, addCol) - - val newSchema = StructType.fromDDL("id INT, salary INT, new_col INT") - externalAppend( - 
cat = cat, - ident = ident, - schema = newSchema, - row = InternalRow(2, 200, -1)) - - assertRows( - session.sql(s"SELECT * FROM $T").collect(), - Seq(Row(1, 100, null), Row(2, 200, -1))) - } - } - } - - // Scenario 2 connector w/ cache (external schema change, caching connector) - test("[connect] connector w/ cache: repeated sql() stale after external schema change") { - withSession { session => - withCleanup(session, CT) { - session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $CT VALUES (1, 100)").collect() - assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) - - val serverSession = getServerSession(session) - val cat = serverCatalog[CachingInMemoryTableCatalog](serverSession, "cachingcat") - val addCol = TableChange.addColumn(Array("new_col"), IntegerType, true) - cat.alterTable(ident, addCol) - - val newSchema = StructType.fromDDL("id INT, salary INT, new_col INT") - externalAppend( - cat = cat, - ident = ident, - schema = newSchema, - row = InternalRow(2, 200, -1)) - - // Caching connector returns stale table: external changes invisible - assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) - - // REFRESH TABLE invalidates the connector cache, schema change + data visible - session.sql(s"REFRESH TABLE $CT").collect() - assertRows( - session.sql(s"SELECT * FROM $CT").collect(), - Seq(Row(1, 100, null), Row(2, 200, -1))) - } - } - } - - // Scenario 3: drop and recreate table - test("[connect] repeated sql() reflects session drop/recreate") { - withSession { session => - withCleanup(session, T) { - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() - assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100))) - - session.sql(s"DROP TABLE $T").collect() - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - assertRows(session.sql(s"SELECT * FROM 
$T").collect(), Seq.empty) - } - } - } - - test("[connect] repeated sql() reflects external drop/recreate") { - withSession { session => - withCleanup(session, T) { - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() - assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq(Row(1, 100))) - - // external drop and recreate via catalog API - val serverSession = getServerSession(session) - val cat = serverCatalog[InMemoryTableCatalog](serverSession, "testcat") - cat.dropTable(ident) - cat.createTable( - ident, - Array(Column.create("id", IntegerType), Column.create("salary", IntegerType)), - Array.empty, - Collections.emptyMap[String, String]) - - assertRows(session.sql(s"SELECT * FROM $T").collect(), Seq.empty) - } - } - } - - // Scenario 3 connector w/ cache (external drop/recreate, caching connector) - test("[connect] connector w/ cache: repeated sql() stale after external drop/recreate") { - withSession { session => - withCleanup(session, CT) { - session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $CT VALUES (1, 100)").collect() - assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) - - val serverSession = getServerSession(session) - val cat = serverCatalog[CachingInMemoryTableCatalog](serverSession, "cachingcat") - cat.dropTable(ident) - cat.createTable( - ident, - Array(Column.create("id", IntegerType), Column.create("salary", IntegerType)), - Array.empty, - Collections.emptyMap[String, String]) - - // Caching connector returns stale table: drop/recreate invisible - assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq(Row(1, 100))) - - // REFRESH TABLE invalidates the connector cache, new empty table visible - session.sql(s"REFRESH TABLE $CT").collect() - assertRows(session.sql(s"SELECT * FROM $CT").collect(), Seq.empty) - } - } - } - - // DataFrame reuse tests: these test Connect-specific behavior 
where reusing the same - // DataFrame object across mutations still sees fresh data, because Connect re-sends - // the plan for fresh analysis on every action. + private val T = "testcat.ns1.ns2.tbl" + private val ident = Identifier.of(Array("ns1", "ns2"), "tbl") test("[connect] reused DataFrame reflects external write") { - withSession { session => - withCleanup(session, T) { + withTestSession { session => + withTestTableAndViews(session, T) { session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() val df = session.sql(s"SELECT * FROM $T") - assertRows(df.collect(), Seq(Row(1, 100))) + checkRows(df, Seq(Row(1, 100))) - // external write via catalog API - val serverSession = getServerSession(session) - val cat = serverCatalog[InMemoryTableCatalog](serverSession, "testcat") - val newSchema = StructType.fromDDL("id INT, salary INT") - externalAppend(cat = cat, ident = ident, schema = newSchema, row = InternalRow(2, 200)) + val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat") + externalAppend(catalog = catalog, ident = ident, row = InternalRow(2, 200)) // same df object, Connect re-analyzes and sees the new row - assertRows(df.collect(), Seq(Row(1, 100), Row(2, 200))) + checkRows(df, Seq(Row(1, 100), Row(2, 200))) } } } test("[connect] reused DataFrame reflects external schema change") { - withSession { session => - withCleanup(session, T) { + withTestSession { session => + withTestTableAndViews(session, T) { session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() val df = session.sql(s"SELECT * FROM $T") - assertRows(df.collect(), Seq(Row(1, 100))) + checkRows(df, Seq(Row(1, 100))) - // external schema change + write via catalog API - val serverSession = getServerSession(session) - val cat = serverCatalog[InMemoryTableCatalog](serverSession, "testcat") + val catalog = 
getTableCatalog[InMemoryTableCatalog](session, "testcat") val addCol = TableChange.addColumn(Array("new_col"), IntegerType, true) - cat.alterTable(ident, addCol) + catalog.alterTable(ident, addCol) - val newSchema = StructType.fromDDL("id INT, salary INT, new_col INT") - externalAppend( - cat = cat, - ident = ident, - schema = newSchema, - row = InternalRow(2, 200, -1)) + externalAppend(catalog = catalog, ident = ident, row = InternalRow(2, 200, -1)) // same df object, Connect re-analyzes and sees the new schema - assertRows(df.collect(), Seq(Row(1, 100, null), Row(2, 200, -1))) + checkRows(df, Seq(Row(1, 100, null), Row(2, 200, -1))) } } } test("[connect] reused DataFrame reflects external drop/recreate") { - withSession { session => - withCleanup(session, T) { + withTestSession { session => + withTestTableAndViews(session, T) { session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() val df = session.sql(s"SELECT * FROM $T") - assertRows(df.collect(), Seq(Row(1, 100))) + checkRows(df, Seq(Row(1, 100))) - // external drop and recreate via catalog API - val serverSession = getServerSession(session) - val cat = serverCatalog[InMemoryTableCatalog](serverSession, "testcat") - cat.dropTable(ident) - cat.createTable( + val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat") + catalog.dropTable(ident) + catalog.createTable( ident, - Array(Column.create("id", IntegerType), Column.create("salary", IntegerType)), - Array.empty, - Collections.emptyMap[String, String]) + new TableInfo.Builder() + .withColumns(Array( + Column.create("id", IntegerType), + Column.create("salary", IntegerType))) + .build()) // same df object, Connect re-analyzes against the new empty table - assertRows(df.collect(), Seq.empty) + checkRows(df, Seq.empty) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2RepeatedTableAccessTests.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2RepeatedTableAccessTests.scala new file mode 100644 index 0000000000000..a384b66b1fc46 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2RepeatedTableAccessTests.scala @@ -0,0 +1,217 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector + +import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.connector.catalog.{CachingInMemoryTableCatalog, Column, Identifier, InMemoryTableCatalog, TableChange, TableInfo} +import org.apache.spark.sql.types.IntegerType + +/** + * Shared repeated table access tests for DSv2 tables. These tests verify that repeated + * sql() calls correctly see the latest data, schema, and table identity after session + * writes, external catalog mutations, and table recreation. + * + * NOTE: All `session.sql(...)` calls append `.collect()` because Connect client DataFrames + * are lazy and require an action to trigger execution. In classic mode `.collect()` on DDL + * is a no-op (DDL executes eagerly), so this is harmless. 
+ */ +trait DSv2RepeatedTableAccessTests extends DSv2ExternalMutationTestBase { + + private val T = "testcat.ns1.ns2.tbl" + private val CT = "cachingcat.ns1.ns2.tbl" + private val testIdent = Identifier.of(Array("ns1", "ns2"), "tbl") + + // Scenario 1: data changes via writes + + test(s"${testPrefix}repeated sql() reflects session write") { + withTestSession { session => + withTestTableAndViews(session, T) { + session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() + checkRows(session.sql(s"SELECT * FROM $T"), Seq(Row(1, 100))) + + session.sql(s"INSERT INTO $T VALUES (2, 200)").collect() + checkRows(session.sql(s"SELECT * FROM $T"), Seq(Row(1, 100), Row(2, 200))) + } + } + } + + test(s"${testPrefix}repeated sql() reflects external write") { + withTestSession { session => + withTestTableAndViews(session, T) { + session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() + checkRows(session.sql(s"SELECT * FROM $T"), Seq(Row(1, 100))) + + val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat") + externalAppend(catalog = catalog, ident = testIdent, row = InternalRow(2, 200)) + + checkRows(session.sql(s"SELECT * FROM $T"), Seq(Row(1, 100), Row(2, 200))) + } + } + } + + test(s"${testPrefix}connector w/ cache: repeated sql() stale after external write") { + withTestSession { session => + withTestTableAndViews(session, CT) { + session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $CT VALUES (1, 100)").collect() + checkRows(session.sql(s"SELECT * FROM $CT"), Seq(Row(1, 100))) + + val catalog = getTableCatalog[CachingInMemoryTableCatalog](session, "cachingcat") + externalAppend(catalog = catalog, ident = testIdent, row = InternalRow(2, 200)) + + // Caching connector returns stale table: external write invisible + checkRows(session.sql(s"SELECT * FROM $CT"), 
Seq(Row(1, 100))) + + // REFRESH TABLE invalidates the connector cache, external write becomes visible + session.sql(s"REFRESH TABLE $CT").collect() + checkRows(session.sql(s"SELECT * FROM $CT"), Seq(Row(1, 100), Row(2, 200))) + } + } + } + + // Scenario 2: schema changes + + test(s"${testPrefix}repeated sql() reflects session schema change") { + withTestSession { session => + withTestTableAndViews(session, T) { + session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() + checkRows(session.sql(s"SELECT * FROM $T"), Seq(Row(1, 100))) + + session.sql(s"ALTER TABLE $T ADD COLUMN new_col INT").collect() + session.sql(s"INSERT INTO $T VALUES (2, 200, -1)").collect() + checkRows( + session.sql(s"SELECT * FROM $T"), + Seq(Row(1, 100, null), Row(2, 200, -1))) + } + } + } + + test(s"${testPrefix}repeated sql() reflects external schema change") { + withTestSession { session => + withTestTableAndViews(session, T) { + session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() + checkRows(session.sql(s"SELECT * FROM $T"), Seq(Row(1, 100))) + + val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat") + val addCol = TableChange.addColumn(Array("new_col"), IntegerType, true) + catalog.alterTable(testIdent, addCol) + + externalAppend(catalog = catalog, ident = testIdent, row = InternalRow(2, 200, -1)) + + checkRows( + session.sql(s"SELECT * FROM $T"), + Seq(Row(1, 100, null), Row(2, 200, -1))) + } + } + } + + test(s"${testPrefix}connector w/ cache: repeated sql() stale after external schema change") { + withTestSession { session => + withTestTableAndViews(session, CT) { + session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $CT VALUES (1, 100)").collect() + checkRows(session.sql(s"SELECT * FROM $CT"), Seq(Row(1, 100))) + + val catalog = 
getTableCatalog[CachingInMemoryTableCatalog](session, "cachingcat") + val addCol = TableChange.addColumn(Array("new_col"), IntegerType, true) + catalog.alterTable(testIdent, addCol) + + externalAppend(catalog = catalog, ident = testIdent, row = InternalRow(2, 200, -1)) + + // Caching connector returns stale table: external changes invisible + checkRows(session.sql(s"SELECT * FROM $CT"), Seq(Row(1, 100))) + + // REFRESH TABLE invalidates the connector cache, schema change + data visible + session.sql(s"REFRESH TABLE $CT").collect() + checkRows( + session.sql(s"SELECT * FROM $CT"), + Seq(Row(1, 100, null), Row(2, 200, -1))) + } + } + } + + // Scenario 3: drop and recreate table + + test(s"${testPrefix}repeated sql() reflects session drop/recreate") { + withTestSession { session => + withTestTableAndViews(session, T) { + session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() + checkRows(session.sql(s"SELECT * FROM $T"), Seq(Row(1, 100))) + + session.sql(s"DROP TABLE $T").collect() + session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() + checkRows(session.sql(s"SELECT * FROM $T"), Seq.empty) + } + } + } + + test(s"${testPrefix}repeated sql() reflects external drop/recreate") { + withTestSession { session => + withTestTableAndViews(session, T) { + session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() + checkRows(session.sql(s"SELECT * FROM $T"), Seq(Row(1, 100))) + + val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat") + catalog.dropTable(testIdent) + catalog.createTable( + testIdent, + new TableInfo.Builder() + .withColumns(Array( + Column.create("id", IntegerType), + Column.create("salary", IntegerType))) + .build()) + + checkRows(session.sql(s"SELECT * FROM $T"), Seq.empty) + } + } + } + + test(s"${testPrefix}connector w/ cache: repeated sql() stale after external 
drop/recreate") { + withTestSession { session => + withTestTableAndViews(session, CT) { + session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $CT VALUES (1, 100)").collect() + checkRows(session.sql(s"SELECT * FROM $CT"), Seq(Row(1, 100))) + + val catalog = getTableCatalog[CachingInMemoryTableCatalog](session, "cachingcat") + catalog.dropTable(testIdent) + catalog.createTable( + testIdent, + new TableInfo.Builder() + .withColumns(Array( + Column.create("id", IntegerType), + Column.create("salary", IntegerType))) + .build()) + + // Caching connector returns stale table: drop/recreate invisible + checkRows(session.sql(s"SELECT * FROM $CT"), Seq(Row(1, 100))) + + // REFRESH TABLE invalidates the connector cache, new empty table visible + session.sql(s"REFRESH TABLE $CT").collect() + checkRows(session.sql(s"SELECT * FROM $CT"), Seq.empty) + } + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSuite.scala index 13f8a34554807..139a6c75d793a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSuite.scala @@ -47,7 +47,8 @@ import org.apache.spark.unsafe.types.UTF8String class DataSourceV2DataFrameSuite extends InsertIntoTests(supportsDynamicOverwrite = true, includeSQLOnlyTests = false) - with DSv2TempViewWithStoredPlanTests { + with DSv2TempViewWithStoredPlanTests + with DSv2RepeatedTableAccessTests { import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import testImplicits._ From 63944a38fd284fd22ac2d5ba5caa86f69a52dcdf Mon Sep 17 00:00:00 2001 From: Thang Long VU Date: Tue, 26 May 2026 08:22:52 +0000 Subject: [PATCH 25/36] Fix stale Scaladoc in DSv2ExternalMutationTestBase, rename ident to testIdent - Update DSv2ExternalMutationTestBase 
Scaladoc to reference both DSv2TempViewWithStoredPlanTests and DSv2RepeatedTableAccessTests. - Rename private val ident to testIdent in Connect suite for consistency with the shared trait's naming. Co-authored-by: Isaac --- .../DataSourceV2RepeatedSQLConnectSuite.scala | 12 ++++++------ .../sql/connector/DSv2ExternalMutationTestBase.scala | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala index 3e1e2a7ec0493..5fc61cdee6211 100644 --- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala +++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala @@ -81,7 +81,7 @@ class DataSourceV2RepeatedSQLConnectSuite // server for fresh analysis on every action. private val T = "testcat.ns1.ns2.tbl" - private val ident = Identifier.of(Array("ns1", "ns2"), "tbl") + private val testIdent = Identifier.of(Array("ns1", "ns2"), "tbl") test("[connect] reused DataFrame reflects external write") { withTestSession { session => @@ -93,7 +93,7 @@ class DataSourceV2RepeatedSQLConnectSuite checkRows(df, Seq(Row(1, 100))) val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat") - externalAppend(catalog = catalog, ident = ident, row = InternalRow(2, 200)) + externalAppend(catalog = catalog, ident = testIdent, row = InternalRow(2, 200)) // same df object, Connect re-analyzes and sees the new row checkRows(df, Seq(Row(1, 100), Row(2, 200))) @@ -112,9 +112,9 @@ class DataSourceV2RepeatedSQLConnectSuite val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat") val addCol = TableChange.addColumn(Array("new_col"), IntegerType, true) - catalog.alterTable(ident, addCol) + catalog.alterTable(testIdent, addCol) - externalAppend(catalog = 
catalog, ident = ident, row = InternalRow(2, 200, -1)) + externalAppend(catalog = catalog, ident = testIdent, row = InternalRow(2, 200, -1)) // same df object, Connect re-analyzes and sees the new schema checkRows(df, Seq(Row(1, 100, null), Row(2, 200, -1))) @@ -132,9 +132,9 @@ class DataSourceV2RepeatedSQLConnectSuite checkRows(df, Seq(Row(1, 100))) val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat") - catalog.dropTable(ident) + catalog.dropTable(testIdent) catalog.createTable( - ident, + testIdent, new TableInfo.Builder() .withColumns(Array( Column.create("id", IntegerType), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2ExternalMutationTestBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2ExternalMutationTestBase.scala index 2e60c24c4460c..9ecc0d0821492 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2ExternalMutationTestBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2ExternalMutationTestBase.scala @@ -33,8 +33,8 @@ import org.apache.spark.sql.connector.catalog.{BufferedRows, CatalogV2Util, Iden * (where the test session IS the server session) and Connect mode (where the test session * is a Connect client and catalog access requires the server session). * - * Concrete suites override the abstract methods and mix in the test trait - * [[DSv2TempViewWithStoredPlanTests]]. + * Concrete suites override the abstract methods and mix in a test trait such as + * [[DSv2TempViewWithStoredPlanTests]] or [[DSv2RepeatedTableAccessTests]]. 
*/ trait DSv2ExternalMutationTestBase extends QueryTest { From cbb5cf6c26b33b3174f4f30c001c7da4bd5ea5bc Mon Sep 17 00:00:00 2001 From: Thang Long VU Date: Tue, 26 May 2026 08:29:09 +0000 Subject: [PATCH 26/36] Add checkRows comment explaining why sameRows is used instead of checkAnswer Co-authored-by: Isaac --- .../sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala index 5fc61cdee6211..2731beaebf51e 100644 --- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala +++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala @@ -50,6 +50,11 @@ class DataSourceV2RepeatedSQLConnectSuite override protected def withTestSession(fn: SparkSession => Unit): Unit = withSession(fn) + // Cannot use QueryTest.checkAnswer directly because it accesses df.logicalPlan, + // df.queryExecution, and df.materializedRdd, which are not available on Connect *client* + // DataFrames (they throw ConnectClientUnsupportedErrors). Instead, collect the rows and + // delegate to QueryTest.sameRows, which is the same value-based, order-agnostic comparison + // that checkAnswer uses internally. override protected def checkRows(df: => DataFrame, expected: Seq[Row]): Unit = QueryTest.sameRows(expected, df.collect().toSeq).foreach(msg => fail(msg)) From 8f2f31d6fe62ebc930ec8d0fabac736976049f0a Mon Sep 17 00:00:00 2001 From: Thang Long VU Date: Tue, 26 May 2026 09:56:40 +0000 Subject: [PATCH 27/36] Remove redundant Connect-only DataFrame reuse tests The shared trait's repeated sql() tests already exercise Connect's re-analysis behavior. 
The DF reuse tests were testing the same code path since Connect re-sends the plan on every action regardless. Co-authored-by: Isaac --- .../DataSourceV2RepeatedSQLConnectSuite.scala | 85 +------------------ 1 file changed, 4 insertions(+), 81 deletions(-) diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala index 2731beaebf51e..61a5ff7e8fc93 100644 --- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala +++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala @@ -21,19 +21,13 @@ import scala.reflect.ClassTag import org.apache.spark.SparkConf import org.apache.spark.sql.{DataFrame, QueryTest, Row, SparkSession} -import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.connector.DSv2RepeatedTableAccessTests -import org.apache.spark.sql.connector.catalog.{CachingInMemoryTableCatalog, Column, Identifier, InMemoryTableCatalog, TableCatalog, TableChange, TableInfo} -import org.apache.spark.sql.types.IntegerType +import org.apache.spark.sql.connector.catalog.{CachingInMemoryTableCatalog, InMemoryTableCatalog, TableCatalog} /** - * Connect-mode runner for [[DSv2RepeatedTableAccessTests]]. All shared test logic lives in the - * trait; this class provides the Connect-specific session, catalog access, and result comparison. - * - * The "DataFrame reuse" tests at the bottom are Connect-specific: reusing the same DataFrame - * object across external mutations still sees fresh data, because Connect re-sends the plan - * to the server for fresh analysis on every action. In classic Spark, the resolved plan is - * captured at DataFrame creation time, so reusing a DF after schema changes would fail. + * Connect-mode runner for [[DSv2RepeatedTableAccessTests]]. 
All test logic lives in the shared + * trait; this class only provides the Connect-specific session, catalog access, and result + * comparison. */ class DataSourceV2RepeatedSQLConnectSuite extends SparkConnectServerTest @@ -80,75 +74,4 @@ class DataSourceV2RepeatedSQLConnectSuite session.sql(s"DROP TABLE IF EXISTS $table").collect() } } - - // Connect-specific DataFrame reuse tests: reusing the same DataFrame object across - // external mutations still sees fresh data, because Connect re-sends the plan to the - // server for fresh analysis on every action. - - private val T = "testcat.ns1.ns2.tbl" - private val testIdent = Identifier.of(Array("ns1", "ns2"), "tbl") - - test("[connect] reused DataFrame reflects external write") { - withTestSession { session => - withTestTableAndViews(session, T) { - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() - - val df = session.sql(s"SELECT * FROM $T") - checkRows(df, Seq(Row(1, 100))) - - val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat") - externalAppend(catalog = catalog, ident = testIdent, row = InternalRow(2, 200)) - - // same df object, Connect re-analyzes and sees the new row - checkRows(df, Seq(Row(1, 100), Row(2, 200))) - } - } - } - - test("[connect] reused DataFrame reflects external schema change") { - withTestSession { session => - withTestTableAndViews(session, T) { - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() - - val df = session.sql(s"SELECT * FROM $T") - checkRows(df, Seq(Row(1, 100))) - - val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat") - val addCol = TableChange.addColumn(Array("new_col"), IntegerType, true) - catalog.alterTable(testIdent, addCol) - - externalAppend(catalog = catalog, ident = testIdent, row = InternalRow(2, 200, -1)) - - // same df object, Connect re-analyzes and sees the new 
schema - checkRows(df, Seq(Row(1, 100, null), Row(2, 200, -1))) - } - } - } - - test("[connect] reused DataFrame reflects external drop/recreate") { - withTestSession { session => - withTestTableAndViews(session, T) { - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() - - val df = session.sql(s"SELECT * FROM $T") - checkRows(df, Seq(Row(1, 100))) - - val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat") - catalog.dropTable(testIdent) - catalog.createTable( - testIdent, - new TableInfo.Builder() - .withColumns(Array( - Column.create("id", IntegerType), - Column.create("salary", IntegerType))) - .build()) - - // same df object, Connect re-analyzes against the new empty table - checkRows(df, Seq.empty) - } - } - } } From ad9b765a6d19a489e6bfd982162dd1c95822e9b6 Mon Sep 17 00:00:00 2001 From: Thang Long VU Date: Tue, 26 May 2026 09:58:04 +0000 Subject: [PATCH 28/36] Consolidate Connect DSv2 suites: mix DSv2RepeatedTableAccessTests into existing suite Instead of a separate DataSourceV2RepeatedSQLConnectSuite with identical boilerplate, add 'with DSv2RepeatedTableAccessTests' to the existing DataSourceV2TempViewConnectSuite. This mirrors how the classic DataSourceV2DataFrameSuite mixes in both traits. 
Co-authored-by: Isaac --- .../DataSourceV2RepeatedSQLConnectSuite.scala | 77 ------------------- .../DataSourceV2TempViewConnectSuite.scala | 9 ++- 2 files changed, 5 insertions(+), 81 deletions(-) delete mode 100644 sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala deleted file mode 100644 index 61a5ff7e8fc93..0000000000000 --- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2RepeatedSQLConnectSuite.scala +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.connect - -import scala.reflect.ClassTag - -import org.apache.spark.SparkConf -import org.apache.spark.sql.{DataFrame, QueryTest, Row, SparkSession} -import org.apache.spark.sql.connector.DSv2RepeatedTableAccessTests -import org.apache.spark.sql.connector.catalog.{CachingInMemoryTableCatalog, InMemoryTableCatalog, TableCatalog} - -/** - * Connect-mode runner for [[DSv2RepeatedTableAccessTests]]. 
All test logic lives in the shared - * trait; this class only provides the Connect-specific session, catalog access, and result - * comparison. - */ -class DataSourceV2RepeatedSQLConnectSuite - extends SparkConnectServerTest - with DSv2RepeatedTableAccessTests { - - override def sparkConf: SparkConf = super.sparkConf - .set("spark.sql.catalog.testcat", classOf[InMemoryTableCatalog].getName) - .set("spark.sql.catalog.testcat.copyOnLoad", "true") - .set("spark.sql.catalog.cachingcat", classOf[CachingInMemoryTableCatalog].getName) - .set("spark.sql.catalog.cachingcat.copyOnLoad", "true") - - override protected def testPrefix: String = "[connect] " - - override protected def withTestSession(fn: SparkSession => Unit): Unit = - withSession(fn) - - // Cannot use QueryTest.checkAnswer directly because it accesses df.logicalPlan, - // df.queryExecution, and df.materializedRdd, which are not available on Connect *client* - // DataFrames (they throw ConnectClientUnsupportedErrors). Instead, collect the rows and - // delegate to QueryTest.sameRows, which is the same value-based, order-agnostic comparison - // that checkAnswer uses internally. 
- override protected def checkRows(df: => DataFrame, expected: Seq[Row]): Unit = - QueryTest.sameRows(expected, df.collect().toSeq).foreach(msg => fail(msg)) - - override protected def getTableCatalog[C <: TableCatalog: ClassTag]( - session: SparkSession, - catalogName: String): C = { - val serverSession = getServerSession(session) - val catalog = serverSession.sessionState.catalogManager.catalog(catalogName) - val ct = implicitly[ClassTag[C]] - require( - ct.runtimeClass.isInstance(catalog), - s"Expected ${ct.runtimeClass.getName} but got ${catalog.getClass.getName}") - catalog.asInstanceOf[C] - } - - override protected def withTestTableAndViews( - session: SparkSession, - table: String, - views: Seq[String] = Seq.empty)(fn: => Unit): Unit = { - try { fn } - finally { - views.foreach(v => session.sql(s"DROP VIEW IF EXISTS $v").collect()) - session.sql(s"DROP TABLE IF EXISTS $table").collect() - } - } -} diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2TempViewConnectSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2TempViewConnectSuite.scala index ce947379b2330..0813441af7dfb 100644 --- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2TempViewConnectSuite.scala +++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2TempViewConnectSuite.scala @@ -21,17 +21,18 @@ import scala.reflect.ClassTag import org.apache.spark.SparkConf import org.apache.spark.sql.{DataFrame, QueryTest, Row, SparkSession} -import org.apache.spark.sql.connector.DSv2TempViewWithStoredPlanTests +import org.apache.spark.sql.connector.{DSv2RepeatedTableAccessTests, DSv2TempViewWithStoredPlanTests} import org.apache.spark.sql.connector.catalog.{CachingInMemoryTableCatalog, InMemoryTableCatalog, TableCatalog} /** - * Connect-mode runner for [[DSv2TempViewWithStoredPlanTests]]. 
All test logic lives in the shared - * trait; this class only provides the Connect-specific session, catalog access, and result + * Connect-mode runner for DSv2 external mutation test traits. All test logic lives in the shared + * traits; this class only provides the Connect-specific session, catalog access, and result * comparison. */ class DataSourceV2TempViewConnectSuite extends SparkConnectServerTest - with DSv2TempViewWithStoredPlanTests { + with DSv2TempViewWithStoredPlanTests + with DSv2RepeatedTableAccessTests { override def sparkConf: SparkConf = super.sparkConf .set("spark.sql.catalog.testcat", classOf[InMemoryTableCatalog].getName) From 0f2d4f84a314daf605476a3bf88f4e76763e62d0 Mon Sep 17 00:00:00 2001 From: Thang Long VU Date: Tue, 26 May 2026 09:59:47 +0000 Subject: [PATCH 29/36] Rename DataSourceV2TempViewConnectSuite to DataSourceV2DataFrameConnectSuite The suite now runs both temp view and repeated table access tests, so the old name was misleading. The new name mirrors the classic DataSourceV2DataFrameSuite. 
Co-authored-by: Isaac --- ...nnectSuite.scala => DataSourceV2DataFrameConnectSuite.scala} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename sql/connect/server/src/test/scala/org/apache/spark/sql/connect/{DataSourceV2TempViewConnectSuite.scala => DataSourceV2DataFrameConnectSuite.scala} (98%) diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2TempViewConnectSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2DataFrameConnectSuite.scala similarity index 98% rename from sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2TempViewConnectSuite.scala rename to sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2DataFrameConnectSuite.scala index 0813441af7dfb..dd57639c7f2aa 100644 --- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2TempViewConnectSuite.scala +++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2DataFrameConnectSuite.scala @@ -29,7 +29,7 @@ import org.apache.spark.sql.connector.catalog.{CachingInMemoryTableCatalog, InMe * traits; this class only provides the Connect-specific session, catalog access, and result * comparison. 
*/ -class DataSourceV2TempViewConnectSuite +class DataSourceV2DataFrameConnectSuite extends SparkConnectServerTest with DSv2TempViewWithStoredPlanTests with DSv2RepeatedTableAccessTests { From c84e63d78d80539512c1e2552428f71d936f8541 Mon Sep 17 00:00:00 2001 From: Thang Long VU Date: Tue, 26 May 2026 10:00:04 +0000 Subject: [PATCH 30/36] Fix Scaladoc: describe both traits the Connect suite runs Co-authored-by: Isaac --- .../sql/connect/DataSourceV2DataFrameConnectSuite.scala | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2DataFrameConnectSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2DataFrameConnectSuite.scala index dd57639c7f2aa..a97821d16204a 100644 --- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2DataFrameConnectSuite.scala +++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2DataFrameConnectSuite.scala @@ -25,9 +25,12 @@ import org.apache.spark.sql.connector.{DSv2RepeatedTableAccessTests, DSv2TempVie import org.apache.spark.sql.connector.catalog.{CachingInMemoryTableCatalog, InMemoryTableCatalog, TableCatalog} /** - * Connect-mode runner for DSv2 external mutation test traits. All test logic lives in the shared - * traits; this class only provides the Connect-specific session, catalog access, and result - * comparison. + * Connect-mode counterpart of [[org.apache.spark.sql.connector.DataSourceV2DataFrameSuite]]. + * + * Runs DSv2 temp view tests ([[DSv2TempViewWithStoredPlanTests]]) and repeated table access + * tests ([[DSv2RepeatedTableAccessTests]]) under Spark Connect. All test logic lives in the + * shared traits; this class only provides the Connect-specific session, catalog access, and + * result comparison. 
*/ class DataSourceV2DataFrameConnectSuite extends SparkConnectServerTest From 9c0ac7dcf3b6ea06abe49e767536d1721c4371f5 Mon Sep 17 00:00:00 2001 From: Thang Long VU Date: Tue, 26 May 2026 10:02:02 +0000 Subject: [PATCH 31/36] Fix DSv2RepeatedTableAccessTests Scaladoc to match design doc The tests are about repeated table access with external changes, not generic session writes. Updated Scaladoc to describe all three external mutation scenarios and the caching-connector variant. Co-authored-by: Isaac --- .../connector/DSv2RepeatedTableAccessTests.scala | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2RepeatedTableAccessTests.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2RepeatedTableAccessTests.scala index a384b66b1fc46..8b5bbd1693fe9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2RepeatedTableAccessTests.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2RepeatedTableAccessTests.scala @@ -23,9 +23,16 @@ import org.apache.spark.sql.connector.catalog.{CachingInMemoryTableCatalog, Colu import org.apache.spark.sql.types.IntegerType /** - * Shared repeated table access tests for DSv2 tables. These tests verify that repeated - * sql() calls correctly see the latest data, schema, and table identity after session - * writes, external catalog mutations, and table recreation. + * Shared repeated table access tests with external changes for DSv2 tables. These tests verify + * that repeated `sql()` calls correctly reflect external mutations made via the catalog API: + * + * - Scenario 1 (external writes): external data appended via the catalog API is visible. + * - Scenario 2 (external schema changes): external ADD COLUMN via the catalog API is visible. + * - Scenario 3 (external drop/recreate): external drop and recreate via the catalog API + * resolves to the new empty table. 
+ * + * Each scenario includes a session-write baseline, an external-write test, and a + * caching-connector variant showing stale results until `REFRESH TABLE`. * * NOTE: All `session.sql(...)` calls append `.collect()` because Connect client DataFrames * are lazy and require an action to trigger execution. In classic mode `.collect()` on DDL From 465892ded85dd6534e519aba0d7628d533d637bd Mon Sep 17 00:00:00 2001 From: Thang Long VU Date: Tue, 26 May 2026 10:06:04 +0000 Subject: [PATCH 32/36] Fix Scaladoc: session-write -> session mutation, external-write -> external mutation Co-authored-by: Isaac --- .../spark/sql/connector/DSv2RepeatedTableAccessTests.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2RepeatedTableAccessTests.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2RepeatedTableAccessTests.scala index 8b5bbd1693fe9..11fa7031ac99b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2RepeatedTableAccessTests.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2RepeatedTableAccessTests.scala @@ -31,7 +31,7 @@ import org.apache.spark.sql.types.IntegerType * - Scenario 3 (external drop/recreate): external drop and recreate via the catalog API * resolves to the new empty table. * - * Each scenario includes a session-write baseline, an external-write test, and a + * Each scenario includes a session mutation baseline, an external mutation test, and a * caching-connector variant showing stale results until `REFRESH TABLE`. 
* * NOTE: All `session.sql(...)` calls append `.collect()` because Connect client DataFrames From 002cd83c5ec06c938173acfae05eab58eb235037 Mon Sep 17 00:00:00 2001 From: Thang Long VU Date: Tue, 26 May 2026 11:12:37 +0000 Subject: [PATCH 33/36] Address review: hoist shared constants, fix Scaladoc scope and DDL/DML note - Hoist T, CT, testIdent to DSv2ExternalMutationTestBase as protected vals (testTable, cachingTestTable, testIdent) so both consumer traits share the same namespace fixture. - Fix Scaladoc lead sentence: "session and external mutations" (not just external). - Fix NOTE: "DDL / DML" (not just DDL, since .collect() is also appended to INSERT and REFRESH TABLE). Co-authored-by: Isaac --- .../DSv2ExternalMutationTestBase.scala | 9 + .../DSv2RepeatedTableAccessTests.scala | 126 +++++++------ .../DSv2TempViewWithStoredPlanTests.scala | 172 +++++++++--------- 3 files changed, 156 insertions(+), 151 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2ExternalMutationTestBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2ExternalMutationTestBase.scala index 9ecc0d0821492..4d16339e09de9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2ExternalMutationTestBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2ExternalMutationTestBase.scala @@ -38,6 +38,15 @@ import org.apache.spark.sql.connector.catalog.{BufferedRows, CatalogV2Util, Iden */ trait DSv2ExternalMutationTestBase extends QueryTest { + /** Fully qualified table name under the non-caching test catalog. */ + protected val testTable: String = "testcat.ns1.ns2.tbl" + + /** Fully qualified table name under the caching test catalog. */ + protected val cachingTestTable: String = "cachingcat.ns1.ns2.tbl" + + /** Identifier for the test table within its namespace. */ + protected val testIdent: Identifier = Identifier.of(Array("ns1", "ns2"), "tbl") + /** Prefix for test names, e.g. "" or "[connect] ". 
*/ protected def testPrefix: String diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2RepeatedTableAccessTests.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2RepeatedTableAccessTests.scala index 11fa7031ac99b..533d10a949796 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2RepeatedTableAccessTests.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2RepeatedTableAccessTests.scala @@ -19,12 +19,12 @@ package org.apache.spark.sql.connector import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.connector.catalog.{CachingInMemoryTableCatalog, Column, Identifier, InMemoryTableCatalog, TableChange, TableInfo} +import org.apache.spark.sql.connector.catalog.{CachingInMemoryTableCatalog, Column, InMemoryTableCatalog, TableChange, TableInfo} import org.apache.spark.sql.types.IntegerType /** * Shared repeated table access tests with external changes for DSv2 tables. These tests verify - * that repeated `sql()` calls correctly reflect external mutations made via the catalog API: + * that repeated `sql()` calls correctly reflect both session and external mutations: * * - Scenario 1 (external writes): external data appended via the catalog API is visible. * - Scenario 2 (external schema changes): external ADD COLUMN via the catalog API is visible. @@ -35,61 +35,59 @@ import org.apache.spark.sql.types.IntegerType * caching-connector variant showing stale results until `REFRESH TABLE`. * * NOTE: All `session.sql(...)` calls append `.collect()` because Connect client DataFrames - * are lazy and require an action to trigger execution. In classic mode `.collect()` on DDL - * is a no-op (DDL executes eagerly), so this is harmless. + * are lazy and require an action to trigger execution. In classic mode `.collect()` on + * DDL / DML is a no-op (these execute eagerly), so this is harmless. 
*/ trait DSv2RepeatedTableAccessTests extends DSv2ExternalMutationTestBase { - private val T = "testcat.ns1.ns2.tbl" - private val CT = "cachingcat.ns1.ns2.tbl" - private val testIdent = Identifier.of(Array("ns1", "ns2"), "tbl") + // Uses testTable, cachingTestTable, and testIdent from DSv2ExternalMutationTestBase. // Scenario 1: data changes via writes test(s"${testPrefix}repeated sql() reflects session write") { withTestSession { session => - withTestTableAndViews(session, T) { - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() - checkRows(session.sql(s"SELECT * FROM $T"), Seq(Row(1, 100))) + withTestTableAndViews(session, testTable) { + session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $testTable VALUES (1, 100)").collect() + checkRows(session.sql(s"SELECT * FROM $testTable"), Seq(Row(1, 100))) - session.sql(s"INSERT INTO $T VALUES (2, 200)").collect() - checkRows(session.sql(s"SELECT * FROM $T"), Seq(Row(1, 100), Row(2, 200))) + session.sql(s"INSERT INTO $testTable VALUES (2, 200)").collect() + checkRows(session.sql(s"SELECT * FROM $testTable"), Seq(Row(1, 100), Row(2, 200))) } } } test(s"${testPrefix}repeated sql() reflects external write") { withTestSession { session => - withTestTableAndViews(session, T) { - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() - checkRows(session.sql(s"SELECT * FROM $T"), Seq(Row(1, 100))) + withTestTableAndViews(session, testTable) { + session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $testTable VALUES (1, 100)").collect() + checkRows(session.sql(s"SELECT * FROM $testTable"), Seq(Row(1, 100))) val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat") externalAppend(catalog = catalog, ident = testIdent, row = InternalRow(2, 200)) - 
checkRows(session.sql(s"SELECT * FROM $T"), Seq(Row(1, 100), Row(2, 200))) + checkRows(session.sql(s"SELECT * FROM $testTable"), Seq(Row(1, 100), Row(2, 200))) } } } test(s"${testPrefix}connector w/ cache: repeated sql() stale after external write") { withTestSession { session => - withTestTableAndViews(session, CT) { - session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $CT VALUES (1, 100)").collect() - checkRows(session.sql(s"SELECT * FROM $CT"), Seq(Row(1, 100))) + withTestTableAndViews(session, cachingTestTable) { + session.sql(s"CREATE TABLE $cachingTestTable (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $cachingTestTable VALUES (1, 100)").collect() + checkRows(session.sql(s"SELECT * FROM $cachingTestTable"), Seq(Row(1, 100))) val catalog = getTableCatalog[CachingInMemoryTableCatalog](session, "cachingcat") externalAppend(catalog = catalog, ident = testIdent, row = InternalRow(2, 200)) // Caching connector returns stale table: external write invisible - checkRows(session.sql(s"SELECT * FROM $CT"), Seq(Row(1, 100))) + checkRows(session.sql(s"SELECT * FROM $cachingTestTable"), Seq(Row(1, 100))) // REFRESH TABLE invalidates the connector cache, external write becomes visible - session.sql(s"REFRESH TABLE $CT").collect() - checkRows(session.sql(s"SELECT * FROM $CT"), Seq(Row(1, 100), Row(2, 200))) + session.sql(s"REFRESH TABLE $cachingTestTable").collect() + checkRows(session.sql(s"SELECT * FROM $cachingTestTable"), Seq(Row(1, 100), Row(2, 200))) } } } @@ -98,15 +96,15 @@ trait DSv2RepeatedTableAccessTests extends DSv2ExternalMutationTestBase { test(s"${testPrefix}repeated sql() reflects session schema change") { withTestSession { session => - withTestTableAndViews(session, T) { - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() - checkRows(session.sql(s"SELECT * FROM $T"), Seq(Row(1, 100))) + 
withTestTableAndViews(session, testTable) { + session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $testTable VALUES (1, 100)").collect() + checkRows(session.sql(s"SELECT * FROM $testTable"), Seq(Row(1, 100))) - session.sql(s"ALTER TABLE $T ADD COLUMN new_col INT").collect() - session.sql(s"INSERT INTO $T VALUES (2, 200, -1)").collect() + session.sql(s"ALTER TABLE $testTable ADD COLUMN new_col INT").collect() + session.sql(s"INSERT INTO $testTable VALUES (2, 200, -1)").collect() checkRows( - session.sql(s"SELECT * FROM $T"), + session.sql(s"SELECT * FROM $testTable"), Seq(Row(1, 100, null), Row(2, 200, -1))) } } @@ -114,10 +112,10 @@ trait DSv2RepeatedTableAccessTests extends DSv2ExternalMutationTestBase { test(s"${testPrefix}repeated sql() reflects external schema change") { withTestSession { session => - withTestTableAndViews(session, T) { - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() - checkRows(session.sql(s"SELECT * FROM $T"), Seq(Row(1, 100))) + withTestTableAndViews(session, testTable) { + session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $testTable VALUES (1, 100)").collect() + checkRows(session.sql(s"SELECT * FROM $testTable"), Seq(Row(1, 100))) val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat") val addCol = TableChange.addColumn(Array("new_col"), IntegerType, true) @@ -126,7 +124,7 @@ trait DSv2RepeatedTableAccessTests extends DSv2ExternalMutationTestBase { externalAppend(catalog = catalog, ident = testIdent, row = InternalRow(2, 200, -1)) checkRows( - session.sql(s"SELECT * FROM $T"), + session.sql(s"SELECT * FROM $testTable"), Seq(Row(1, 100, null), Row(2, 200, -1))) } } @@ -134,10 +132,10 @@ trait DSv2RepeatedTableAccessTests extends DSv2ExternalMutationTestBase { test(s"${testPrefix}connector w/ cache: repeated sql() stale after 
external schema change") { withTestSession { session => - withTestTableAndViews(session, CT) { - session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $CT VALUES (1, 100)").collect() - checkRows(session.sql(s"SELECT * FROM $CT"), Seq(Row(1, 100))) + withTestTableAndViews(session, cachingTestTable) { + session.sql(s"CREATE TABLE $cachingTestTable (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $cachingTestTable VALUES (1, 100)").collect() + checkRows(session.sql(s"SELECT * FROM $cachingTestTable"), Seq(Row(1, 100))) val catalog = getTableCatalog[CachingInMemoryTableCatalog](session, "cachingcat") val addCol = TableChange.addColumn(Array("new_col"), IntegerType, true) @@ -146,12 +144,12 @@ trait DSv2RepeatedTableAccessTests extends DSv2ExternalMutationTestBase { externalAppend(catalog = catalog, ident = testIdent, row = InternalRow(2, 200, -1)) // Caching connector returns stale table: external changes invisible - checkRows(session.sql(s"SELECT * FROM $CT"), Seq(Row(1, 100))) + checkRows(session.sql(s"SELECT * FROM $cachingTestTable"), Seq(Row(1, 100))) // REFRESH TABLE invalidates the connector cache, schema change + data visible - session.sql(s"REFRESH TABLE $CT").collect() + session.sql(s"REFRESH TABLE $cachingTestTable").collect() checkRows( - session.sql(s"SELECT * FROM $CT"), + session.sql(s"SELECT * FROM $cachingTestTable"), Seq(Row(1, 100, null), Row(2, 200, -1))) } } @@ -161,24 +159,24 @@ trait DSv2RepeatedTableAccessTests extends DSv2ExternalMutationTestBase { test(s"${testPrefix}repeated sql() reflects session drop/recreate") { withTestSession { session => - withTestTableAndViews(session, T) { - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() - checkRows(session.sql(s"SELECT * FROM $T"), Seq(Row(1, 100))) - - session.sql(s"DROP TABLE $T").collect() - session.sql(s"CREATE TABLE $T (id INT, salary INT) 
USING foo").collect() - checkRows(session.sql(s"SELECT * FROM $T"), Seq.empty) + withTestTableAndViews(session, testTable) { + session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $testTable VALUES (1, 100)").collect() + checkRows(session.sql(s"SELECT * FROM $testTable"), Seq(Row(1, 100))) + + session.sql(s"DROP TABLE $testTable").collect() + session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect() + checkRows(session.sql(s"SELECT * FROM $testTable"), Seq.empty) } } } test(s"${testPrefix}repeated sql() reflects external drop/recreate") { withTestSession { session => - withTestTableAndViews(session, T) { - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100)").collect() - checkRows(session.sql(s"SELECT * FROM $T"), Seq(Row(1, 100))) + withTestTableAndViews(session, testTable) { + session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $testTable VALUES (1, 100)").collect() + checkRows(session.sql(s"SELECT * FROM $testTable"), Seq(Row(1, 100))) val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat") catalog.dropTable(testIdent) @@ -190,17 +188,17 @@ trait DSv2RepeatedTableAccessTests extends DSv2ExternalMutationTestBase { Column.create("salary", IntegerType))) .build()) - checkRows(session.sql(s"SELECT * FROM $T"), Seq.empty) + checkRows(session.sql(s"SELECT * FROM $testTable"), Seq.empty) } } } test(s"${testPrefix}connector w/ cache: repeated sql() stale after external drop/recreate") { withTestSession { session => - withTestTableAndViews(session, CT) { - session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $CT VALUES (1, 100)").collect() - checkRows(session.sql(s"SELECT * FROM $CT"), Seq(Row(1, 100))) + withTestTableAndViews(session, cachingTestTable) { + session.sql(s"CREATE TABLE $cachingTestTable (id 
INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $cachingTestTable VALUES (1, 100)").collect() + checkRows(session.sql(s"SELECT * FROM $cachingTestTable"), Seq(Row(1, 100))) val catalog = getTableCatalog[CachingInMemoryTableCatalog](session, "cachingcat") catalog.dropTable(testIdent) @@ -213,11 +211,11 @@ trait DSv2RepeatedTableAccessTests extends DSv2ExternalMutationTestBase { .build()) // Caching connector returns stale table: drop/recreate invisible - checkRows(session.sql(s"SELECT * FROM $CT"), Seq(Row(1, 100))) + checkRows(session.sql(s"SELECT * FROM $cachingTestTable"), Seq(Row(1, 100))) // REFRESH TABLE invalidates the connector cache, new empty table visible - session.sql(s"REFRESH TABLE $CT").collect() - checkRows(session.sql(s"SELECT * FROM $CT"), Seq.empty) + session.sql(s"REFRESH TABLE $cachingTestTable").collect() + checkRows(session.sql(s"SELECT * FROM $cachingTestTable"), Seq.empty) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2TempViewWithStoredPlanTests.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2TempViewWithStoredPlanTests.scala index eb40e3ac056fd..ff235f7836c80 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2TempViewWithStoredPlanTests.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2TempViewWithStoredPlanTests.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.connector import org.apache.spark.sql.{AnalysisException, Row} import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.connector.catalog.{CachingInMemoryTableCatalog, Column, Identifier, InMemoryTableCatalog, TableChange, TableInfo} +import org.apache.spark.sql.connector.catalog.{CachingInMemoryTableCatalog, Column, InMemoryTableCatalog, TableChange, TableInfo} import org.apache.spark.sql.types.{IntegerType, LongType, StringType} /** @@ -33,21 +33,19 @@ import org.apache.spark.sql.types.{IntegerType, LongType, StringType} */ trait 
DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { - private val T = "testcat.ns1.ns2.tbl" - private val CT = "cachingcat.ns1.ns2.tbl" - private val testIdent = Identifier.of(Array("ns1", "ns2"), "tbl") + // Uses testTable, cachingTestTable, and testIdent from DSv2ExternalMutationTestBase. // Scenario 1.1 (session write) test(s"${testPrefix}temp view with stored plan reflects session write") { withTestSession { session => - withTestTableAndViews(session, T, Seq("v")) { - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100), (10, 1000)").collect() + withTestTableAndViews(session, testTable, Seq("v")) { + session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect() session.table(T).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) - session.sql(s"INSERT INTO $T VALUES (2, 200)").collect() + session.sql(s"INSERT INTO $testTable VALUES (2, 200)").collect() checkRows(session.table("v"), Seq(Row(1, 100), Row(2, 200))) } } @@ -56,9 +54,9 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { // Scenario 1.2 (external write) test(s"${testPrefix}temp view with stored plan reflects external write") { withTestSession { session => - withTestTableAndViews(session, T, Seq("v")) { - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100), (10, 1000)").collect() + withTestTableAndViews(session, testTable, Seq("v")) { + session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect() session.table(T).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) @@ -74,9 +72,9 @@ trait DSv2TempViewWithStoredPlanTests extends 
DSv2ExternalMutationTestBase { // Scenario 1.2 connector w/ cache (external write, caching connector) test(s"${testPrefix}connector w/ cache: temp view stale after external write") { withTestSession { session => - withTestTableAndViews(session, CT, Seq("v")) { - session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $CT VALUES (1, 100), (10, 1000)").collect() + withTestTableAndViews(session, cachingTestTable, Seq("v")) { + session.sql(s"CREATE TABLE $cachingTestTable (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $cachingTestTable VALUES (1, 100), (10, 1000)").collect() session.table(CT).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) @@ -88,7 +86,7 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { checkRows(session.table("v"), Seq(Row(1, 100))) // REFRESH TABLE invalidates the connector cache, external write becomes visible - session.sql(s"REFRESH TABLE $CT").collect() + session.sql(s"REFRESH TABLE $cachingTestTable").collect() checkRows(session.table("v"), Seq(Row(1, 100), Row(2, 200))) } } @@ -97,15 +95,15 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { // Scenario 2.1 (session ADD COLUMN) test(s"${testPrefix}temp view with stored plan preserves schema after session ADD COLUMN") { withTestSession { session => - withTestTableAndViews(session, T, Seq("v")) { - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100), (10, 1000)").collect() + withTestTableAndViews(session, testTable, Seq("v")) { + session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect() session.table(T).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) - session.sql(s"ALTER TABLE $T ADD COLUMN new_column 
INT").collect() - session.sql(s"INSERT INTO $T VALUES (2, 200, -1)").collect() + session.sql(s"ALTER TABLE $testTable ADD COLUMN new_column INT").collect() + session.sql(s"INSERT INTO $testTable VALUES (2, 200, -1)").collect() // view preserves original 2-column schema, filter still applied checkRows(session.table("v"), Seq(Row(1, 100), Row(2, 200))) @@ -116,9 +114,9 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { // Scenario 2.2 (external ADD COLUMN) test(s"${testPrefix}temp view with stored plan preserves schema after external ADD COLUMN") { withTestSession { session => - withTestTableAndViews(session, T, Seq("v")) { - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100), (10, 1000)").collect() + withTestTableAndViews(session, testTable, Seq("v")) { + session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect() session.table(T).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) @@ -139,9 +137,9 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { // Scenario 2.2 connector w/ cache (external ADD COLUMN, caching connector) test(s"${testPrefix}connector w/ cache: temp view stale after external ADD COLUMN") { withTestSession { session => - withTestTableAndViews(session, CT, Seq("v")) { - session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $CT VALUES (1, 100), (10, 1000)").collect() + withTestTableAndViews(session, cachingTestTable, Seq("v")) { + session.sql(s"CREATE TABLE $cachingTestTable (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $cachingTestTable VALUES (1, 100), (10, 1000)").collect() session.table(CT).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) @@ -156,7 +154,7 
@@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { checkRows(session.table("v"), Seq(Row(1, 100))) // REFRESH TABLE invalidates the connector cache, view preserves original 2-column schema - session.sql(s"REFRESH TABLE $CT").collect() + session.sql(s"REFRESH TABLE $cachingTestTable").collect() checkRows(session.table("v"), Seq(Row(1, 100), Row(2, 200))) } } @@ -165,14 +163,14 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { // Scenario 3.1 (session column removal) test(s"${testPrefix}temp view with stored plan detects session column removal") { withTestSession { session => - withTestTableAndViews(session, T, Seq("v")) { - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100), (10, 1000)").collect() + withTestTableAndViews(session, testTable, Seq("v")) { + session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect() session.table(T).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) - session.sql(s"ALTER TABLE $T DROP COLUMN salary").collect() + session.sql(s"ALTER TABLE $testTable DROP COLUMN salary").collect() checkError( exception = intercept[AnalysisException] { session.table("v").collect() }, @@ -189,9 +187,9 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { // Scenario 3.2 (external column removal) test(s"${testPrefix}temp view with stored plan detects external column removal") { withTestSession { session => - withTestTableAndViews(session, T, Seq("v")) { - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100), (10, 1000)").collect() + withTestTableAndViews(session, testTable, Seq("v")) { + session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO 
$testTable VALUES (1, 100), (10, 1000)").collect() session.table(T).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) @@ -215,9 +213,9 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { // Scenario 3.2 connector w/ cache (external column removal, caching connector) test(s"${testPrefix}connector w/ cache: temp view stale after external column removal") { withTestSession { session => - withTestTableAndViews(session, CT, Seq("v")) { - session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $CT VALUES (1, 100), (10, 1000)").collect() + withTestTableAndViews(session, cachingTestTable, Seq("v")) { + session.sql(s"CREATE TABLE $cachingTestTable (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $cachingTestTable VALUES (1, 100), (10, 1000)").collect() session.table(CT).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) @@ -230,7 +228,7 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { checkRows(session.table("v"), Seq(Row(1, 100))) // REFRESH TABLE invalidates the connector cache, column removal detected - session.sql(s"REFRESH TABLE $CT").collect() + session.sql(s"REFRESH TABLE $cachingTestTable").collect() checkError( exception = intercept[AnalysisException] { session.table("v").collect() }, condition = "INCOMPATIBLE_COLUMN_CHANGES_AFTER_VIEW_WITH_PLAN_CREATION", @@ -246,9 +244,9 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { // Scenario 4.1 (session drop and recreate table) test(s"${testPrefix}temp view with stored plan resolves to session-recreated table") { withTestSession { session => - withTestTableAndViews(session, T, Seq("v")) { - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100), (10, 1000)").collect() + withTestTableAndViews(session, 
testTable, Seq("v")) { + session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect() session.table(T).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) @@ -256,8 +254,8 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat") val originalTableId = catalog.loadTable(testIdent).id - session.sql(s"DROP TABLE $T").collect() - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() + session.sql(s"DROP TABLE $testTable").collect() + session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect() val newTableId = catalog.loadTable(testIdent).id assert(originalTableId != newTableId) @@ -265,7 +263,7 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { // view resolves to the new empty table checkRows(session.table("v"), Seq.empty) - session.sql(s"INSERT INTO $T VALUES (2, 200)").collect() + session.sql(s"INSERT INTO $testTable VALUES (2, 200)").collect() checkRows(session.table("v"), Seq(Row(2, 200))) } } @@ -274,9 +272,9 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { // Scenario 4.2 (external drop and recreate table) test(s"${testPrefix}temp view with stored plan resolves to externally recreated table") { withTestSession { session => - withTestTableAndViews(session, T, Seq("v")) { - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100), (10, 1000)").collect() + withTestTableAndViews(session, testTable, Seq("v")) { + session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect() session.table(T).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), 
Seq(Row(1, 100))) @@ -299,7 +297,7 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { // view resolves to the new empty table checkRows(session.table("v"), Seq.empty) - session.sql(s"INSERT INTO $T VALUES (2, 200)").collect() + session.sql(s"INSERT INTO $testTable VALUES (2, 200)").collect() checkRows(session.table("v"), Seq(Row(2, 200))) } } @@ -308,9 +306,9 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { // Scenario 4.2 connector w/ cache (external drop/recreate, caching connector) test(s"${testPrefix}connector w/ cache: temp view stale after external drop/recreate") { withTestSession { session => - withTestTableAndViews(session, CT, Seq("v")) { - session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $CT VALUES (1, 100), (10, 1000)").collect() + withTestTableAndViews(session, cachingTestTable, Seq("v")) { + session.sql(s"CREATE TABLE $cachingTestTable (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $cachingTestTable VALUES (1, 100), (10, 1000)").collect() session.table(CT).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) @@ -329,7 +327,7 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { checkRows(session.table("v"), Seq(Row(1, 100))) // REFRESH TABLE invalidates the connector cache, view resolves to new empty table - session.sql(s"REFRESH TABLE $CT").collect() + session.sql(s"REFRESH TABLE $cachingTestTable").collect() checkRows(session.table("v"), Seq.empty) } } @@ -339,9 +337,9 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { test(s"${testPrefix}temp view with stored plan after session drop and re-add column same type" + " with unfiltered view") { withTestSession { session => - withTestTableAndViews(session, T, Seq("v", "v_no_filter", "v_filter_is_null")) { - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING 
foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100), (10, 1000)").collect() + withTestTableAndViews(session, testTable, Seq("v", "v_no_filter", "v_filter_is_null")) { + session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect() session.table(T).filter("salary < 999").createOrReplaceTempView("v") session.table(T).createOrReplaceTempView("v_no_filter") @@ -351,8 +349,8 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { checkRows(session.table("v_filter_is_null"), Seq.empty) // drop and re-add column with same name and type - session.sql(s"ALTER TABLE $T DROP COLUMN salary").collect() - session.sql(s"ALTER TABLE $T ADD COLUMN salary INT").collect() + session.sql(s"ALTER TABLE $testTable DROP COLUMN salary").collect() + session.sql(s"ALTER TABLE $testTable ADD COLUMN salary INT").collect() // salary values are now null, so the filtered view returns nothing checkRows(session.table("v"), Seq.empty) @@ -368,9 +366,9 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { test(s"${testPrefix}temp view with stored plan after external drop and re-add column " + "same type") { withTestSession { session => - withTestTableAndViews(session, T, Seq("v", "v_no_filter", "v_filter_is_null")) { - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100), (10, 1000)").collect() + withTestTableAndViews(session, testTable, Seq("v", "v_no_filter", "v_filter_is_null")) { + session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect() session.table(T).filter("salary < 999").createOrReplaceTempView("v") session.table(T).createOrReplaceTempView("v_no_filter") @@ -399,9 +397,9 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { 
test(s"${testPrefix}connector w/ cache: temp view stale after external drop/re-add column " + "same type") { withTestSession { session => - withTestTableAndViews(session, CT, Seq("v")) { - session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $CT VALUES (1, 100), (10, 1000)").collect() + withTestTableAndViews(session, cachingTestTable, Seq("v")) { + session.sql(s"CREATE TABLE $cachingTestTable (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $cachingTestTable VALUES (1, 100), (10, 1000)").collect() session.table(CT).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) @@ -415,7 +413,7 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { checkRows(session.table("v"), Seq(Row(1, 100))) // REFRESH TABLE invalidates the connector cache, salary values are null - session.sql(s"REFRESH TABLE $CT").collect() + session.sql(s"REFRESH TABLE $cachingTestTable").collect() checkRows(session.table("v"), Seq.empty) } } @@ -424,15 +422,15 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { // Scenario 6.1 (session drop and re-add column with different type) test(s"${testPrefix}temp view with stored plan detects session column type change") { withTestSession { session => - withTestTableAndViews(session, T, Seq("v")) { - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100), (10, 1000)").collect() + withTestTableAndViews(session, testTable, Seq("v")) { + session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect() session.table(T).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) - session.sql(s"ALTER TABLE $T DROP COLUMN salary").collect() - session.sql(s"ALTER TABLE $T ADD COLUMN salary 
STRING").collect() + session.sql(s"ALTER TABLE $testTable DROP COLUMN salary").collect() + session.sql(s"ALTER TABLE $testTable ADD COLUMN salary STRING").collect() checkError( exception = intercept[AnalysisException] { session.table("v").collect() }, @@ -449,9 +447,9 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { // Scenario 6.2 (external drop and re-add column with different type) test(s"${testPrefix}temp view with stored plan detects external column type change") { withTestSession { session => - withTestTableAndViews(session, T, Seq("v")) { - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100), (10, 1000)").collect() + withTestTableAndViews(session, testTable, Seq("v")) { + session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect() session.table(T).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) @@ -476,9 +474,9 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { // Scenario 6.2 connector w/ cache (external column type change, caching connector) test(s"${testPrefix}connector w/ cache: temp view stale after external column type change") { withTestSession { session => - withTestTableAndViews(session, CT, Seq("v")) { - session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $CT VALUES (1, 100), (10, 1000)").collect() + withTestTableAndViews(session, cachingTestTable, Seq("v")) { + session.sql(s"CREATE TABLE $cachingTestTable (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $cachingTestTable VALUES (1, 100), (10, 1000)").collect() session.table(CT).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) @@ -492,7 +490,7 @@ trait DSv2TempViewWithStoredPlanTests extends 
DSv2ExternalMutationTestBase { checkRows(session.table("v"), Seq(Row(1, 100))) // REFRESH TABLE invalidates the connector cache, type change detected - session.sql(s"REFRESH TABLE $CT").collect() + session.sql(s"REFRESH TABLE $cachingTestTable").collect() checkError( exception = intercept[AnalysisException] { session.table("v").collect() }, condition = "INCOMPATIBLE_COLUMN_CHANGES_AFTER_VIEW_WITH_PLAN_CREATION", @@ -508,14 +506,14 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { // Scenario 7.1 (session type widening from INT to BIGINT) test(s"${testPrefix}temp view with stored plan detects session type widening") { withTestSession { session => - withTestTableAndViews(session, T, Seq("v")) { - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100), (10, 1000)").collect() + withTestTableAndViews(session, testTable, Seq("v")) { + session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect() session.table(T).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) - session.sql(s"ALTER TABLE $T ALTER COLUMN salary TYPE LONG").collect() + session.sql(s"ALTER TABLE $testTable ALTER COLUMN salary TYPE LONG").collect() checkError( exception = intercept[AnalysisException] { session.table("v").collect() }, @@ -532,9 +530,9 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { // Scenario 7.2 (external type widening from INT to BIGINT) test(s"${testPrefix}temp view with stored plan detects external type widening") { withTestSession { session => - withTestTableAndViews(session, T, Seq("v")) { - session.sql(s"CREATE TABLE $T (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $T VALUES (1, 100), (10, 1000)").collect() + withTestTableAndViews(session, testTable, Seq("v")) { + session.sql(s"CREATE 
TABLE $testTable (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect() session.table(T).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) @@ -558,9 +556,9 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { // Scenario 7.2 connector w/ cache (external type widening, caching connector) test(s"${testPrefix}connector w/ cache: temp view stale after external type widening") { withTestSession { session => - withTestTableAndViews(session, CT, Seq("v")) { - session.sql(s"CREATE TABLE $CT (id INT, salary INT) USING foo").collect() - session.sql(s"INSERT INTO $CT VALUES (1, 100), (10, 1000)").collect() + withTestTableAndViews(session, cachingTestTable, Seq("v")) { + session.sql(s"CREATE TABLE $cachingTestTable (id INT, salary INT) USING foo").collect() + session.sql(s"INSERT INTO $cachingTestTable VALUES (1, 100), (10, 1000)").collect() session.table(CT).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) @@ -573,7 +571,7 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { checkRows(session.table("v"), Seq(Row(1, 100))) // REFRESH TABLE invalidates the connector cache, type change detected - session.sql(s"REFRESH TABLE $CT").collect() + session.sql(s"REFRESH TABLE $cachingTestTable").collect() checkError( exception = intercept[AnalysisException] { session.table("v").collect() }, condition = "INCOMPATIBLE_COLUMN_CHANGES_AFTER_VIEW_WITH_PLAN_CREATION", From 561d290ebbcf96f431e241d0795d7607eb31ec32 Mon Sep 17 00:00:00 2001 From: Thang Long VU Date: Tue, 26 May 2026 13:56:22 +0000 Subject: [PATCH 34/36] Fix compilation: replace bare T/CT refs with testTable/cachingTestTable The hoist of constants to DSv2ExternalMutationTestBase updated string interpolations ($T/$CT) but missed bare references in session.table(T) and session.table(CT) calls. 
Co-authored-by: Isaac --- .../DSv2TempViewWithStoredPlanTests.scala | 50 +++++++++---------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2TempViewWithStoredPlanTests.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2TempViewWithStoredPlanTests.scala index ff235f7836c80..72ec550f60022 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2TempViewWithStoredPlanTests.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2TempViewWithStoredPlanTests.scala @@ -42,7 +42,7 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect() session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect() - session.table(T).filter("salary < 999").createOrReplaceTempView("v") + session.table(testTable).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) session.sql(s"INSERT INTO $testTable VALUES (2, 200)").collect() @@ -58,7 +58,7 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect() session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect() - session.table(T).filter("salary < 999").createOrReplaceTempView("v") + session.table(testTable).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat") @@ -76,7 +76,7 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { session.sql(s"CREATE TABLE $cachingTestTable (id INT, salary INT) USING foo").collect() session.sql(s"INSERT INTO $cachingTestTable VALUES (1, 100), (10, 1000)").collect() - session.table(CT).filter("salary < 999").createOrReplaceTempView("v") + 
session.table(cachingTestTable).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) val catalog = getTableCatalog[CachingInMemoryTableCatalog](session, "cachingcat") @@ -99,7 +99,7 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect() session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect() - session.table(T).filter("salary < 999").createOrReplaceTempView("v") + session.table(testTable).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) session.sql(s"ALTER TABLE $testTable ADD COLUMN new_column INT").collect() @@ -118,7 +118,7 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect() session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect() - session.table(T).filter("salary < 999").createOrReplaceTempView("v") + session.table(testTable).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) // external schema change via catalog API @@ -141,7 +141,7 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { session.sql(s"CREATE TABLE $cachingTestTable (id INT, salary INT) USING foo").collect() session.sql(s"INSERT INTO $cachingTestTable VALUES (1, 100), (10, 1000)").collect() - session.table(CT).filter("salary < 999").createOrReplaceTempView("v") + session.table(cachingTestTable).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) val catalog = getTableCatalog[CachingInMemoryTableCatalog](session, "cachingcat") @@ -167,7 +167,7 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect() session.sql(s"INSERT INTO 
$testTable VALUES (1, 100), (10, 1000)").collect() - session.table(T).filter("salary < 999").createOrReplaceTempView("v") + session.table(testTable).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) session.sql(s"ALTER TABLE $testTable DROP COLUMN salary").collect() @@ -191,7 +191,7 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect() session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect() - session.table(T).filter("salary < 999").createOrReplaceTempView("v") + session.table(testTable).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat") @@ -217,7 +217,7 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { session.sql(s"CREATE TABLE $cachingTestTable (id INT, salary INT) USING foo").collect() session.sql(s"INSERT INTO $cachingTestTable VALUES (1, 100), (10, 1000)").collect() - session.table(CT).filter("salary < 999").createOrReplaceTempView("v") + session.table(cachingTestTable).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) val catalog = getTableCatalog[CachingInMemoryTableCatalog](session, "cachingcat") @@ -248,7 +248,7 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect() session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect() - session.table(T).filter("salary < 999").createOrReplaceTempView("v") + session.table(testTable).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat") @@ -276,7 +276,7 @@ trait DSv2TempViewWithStoredPlanTests extends 
DSv2ExternalMutationTestBase { session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect() session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect() - session.table(T).filter("salary < 999").createOrReplaceTempView("v") + session.table(testTable).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat") @@ -310,7 +310,7 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { session.sql(s"CREATE TABLE $cachingTestTable (id INT, salary INT) USING foo").collect() session.sql(s"INSERT INTO $cachingTestTable VALUES (1, 100), (10, 1000)").collect() - session.table(CT).filter("salary < 999").createOrReplaceTempView("v") + session.table(cachingTestTable).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) val catalog = getTableCatalog[CachingInMemoryTableCatalog](session, "cachingcat") @@ -341,9 +341,9 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect() session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect() - session.table(T).filter("salary < 999").createOrReplaceTempView("v") - session.table(T).createOrReplaceTempView("v_no_filter") - session.table(T).filter("salary IS NULL").createOrReplaceTempView("v_filter_is_null") + session.table(testTable).filter("salary < 999").createOrReplaceTempView("v") + session.table(testTable).createOrReplaceTempView("v_no_filter") + session.table(testTable).filter("salary IS NULL").createOrReplaceTempView("v_filter_is_null") checkRows(session.table("v"), Seq(Row(1, 100))) checkRows(session.table("v_no_filter"), Seq(Row(1, 100), Row(10, 1000))) checkRows(session.table("v_filter_is_null"), Seq.empty) @@ -370,9 +370,9 @@ trait DSv2TempViewWithStoredPlanTests extends 
DSv2ExternalMutationTestBase { session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect() session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect() - session.table(T).filter("salary < 999").createOrReplaceTempView("v") - session.table(T).createOrReplaceTempView("v_no_filter") - session.table(T).filter("salary IS NULL").createOrReplaceTempView("v_filter_is_null") + session.table(testTable).filter("salary < 999").createOrReplaceTempView("v") + session.table(testTable).createOrReplaceTempView("v_no_filter") + session.table(testTable).filter("salary IS NULL").createOrReplaceTempView("v_filter_is_null") checkRows(session.table("v"), Seq(Row(1, 100))) checkRows(session.table("v_no_filter"), Seq(Row(1, 100), Row(10, 1000))) checkRows(session.table("v_filter_is_null"), Seq.empty) @@ -401,7 +401,7 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { session.sql(s"CREATE TABLE $cachingTestTable (id INT, salary INT) USING foo").collect() session.sql(s"INSERT INTO $cachingTestTable VALUES (1, 100), (10, 1000)").collect() - session.table(CT).filter("salary < 999").createOrReplaceTempView("v") + session.table(cachingTestTable).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) val catalog = getTableCatalog[CachingInMemoryTableCatalog](session, "cachingcat") @@ -426,7 +426,7 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect() session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect() - session.table(T).filter("salary < 999").createOrReplaceTempView("v") + session.table(testTable).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) session.sql(s"ALTER TABLE $testTable DROP COLUMN salary").collect() @@ -451,7 +451,7 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { 
session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect() session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect() - session.table(T).filter("salary < 999").createOrReplaceTempView("v") + session.table(testTable).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat") @@ -478,7 +478,7 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { session.sql(s"CREATE TABLE $cachingTestTable (id INT, salary INT) USING foo").collect() session.sql(s"INSERT INTO $cachingTestTable VALUES (1, 100), (10, 1000)").collect() - session.table(CT).filter("salary < 999").createOrReplaceTempView("v") + session.table(cachingTestTable).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) val catalog = getTableCatalog[CachingInMemoryTableCatalog](session, "cachingcat") @@ -510,7 +510,7 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect() session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect() - session.table(T).filter("salary < 999").createOrReplaceTempView("v") + session.table(testTable).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) session.sql(s"ALTER TABLE $testTable ALTER COLUMN salary TYPE LONG").collect() @@ -534,7 +534,7 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { session.sql(s"CREATE TABLE $testTable (id INT, salary INT) USING foo").collect() session.sql(s"INSERT INTO $testTable VALUES (1, 100), (10, 1000)").collect() - session.table(T).filter("salary < 999").createOrReplaceTempView("v") + session.table(testTable).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) val catalog = 
getTableCatalog[InMemoryTableCatalog](session, "testcat") @@ -560,7 +560,7 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { session.sql(s"CREATE TABLE $cachingTestTable (id INT, salary INT) USING foo").collect() session.sql(s"INSERT INTO $cachingTestTable VALUES (1, 100), (10, 1000)").collect() - session.table(CT).filter("salary < 999").createOrReplaceTempView("v") + session.table(cachingTestTable).filter("salary < 999").createOrReplaceTempView("v") checkRows(session.table("v"), Seq(Row(1, 100))) val catalog = getTableCatalog[CachingInMemoryTableCatalog](session, "cachingcat") From a9cd1ba7b58e8fa6d6edb61f276aa4823aa3f554 Mon Sep 17 00:00:00 2001 From: Thang Long VU Date: Tue, 26 May 2026 15:11:12 +0000 Subject: [PATCH 35/36] Fix scalastyle: break long lines in DSv2TempViewWithStoredPlanTests Co-authored-by: Isaac --- .../sql/connector/DSv2TempViewWithStoredPlanTests.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2TempViewWithStoredPlanTests.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2TempViewWithStoredPlanTests.scala index 72ec550f60022..1a5229258e7d4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2TempViewWithStoredPlanTests.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DSv2TempViewWithStoredPlanTests.scala @@ -343,7 +343,8 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { session.table(testTable).filter("salary < 999").createOrReplaceTempView("v") session.table(testTable).createOrReplaceTempView("v_no_filter") - session.table(testTable).filter("salary IS NULL").createOrReplaceTempView("v_filter_is_null") + session.table(testTable).filter("salary IS NULL") + .createOrReplaceTempView("v_filter_is_null") checkRows(session.table("v"), Seq(Row(1, 100))) checkRows(session.table("v_no_filter"), Seq(Row(1, 100), Row(10, 1000))) 
checkRows(session.table("v_filter_is_null"), Seq.empty) @@ -372,7 +373,8 @@ trait DSv2TempViewWithStoredPlanTests extends DSv2ExternalMutationTestBase { session.table(testTable).filter("salary < 999").createOrReplaceTempView("v") session.table(testTable).createOrReplaceTempView("v_no_filter") - session.table(testTable).filter("salary IS NULL").createOrReplaceTempView("v_filter_is_null") + session.table(testTable).filter("salary IS NULL") + .createOrReplaceTempView("v_filter_is_null") checkRows(session.table("v"), Seq(Row(1, 100))) checkRows(session.table("v_no_filter"), Seq(Row(1, 100), Row(10, 1000))) checkRows(session.table("v_filter_is_null"), Seq.empty) From 0dce6a8f76c6e8455af6c8fa9dcedf57a8efb9b4 Mon Sep 17 00:00:00 2001 From: Thang Long VU Date: Tue, 26 May 2026 15:33:30 +0000 Subject: [PATCH 36/36] Fix scalafmt: reformat Scaladoc in DataSourceV2DataFrameConnectSuite Co-authored-by: Isaac --- .../sql/connect/DataSourceV2DataFrameConnectSuite.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2DataFrameConnectSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2DataFrameConnectSuite.scala index a97821d16204a..a13e953460a72 100644 --- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2DataFrameConnectSuite.scala +++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/DataSourceV2DataFrameConnectSuite.scala @@ -27,10 +27,10 @@ import org.apache.spark.sql.connector.catalog.{CachingInMemoryTableCatalog, InMe /** * Connect-mode counterpart of [[org.apache.spark.sql.connector.DataSourceV2DataFrameSuite]]. * - * Runs DSv2 temp view tests ([[DSv2TempViewWithStoredPlanTests]]) and repeated table access - * tests ([[DSv2RepeatedTableAccessTests]]) under Spark Connect. 
All test logic lives in the - * shared traits; this class only provides the Connect-specific session, catalog access, and - * result comparison. + * Runs DSv2 temp view tests ([[DSv2TempViewWithStoredPlanTests]]) and repeated table access tests + * ([[DSv2RepeatedTableAccessTests]]) under Spark Connect. All test logic lives in the shared + * traits; this class only provides the Connect-specific session, catalog access, and result + * comparison. */ class DataSourceV2DataFrameConnectSuite extends SparkConnectServerTest