Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
e0fe340
[SPARK-56619][CONNECT][TESTS] Add Connect repeated SQL refresh tests
longvu-db Apr 24, 2026
2c7c08f
Apply scalafmt formatting
longvu-db Apr 27, 2026
f36aeb1
Add DataFrame reuse tests to prove Connect re-analysis behavior
longvu-db Apr 29, 2026
5c5ccc3
Rename session variable from s to session for clarity
longvu-db Apr 29, 2026
2514222
Add connector-w/-cache repeated sql() tests for Spark Connect
longvu-db Apr 29, 2026
6415e2c
Retrigger CI: flaky protobuf test failure (unrelated)
longvu-db Apr 30, 2026
bdc4f9a
Fix cache cleanup order: clear cache after dropping table
longvu-db Apr 30, 2026
f177d59
Retrigger CI: flaky transactional checks tests (unrelated)
longvu-db Apr 30, 2026
a8e26ad
Retrigger CI: flaky transactional checks tests (unrelated)
longvu-db Apr 30, 2026
9fc78bf
Add REFRESH TABLE verification to connector-w/-cache Connect repeated…
longvu-db Apr 30, 2026
20ef587
Reorganize connector-w/-cache tests to be side-by-side with non-cachi…
longvu-db Apr 30, 2026
ec643ad
Retrigger CI
longvu-db Apr 30, 2026
6151018
Empty commit to retrigger CI
longvu-db May 1, 2026
e39c09b
Address review: fix assertRows, generic serverCatalog, try/finally cl…
longvu-db May 8, 2026
a2c8d50
Address review: remove ClassTag, add externalAppend helper
longvu-db May 8, 2026
4bd573a
Address review: remove empty lines, rename schema vars, move clearCac…
longvu-db May 13, 2026
d0d2e6b
Fix scalafmt formatting
longvu-db May 19, 2026
e148010
Address review: document cache design decisions
longvu-db May 19, 2026
b2e7925
Remove CachingInMemoryTableCatalog.scala from this PR (already in mas…
longvu-db May 20, 2026
a150faa
Restore CachingInMemoryTableCatalog.scala to master version
longvu-db May 20, 2026
64d4a67
Fix compilation error: call clearCache() on catalog instance, not com…
longvu-db May 20, 2026
227cfea
Fix Scaladoc: use neutral cross-reference to classic-path tests
longvu-db May 20, 2026
2f91fa0
Fix CI: only clear cachingcat cache when the test uses cachingcat
longvu-db May 20, 2026
02daa14
Extract DSv2RepeatedTableAccessTests trait, mix into classic and Conn…
longvu-db May 26, 2026
63944a3
Fix stale Scaladoc in DSv2ExternalMutationTestBase, rename ident to t…
longvu-db May 26, 2026
cbb5cf6
Add checkRows comment explaining why sameRows is used instead of chec…
longvu-db May 26, 2026
8f2f31d
Remove redundant Connect-only DataFrame reuse tests
longvu-db May 26, 2026
ad9b765
Consolidate Connect DSv2 suites: mix DSv2RepeatedTableAccessTests int…
longvu-db May 26, 2026
0f2d4f8
Rename DataSourceV2TempViewConnectSuite to DataSourceV2DataFrameConne…
longvu-db May 26, 2026
c84e63d
Fix Scaladoc: describe both traits the Connect suite runs
longvu-db May 26, 2026
9c0ac7d
Fix DSv2RepeatedTableAccessTests Scaladoc to match design doc
longvu-db May 26, 2026
465892d
Fix Scaladoc: session-write -> session mutation, external-write -> ex…
longvu-db May 26, 2026
002cd83
Address review: hoist shared constants, fix Scaladoc scope and DDL/DM…
longvu-db May 26, 2026
561d290
Fix compilation: replace bare T/CT refs with testTable/cachingTestTable
longvu-db May 26, 2026
a9cd1ba
Fix scalastyle: break long lines in DSv2TempViewWithStoredPlanTests
longvu-db May 26, 2026
0dce6a8
Fix scalafmt: reformat Scaladoc in DataSourceV2DataFrameConnectSuite
longvu-db May 26, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,21 @@ import scala.reflect.ClassTag

import org.apache.spark.SparkConf
import org.apache.spark.sql.{DataFrame, QueryTest, Row, SparkSession}
import org.apache.spark.sql.connector.DSv2TempViewWithStoredPlanTests
import org.apache.spark.sql.connector.{DSv2RepeatedTableAccessTests, DSv2TempViewWithStoredPlanTests}
import org.apache.spark.sql.connector.catalog.{CachingInMemoryTableCatalog, InMemoryTableCatalog, TableCatalog}

/**
* Connect-mode runner for [[DSv2TempViewWithStoredPlanTests]]. All test logic lives in the shared
* trait; this class only provides the Connect-specific session, catalog access, and result
* Connect-mode counterpart of [[org.apache.spark.sql.connector.DataSourceV2DataFrameSuite]].
*
* Runs DSv2 temp view tests ([[DSv2TempViewWithStoredPlanTests]]) and repeated table access tests
* ([[DSv2RepeatedTableAccessTests]]) under Spark Connect. All test logic lives in the shared
* traits; this class only provides the Connect-specific session, catalog access, and result
* comparison.
*/
class DataSourceV2TempViewConnectSuite
class DataSourceV2DataFrameConnectSuite
extends SparkConnectServerTest
with DSv2TempViewWithStoredPlanTests {
with DSv2TempViewWithStoredPlanTests
with DSv2RepeatedTableAccessTests {

override def sparkConf: SparkConf = super.sparkConf
.set("spark.sql.catalog.testcat", classOf[InMemoryTableCatalog].getName)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,20 @@ import org.apache.spark.sql.connector.catalog.{BufferedRows, CatalogV2Util, Iden
* (where the test session IS the server session) and Connect mode (where the test session
* is a Connect client and catalog access requires the server session).
*
* Concrete suites override the abstract methods and mix in the test trait
* [[DSv2TempViewWithStoredPlanTests]].
* Concrete suites override the abstract methods and mix in a test trait such as
* [[DSv2TempViewWithStoredPlanTests]] or [[DSv2RepeatedTableAccessTests]].
*/
trait DSv2ExternalMutationTestBase extends QueryTest {

/** Fully qualified table name under the non-caching test catalog. */
protected val testTable: String = "testcat.ns1.ns2.tbl"

/** Fully qualified table name under the caching test catalog. */
protected val cachingTestTable: String = "cachingcat.ns1.ns2.tbl"

/** Identifier for the test table within its namespace. */
protected val testIdent: Identifier = Identifier.of(Array("ns1", "ns2"), "tbl")

/** Prefix for test names, e.g. "" or "[connect] ". */
protected def testPrefix: String

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.connector

import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.connector.catalog.{CachingInMemoryTableCatalog, Column, InMemoryTableCatalog, TableChange, TableInfo}
import org.apache.spark.sql.types.IntegerType

/**
 * Shared repeated table access tests with external changes for DSv2 tables. These tests verify
 * that repeated `sql()` calls correctly reflect both session and external mutations:
 *
 *  - Scenario 1 (writes): rows appended by the session, or externally via the catalog API,
 *    are visible to the next `sql()` call.
 *  - Scenario 2 (schema changes): an ADD COLUMN issued by the session, or externally via the
 *    catalog API, is visible to the next `sql()` call.
 *  - Scenario 3 (drop/recreate): after the table is dropped and recreated by the session, or
 *    externally via the catalog API, the next `sql()` call resolves to the new empty table.
 *
 * Each scenario includes a session mutation baseline, an external mutation test, and a
 * caching-connector variant showing stale results until `REFRESH TABLE`.
 *
 * The table names (`testTable`, `cachingTestTable`), the identifier `testIdent` and the
 * `testPrefix` hook come from [[DSv2ExternalMutationTestBase]]. The fixture helpers used below
 * (`withTestSession`, `withTestTableAndViews`, `checkRows`, `getTableCatalog`, `externalAppend`)
 * are inherited as well; their definitions are not part of this file.
 *
 * NOTE: All `session.sql(...)` calls that issue DDL / DML append `.collect()` because Connect
 * client DataFrames are lazy and require an action to trigger execution. In classic mode DDL /
 * DML already execute eagerly inside `sql()`, so the extra `.collect()` is harmless there.
 */
trait DSv2RepeatedTableAccessTests extends DSv2ExternalMutationTestBase {

  // Uses testTable, cachingTestTable, and testIdent from DSv2ExternalMutationTestBase.

  // Shared fixtures. They deliberately avoid naming the session type so the same helpers work
  // for whichever session flavor `withTestSession` hands to the test body.

  /** DDL that creates `table` with the two-column `(id INT, salary INT)` schema. */
  private def createTableSql(table: String): String =
    s"CREATE TABLE $table (id INT, salary INT) USING foo"

  /** Statements that create `table` and insert the baseline row `(1, 100)`, in that order. */
  private def seedTableSql(table: String): Seq[String] =
    Seq(createTableSql(table), s"INSERT INTO $table VALUES (1, 100)")

  /** Catalog-API change that adds the nullable INT column `new_col`. */
  private def addNewColChange: TableChange =
    TableChange.addColumn(Array("new_col"), IntegerType, true)

  /**
   * Catalog-API table definition matching [[createTableSql]]; a fresh instance is built on
   * every call so tests never share a `TableInfo`.
   */
  private def idSalaryTableInfo: TableInfo =
    new TableInfo.Builder()
      .withColumns(Array(
        Column.create("id", IntegerType),
        Column.create("salary", IntegerType)))
      .build()

  // Scenario 1: data changes via writes

  test(s"${testPrefix}repeated sql() reflects session write") {
    withTestSession { session =>
      withTestTableAndViews(session, testTable) {
        seedTableSql(testTable).foreach(stmt => session.sql(stmt).collect())
        checkRows(session.sql(s"SELECT * FROM $testTable"), Seq(Row(1, 100)))

        session.sql(s"INSERT INTO $testTable VALUES (2, 200)").collect()
        checkRows(session.sql(s"SELECT * FROM $testTable"), Seq(Row(1, 100), Row(2, 200)))
      }
    }
  }

  test(s"${testPrefix}repeated sql() reflects external write") {
    withTestSession { session =>
      withTestTableAndViews(session, testTable) {
        seedTableSql(testTable).foreach(stmt => session.sql(stmt).collect())
        checkRows(session.sql(s"SELECT * FROM $testTable"), Seq(Row(1, 100)))

        // Mutate through the catalog API, bypassing the session under test.
        val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat")
        externalAppend(catalog = catalog, ident = testIdent, row = InternalRow(2, 200))

        checkRows(session.sql(s"SELECT * FROM $testTable"), Seq(Row(1, 100), Row(2, 200)))
      }
    }
  }

  test(s"${testPrefix}connector w/ cache: repeated sql() stale after external write") {
    withTestSession { session =>
      withTestTableAndViews(session, cachingTestTable) {
        seedTableSql(cachingTestTable).foreach(stmt => session.sql(stmt).collect())
        checkRows(session.sql(s"SELECT * FROM $cachingTestTable"), Seq(Row(1, 100)))

        val catalog = getTableCatalog[CachingInMemoryTableCatalog](session, "cachingcat")
        externalAppend(catalog = catalog, ident = testIdent, row = InternalRow(2, 200))

        // Caching connector returns stale table: external write invisible
        checkRows(session.sql(s"SELECT * FROM $cachingTestTable"), Seq(Row(1, 100)))

        // REFRESH TABLE invalidates the connector cache, external write becomes visible
        session.sql(s"REFRESH TABLE $cachingTestTable").collect()
        checkRows(session.sql(s"SELECT * FROM $cachingTestTable"), Seq(Row(1, 100), Row(2, 200)))
      }
    }
  }

  // Scenario 2: schema changes

  test(s"${testPrefix}repeated sql() reflects session schema change") {
    withTestSession { session =>
      withTestTableAndViews(session, testTable) {
        seedTableSql(testTable).foreach(stmt => session.sql(stmt).collect())
        checkRows(session.sql(s"SELECT * FROM $testTable"), Seq(Row(1, 100)))

        session.sql(s"ALTER TABLE $testTable ADD COLUMN new_col INT").collect()
        session.sql(s"INSERT INTO $testTable VALUES (2, 200, -1)").collect()
        // The pre-existing row is expected to read back with null in the added column.
        checkRows(
          session.sql(s"SELECT * FROM $testTable"),
          Seq(Row(1, 100, null), Row(2, 200, -1)))
      }
    }
  }

  test(s"${testPrefix}repeated sql() reflects external schema change") {
    withTestSession { session =>
      withTestTableAndViews(session, testTable) {
        seedTableSql(testTable).foreach(stmt => session.sql(stmt).collect())
        checkRows(session.sql(s"SELECT * FROM $testTable"), Seq(Row(1, 100)))

        val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat")
        catalog.alterTable(testIdent, addNewColChange)

        // Append a row that already carries a value for the newly added column.
        externalAppend(catalog = catalog, ident = testIdent, row = InternalRow(2, 200, -1))

        checkRows(
          session.sql(s"SELECT * FROM $testTable"),
          Seq(Row(1, 100, null), Row(2, 200, -1)))
      }
    }
  }

  test(s"${testPrefix}connector w/ cache: repeated sql() stale after external schema change") {
    withTestSession { session =>
      withTestTableAndViews(session, cachingTestTable) {
        seedTableSql(cachingTestTable).foreach(stmt => session.sql(stmt).collect())
        checkRows(session.sql(s"SELECT * FROM $cachingTestTable"), Seq(Row(1, 100)))

        val catalog = getTableCatalog[CachingInMemoryTableCatalog](session, "cachingcat")
        catalog.alterTable(testIdent, addNewColChange)

        externalAppend(catalog = catalog, ident = testIdent, row = InternalRow(2, 200, -1))

        // Caching connector returns stale table: external changes invisible
        checkRows(session.sql(s"SELECT * FROM $cachingTestTable"), Seq(Row(1, 100)))

        // REFRESH TABLE invalidates the connector cache, schema change + data visible
        session.sql(s"REFRESH TABLE $cachingTestTable").collect()
        checkRows(
          session.sql(s"SELECT * FROM $cachingTestTable"),
          Seq(Row(1, 100, null), Row(2, 200, -1)))
      }
    }
  }

  // Scenario 3: drop and recreate table

  test(s"${testPrefix}repeated sql() reflects session drop/recreate") {
    withTestSession { session =>
      withTestTableAndViews(session, testTable) {
        seedTableSql(testTable).foreach(stmt => session.sql(stmt).collect())
        checkRows(session.sql(s"SELECT * FROM $testTable"), Seq(Row(1, 100)))

        session.sql(s"DROP TABLE $testTable").collect()
        // Recreate with the same schema but without re-inserting the baseline row.
        session.sql(createTableSql(testTable)).collect()
        checkRows(session.sql(s"SELECT * FROM $testTable"), Seq.empty)
      }
    }
  }

  test(s"${testPrefix}repeated sql() reflects external drop/recreate") {
    withTestSession { session =>
      withTestTableAndViews(session, testTable) {
        seedTableSql(testTable).foreach(stmt => session.sql(stmt).collect())
        checkRows(session.sql(s"SELECT * FROM $testTable"), Seq(Row(1, 100)))

        val catalog = getTableCatalog[InMemoryTableCatalog](session, "testcat")
        catalog.dropTable(testIdent)
        catalog.createTable(testIdent, idSalaryTableInfo)

        checkRows(session.sql(s"SELECT * FROM $testTable"), Seq.empty)
      }
    }
  }

  test(s"${testPrefix}connector w/ cache: repeated sql() stale after external drop/recreate") {
    withTestSession { session =>
      withTestTableAndViews(session, cachingTestTable) {
        seedTableSql(cachingTestTable).foreach(stmt => session.sql(stmt).collect())
        checkRows(session.sql(s"SELECT * FROM $cachingTestTable"), Seq(Row(1, 100)))

        val catalog = getTableCatalog[CachingInMemoryTableCatalog](session, "cachingcat")
        catalog.dropTable(testIdent)
        catalog.createTable(testIdent, idSalaryTableInfo)

        // Caching connector returns stale table: drop/recreate invisible
        checkRows(session.sql(s"SELECT * FROM $cachingTestTable"), Seq(Row(1, 100)))

        // REFRESH TABLE invalidates the connector cache, new empty table visible
        session.sql(s"REFRESH TABLE $cachingTestTable").collect()
        checkRows(session.sql(s"SELECT * FROM $cachingTestTable"), Seq.empty)
      }
    }
  }
}
Loading