From 084d257d215cf3c415a65d5274ce51e76f645cab Mon Sep 17 00:00:00 2001 From: Martin Grund Date: Tue, 13 Feb 2024 04:45:27 -0800 Subject: [PATCH 01/27] Initial draft --- .../spark/sql/SparkSessionE2ESuite.scala | 10 + .../main/protobuf/spark/connect/base.proto | 14 +- .../sql/connect/client/SparkResult.scala | 22 ++ .../ConnectProgressExecutionListener.scala | 124 +++++++++++ .../execution/ExecuteGrpcResponseSender.scala | 40 +++- .../execution/ExecuteResponseObserver.scala | 11 +- .../execution/ExecuteThreadRunner.scala | 5 +- .../connect/service/SparkConnectService.scala | 5 + ...onnectProgressExecutionListenerSuite.scala | 105 +++++++++ dev/sparktestsupport/modules.py | 1 + python/pyspark/shell.py | 19 +- python/pyspark/sql/connect/client/core.py | 26 ++- python/pyspark/sql/connect/proto/base_pb2.py | 206 +++++++++--------- python/pyspark/sql/connect/proto/base_pb2.pyi | 52 ++++- python/pyspark/sql/connect/shell/__init__.py | 28 +++ python/pyspark/sql/connect/shell/progress.py | 92 ++++++++ .../sql/tests/connect/shell/__init__.py | 16 ++ .../sql/tests/connect/shell/test_progress.py | 76 +++++++ 18 files changed, 741 insertions(+), 111 deletions(-) create mode 100644 connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListener.scala create mode 100644 connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListenerSuite.scala create mode 100644 python/pyspark/sql/connect/shell/__init__.py create mode 100644 python/pyspark/sql/connect/shell/progress.py create mode 100644 python/pyspark/sql/tests/connect/shell/__init__.py create mode 100644 python/pyspark/sql/tests/connect/shell/test_progress.py diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SparkSessionE2ESuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SparkSessionE2ESuite.scala index c76dc724828e5..4a4b110b475a9 100644 --- 
a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SparkSessionE2ESuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SparkSessionE2ESuite.scala @@ -229,6 +229,16 @@ class SparkSessionE2ESuite extends RemoteSparkSession { assert(interrupted.length == 2, s"Interrupted operations: $interrupted.") } + test("progress is available for the spark result") { + val result = spark + .range(10000) + .repartition(1000) + .collectResult() + assert(result.length == 10000) + assert(result.progress.totalTasks > 100) + assert(result.progress.completedTasks > 100) + } + test("interrupt operation") { val session = spark import session.implicits._ diff --git a/connector/connect/common/src/main/protobuf/spark/connect/base.proto b/connector/connect/common/src/main/protobuf/spark/connect/base.proto index f24ca0a8fc3b1..ff2537f2bda4f 100644 --- a/connector/connect/common/src/main/protobuf/spark/connect/base.proto +++ b/connector/connect/common/src/main/protobuf/spark/connect/base.proto @@ -321,7 +321,7 @@ message ExecutePlanRequest { // The response of a query, can be one or more for each request. Responses belonging to the // same input query, carry the same `session_id`. -// Next ID: 16 +// Next ID: 17 message ExecutePlanResponse { string session_id = 1; // Server-side generated idempotency key that the client can use to assert that the server side @@ -360,6 +360,9 @@ message ExecutePlanResponse { // Response type informing if the stream is complete in reattachable execution. ResultComplete result_complete = 14; + // (Optional) Intermediate query progress reports. + ExecutionProgress execution_progress = 16; + // Support arbitrary result objects. google.protobuf.Any extension = 999; } @@ -420,6 +423,15 @@ message ExecutePlanResponse { // the execution is complete. If the server sends onComplete without sending a ResultComplete, // it means that there is more, and the client should use ReattachExecute RPC to continue. 
} + + // This message is used to communicate progress about the query progress during the execution. + message ExecutionProgress { + int64 num_tasks = 1; + int64 num_completed_tasks = 2; + int64 num_stages = 3; + int64 num_completed_stages = 4; + int64 input_bytes_read = 5; + } } // The key-value pair for the config request and response. diff --git a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkResult.scala b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkResult.scala index 7a7c6a2d6c925..32d1d256e6cdb 100644 --- a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkResult.scala +++ b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkResult.scala @@ -40,6 +40,17 @@ private[sql] class SparkResult[T]( timeZoneId: String) extends AutoCloseable { self => + /** + * Progress of the query execution. This information can be accessed from the iterator. + */ + case class Progress ( + totalTasks: Long = 0, + completedTasks: Long = 0, + totalStages: Long = 0, + completedStages: Long = 0, + inputBytesRead: Long = 0) + + var progress: Progress = new Progress() private[this] var opId: String = _ private[this] var numRecords: Int = 0 private[this] var structType: StructType = _ @@ -97,6 +108,17 @@ private[sql] class SparkResult[T]( } stop |= stopOnOperationId + // Update the execution status. This information can now be accessed directly from + // the iterator. + if (response.hasExecutionProgress) { + progress = Progress( + response.getExecutionProgress.getNumTasks, + response.getExecutionProgress.getNumCompletedTasks, + response.getExecutionProgress.getNumStages, + response.getExecutionProgress.getNumCompletedStages, + response.getExecutionProgress.getInputBytesRead) + } + if (response.hasSchema) { // The original schema should arrive before ArrowBatches. 
structType = diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListener.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListener.scala new file mode 100644 index 0000000000000..c3b38ca847d6f --- /dev/null +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListener.scala @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connect.execution + +import org.apache.spark.internal.Logging +import org.apache.spark.scheduler.{SparkListener, SparkListenerJobEnd, SparkListenerJobStart, SparkListenerStageCompleted, SparkListenerTaskEnd} + +/** + * A listener that tracks the execution of jobs and stages for a given set of tags. + * This is used to track the progress of a job that is being executed through the connect API. + * + * The listener is instantiated once for the SparkConnectService and then used to track all the + * current query executions. + */ +private[connect] class ConnectProgressExecutionListener extends SparkListener with Logging { + /** + * A tracker for a given tag. 
This is used to track the progress of an operation that is being executed + * through the connect API. + */ + class ExecutionTracker(var tag: String) { + private[ConnectProgressExecutionListener] var jobs: Set[Int] = Set() + private[ConnectProgressExecutionListener] var stages: Set[Int] = Set() + private[ConnectProgressExecutionListener] var totalTasks = 0 + private[ConnectProgressExecutionListener] var completedTasks = 0 + private[ConnectProgressExecutionListener] var completedStages = 0 + private[ConnectProgressExecutionListener] var inputBytesRead = 0L + // The tracker is marked as dirty if it has new progress to report. This variable does + // not need to be protected by a mutex: even if multiple threads read the same dirty + // state, the output is expected to be identical. + @volatile private[ConnectProgressExecutionListener] var dirty = false + + /** + * Yield the current state of the tracker if it is dirty. A consumer of the tracker can provide + * a callback that is invoked with (totalTasks, completedTasks, totalStages, completedStages, + * inputBytesRead) if the tracker has new progress to report. + * + * The dirty flag is cleared before the callback runs so that concurrent updates are not lost. + */ + def yieldWhenDirty(thunk: (Int, Int, Int, Int, Long) => Unit): Unit = { + if (dirty) { + dirty = false + thunk(totalTasks, completedTasks, stages.size, completedStages, inputBytesRead) + } + } + + /** + * Add a job to the tracker. 
This will add the job to the list of jobs that are being tracked. + */ + def addJob(job: SparkListenerJobStart): Unit = { + jobs = jobs + job.jobId + stages = stages ++ job.stageIds + totalTasks += job.stageInfos.map(_.numTasks).sum + } + } + // Concurrent map: accessed by the listener bus thread and by the execution/sender threads. + val trackedTags = collection.concurrent.TrieMap[String, ExecutionTracker]() + + override def onJobStart(jobStart: SparkListenerJobStart): Unit = { + val tags = jobStart.properties.getProperty("spark.job.tags") + if (tags != null) { + val thisJobTags = tags.split(",").map(_.trim).toSet + thisJobTags.foreach { tag => + trackedTags.get(tag).foreach(_.addJob(jobStart)) + } + } + } + + override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = { + // Check if the task belongs to a job that we are tracking. + trackedTags.foreach({ case (tag, tracker) => + if (tracker.stages.contains(taskEnd.stageId)) { + tracker.completedTasks += 1 + // taskMetrics may be null if the task has failed. + tracker.inputBytesRead += Option(taskEnd.taskMetrics).fold(0L)(_.inputMetrics.bytesRead) + tracker.dirty = true + } + }) + } + + override def onStageCompleted(stageCompleted: SparkListenerStageCompleted): Unit = { + trackedTags.foreach({ case (tag, tracker) => + if (tracker.stages.contains(stageCompleted.stageInfo.stageId)) { + tracker.completedStages += 1 + tracker.dirty = true + } + }) + } + + override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = { + trackedTags.foreach({ case (tag, tracker) => + if (tracker.jobs.contains(jobEnd.jobId)) { + tracker.jobs -= jobEnd.jobId + } + }) + } + + def registerJobTag(tag: String): Unit = { + trackedTags += tag -> new ExecutionTracker(tag) + } + + def removeJobTag(tag: String): Unit = { + trackedTags -= tag + } + + def clearJobTags(): Unit = { + trackedTags.clear() + } + +} diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala index c9ceef969e297..d2d55019e1783 100644 --- 
a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala @@ -21,6 +21,7 @@ import com.google.protobuf.Message import io.grpc.stub.{ServerCallStreamObserver, StreamObserver} import org.apache.spark.{SparkEnv, SparkSQLException} +import org.apache.spark.connect.proto.ExecutePlanResponse import org.apache.spark.internal.Logging import org.apache.spark.sql.connect.common.ProtoUtils import org.apache.spark.sql.connect.config.Connect.{CONNECT_EXECUTE_REATTACHABLE_SENDER_MAX_STREAM_DURATION, CONNECT_EXECUTE_REATTACHABLE_SENDER_MAX_STREAM_SIZE} @@ -131,6 +132,38 @@ private[connect] class ExecuteGrpcResponseSender[T <: Message]( } } + /** + * This method is called repeatedly during the query execution to enqueue a new message to be sent + * to the client about the current query progress. The message is not directly sent to the client, + * but rather enqueued in the response observer. + */ + private def enqueueProgressMessage(): Unit = { + SparkConnectService.executionListener.foreach { listener => + // Single lookup: the tag can be unregistered concurrently when the execution ends. + listener.trackedTags.get(executeHolder.jobTag).foreach { tracker => + // Only send progress message if there is something new to report. + tracker.yieldWhenDirty { (tasks, tasksCompleted, stages, stagesCompleted, inputBytesRead) => + val response = ExecutePlanResponse + .newBuilder() + .setExecutionProgress( + ExecutePlanResponse.ExecutionProgress + .newBuilder() + .setInputBytesRead(inputBytesRead) + .setNumTasks(tasks) + .setNumCompletedTasks(tasksCompleted) + .setNumCompletedStages(stagesCompleted) + .setNumStages(stages) + ) + .build() + // There is a special case when the response observer has already determined + // that the final message is sent (and the stream will be closed) but we might want + // to send the progress message. 
In this case we ignore the result of the `tryOnNext` call. + executeHolder.responseObserver.tryOnNext(response) + } + } + } + } + /** * Attach to the executionObserver, consume responses from it, and send them to grpcObserver. * @@ -173,6 +206,7 @@ private[connect] class ExecuteGrpcResponseSender[T <: Message]( var sentResponsesSize: Long = 0 while (!finished) { + enqueueProgressMessage() var response: Option[CachedStreamResponse[T]] = None // Conditions for exiting the inner loop (and helpers to compute them): @@ -201,9 +235,11 @@ private[connect] class ExecuteGrpcResponseSender[T <: Message]( // The state of interrupted, response and lastIndex are changed under executionObserver // monitor, and will notify upon state change. if (response.isEmpty) { - val timeout = Math.max(1, deadlineTimeMillis - System.currentTimeMillis()) + // Wake up more frequently to send the progress updates, but never past the deadline. + val timeout = (deadlineTimeMillis - System.currentTimeMillis()).max(1L).min(2000L) logTrace(s"Wait for response to become available with timeout=$timeout ms.") executionObserver.responseLock.wait(timeout) + enqueueProgressMessage() logTrace(s"Reacquired executionObserver lock after waiting.") sleepEnd = System.nanoTime() } @@ -228,6 +264,7 @@ private[connect] class ExecuteGrpcResponseSender[T <: Message]( s"waitingForResults=${consumeSleep}ns waitingForSend=${sendSleep}ns") throw new SparkSQLException(errorClass = "INVALID_CURSOR.DISCONNECTED", Map.empty) } else if (gotResponse) { + enqueueProgressMessage() // There is a response available to be sent. 
val sent = sendResponse(response.get, deadlineTimeMillis) if (sent) { @@ -240,6 +277,7 @@ private[connect] class ExecuteGrpcResponseSender[T <: Message]( assert(deadlineLimitReached || interrupted) } } else if (streamFinished) { + enqueueProgressMessage() // Stream is finished and all responses have been sent logInfo( s"Stream finished for opId=${executeHolder.operationId}, " + diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteResponseObserver.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteResponseObserver.scala index a7877503f4611..92c23c6165d23 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteResponseObserver.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteResponseObserver.scala @@ -107,9 +107,9 @@ private[connect] class ExecuteResponseObserver[T <: Message](val executeHolder: 0 } - def onNext(r: T): Unit = responseLock.synchronized { + def tryOnNext(r: T): Boolean = responseLock.synchronized { if (finalProducedIndex.nonEmpty) { - throw new IllegalStateException("Stream onNext can't be called after stream completed") + return false } lastProducedIndex += 1 val processedResponse = setCommonResponseFields(r) @@ -127,6 +127,13 @@ private[connect] class ExecuteResponseObserver[T <: Message](val executeHolder: s"Execution opId=${executeHolder.operationId} produced response " + s"responseId=${responseId} idx=$lastProducedIndex") responseLock.notifyAll() + true + } + + def onNext(r: T): Unit = { + if (!tryOnNext(r)) { + throw new IllegalStateException("Stream onNext can't be called after stream completed") + } } def onError(t: Throwable): Unit = responseLock.synchronized { diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteThreadRunner.scala 
b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteThreadRunner.scala index 41146e4ef688d..56776819dac9d 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteThreadRunner.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteThreadRunner.scala @@ -28,7 +28,7 @@ import org.apache.spark.connect.proto import org.apache.spark.internal.Logging import org.apache.spark.sql.connect.common.ProtoUtils import org.apache.spark.sql.connect.planner.SparkConnectPlanner -import org.apache.spark.sql.connect.service.{ExecuteHolder, ExecuteSessionTag} +import org.apache.spark.sql.connect.service.{ExecuteHolder, ExecuteSessionTag, SparkConnectService} import org.apache.spark.sql.connect.utils.ErrorUtils import org.apache.spark.util.Utils @@ -123,6 +123,7 @@ private[connect] class ExecuteThreadRunner(executeHolder: ExecuteHolder) extends } } finally { executeHolder.sessionHolder.session.sparkContext.removeJobTag(executeHolder.jobTag) + SparkConnectService.executionListener.foreach(_.removeJobTag(executeHolder.jobTag)) executeHolder.sparkSessionTags.foreach { tag => executeHolder.sessionHolder.session.sparkContext.removeJobTag( ExecuteSessionTag( @@ -158,6 +159,8 @@ private[connect] class ExecuteThreadRunner(executeHolder: ExecuteHolder) extends // Set tag for query cancellation session.sparkContext.addJobTag(executeHolder.jobTag) + // Register the job for progress reports. + SparkConnectService.executionListener.foreach(_.registerJobTag(executeHolder.jobTag)) // Also set all user defined tags as Spark Job tags. 
executeHolder.sparkSessionTags.foreach { tag => session.sparkContext.addJobTag( diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectService.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectService.scala index e96e5dfcac089..b7acf3a1b3bd5 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectService.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectService.scala @@ -38,6 +38,7 @@ import org.apache.spark.connect.proto.SparkConnectServiceGrpc.AsyncService import org.apache.spark.internal.Logging import org.apache.spark.internal.config.UI.UI_ENABLED import org.apache.spark.sql.connect.config.Connect.{CONNECT_GRPC_BINDING_ADDRESS, CONNECT_GRPC_BINDING_PORT, CONNECT_GRPC_MARSHALLER_RECURSION_LIMIT, CONNECT_GRPC_MAX_INBOUND_MESSAGE_SIZE} +import org.apache.spark.sql.connect.execution.ConnectProgressExecutionListener import org.apache.spark.sql.connect.ui.{SparkConnectServerAppStatusStore, SparkConnectServerListener, SparkConnectServerTab} import org.apache.spark.sql.connect.utils.ErrorUtils import org.apache.spark.status.ElementTrackingStore @@ -284,6 +285,7 @@ object SparkConnectService extends Logging { private[connect] var uiTab: Option[SparkConnectServerTab] = None private[connect] var listener: SparkConnectServerListener = _ + private[connect] var executionListener: Option[ConnectProgressExecutionListener] = None // For testing purpose, it's package level private. private[connect] def localPort: Int = { @@ -325,6 +327,9 @@ object SparkConnectService extends Logging { } else { None } + // Add the execution listener needed for query progress. 
+ executionListener = Some(new ConnectProgressExecutionListener) + sc.addSparkListener(executionListener.get) } /** diff --git a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListenerSuite.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListenerSuite.scala new file mode 100644 index 0000000000000..bc0eeb5d4043f --- /dev/null +++ b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListenerSuite.scala @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.connect.execution + +import java.util.Properties + +import org.mockito.Mockito.when +import org.scalatestplus.mockito.MockitoSugar + +import org.apache.spark.{SparkFunSuite, Success} +import org.apache.spark.executor.{ExecutorMetrics, InputMetrics, TaskMetrics} +import org.apache.spark.scheduler.{SparkListenerJobStart, SparkListenerStageCompleted, SparkListenerTaskEnd, StageInfo, TaskInfo} + +class ConnectProgressExecutionListenerSuite extends SparkFunSuite with MockitoSugar { + + def mockStage(stageId: Int, numTasks: Int): StageInfo = { + val result = mock[StageInfo] + when(result.stageId).thenReturn(stageId) + when(result.numTasks).thenReturn(numTasks) + result + } + + val testTag = "testTag" + val testStage1 = mockStage(1, 1) + val testStage2 = mockStage(2, 1) + + val testStage1Task1 = mock[TaskInfo] + val testStage1Task1ExecutorMetrics = mock[ExecutorMetrics] + val testStage1Task1Metrics = mock[TaskMetrics] + + val inputMetrics = mock[InputMetrics] + when(inputMetrics.bytesRead).thenReturn(500) + when(testStage1Task1Metrics.inputMetrics).thenReturn(inputMetrics) + + val testStage2Task1 = mock[TaskInfo] +// + val testProperties = new Properties() + testProperties.setProperty("spark.job.tags", s"otherTag,$testTag,anotherTag") + + val testJobStart = SparkListenerJobStart(1, 1, Seq(testStage1, testStage2), testProperties) + + test("onJobStart with no matching tags") { + val listener = new ConnectProgressExecutionListener + listener.onJobStart(testJobStart) + assert(listener.trackedTags.isEmpty) + } + + test("onJobStart with a registered tag") { + val listener = new ConnectProgressExecutionListener + listener.registerJobTag(testTag) + assert(listener.trackedTags.size == 1) + + // Trigger the event + listener.onJobStart(testJobStart) + val t = listener.trackedTags(testTag) + assert(t.jobs.size === 1) + assert(t.jobs(testJobStart.jobId)) + assert(t.stages.size == 2) + assert(t.totalTasks == 2) + } + + test("taskDone") { + val 
listener = new ConnectProgressExecutionListener + listener.registerJobTag(testTag) + listener.onJobStart(testJobStart) + val t = listener.trackedTags(testTag) + + // Finish the tasks + val taskEnd = SparkListenerTaskEnd( + 1, + 1, + "taskType", + Success, + testStage1Task1, + testStage1Task1ExecutorMetrics, + testStage1Task1Metrics) + + assert(t.completedTasks == 0) + listener.onTaskEnd(taskEnd) + assert(t.inputBytesRead == 500) + assert(t.completedTasks == 1) + assert(t.completedStages == 0) + + val stageEnd = SparkListenerStageCompleted(testStage1) + listener.onStageCompleted(stageEnd) + assert(t.completedStages == 1) + + } + +} diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index ff3b23ff573a0..18e950c4cbc14 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -1026,6 +1026,7 @@ def __hash__(self): "pyspark.sql.tests.connect.test_parity_pandas_udf_scalar", "pyspark.sql.tests.connect.test_parity_pandas_udf_grouped_agg", "pyspark.sql.tests.connect.test_parity_pandas_udf_window", + "pyspark.sql.tests.connect.shell.test_progress", ], excluded_python_implementations=[ "PyPy" # Skip these tests under PyPy since they require numpy, pandas, and pyarrow and diff --git a/python/pyspark/shell.py b/python/pyspark/shell.py index 7e2093c1d31d7..2d03e9fa7dc08 100644 --- a/python/pyspark/shell.py +++ b/python/pyspark/shell.py @@ -45,11 +45,28 @@ if parent_dir in sys.path: sys.path.remove(parent_dir) - if is_remote(): try: # Creates pyspark.sql.connect.SparkSession. spark = SparkSession.builder.getOrCreate() + + from pyspark.sql.connect.shell import PROGRESS_BAR_ENABLED + + # Check if th eprogress bar needs to be disabled. 
+ if not PROGRESS_BAR_ENABLED in os.environ: + os.environ[PROGRESS_BAR_ENABLED] = "1" + else: + val = os.getenv(PROGRESS_BAR_ENABLED) + if val.lower().strip() == 'false': + os.environ[PROGRESS_BAR_ENABLED] = "0" + elif val.lower().strip() == 'true': + os.environ[PROGRESS_BAR_ENABLED] = "1" + + val = os.environ[PROGRESS_BAR_ENABLED] + if not val in ('1', '0'): + raise ValueError( + f"Environment variable '{PROGRESS_BAR_ENABLED}' must be set to either 1 or 0, found: {val}") + except Exception: import sys import traceback diff --git a/python/pyspark/sql/connect/client/core.py b/python/pyspark/sql/connect/client/core.py index 565f2b7131938..95064f6b03330 100644 --- a/python/pyspark/sql/connect/client/core.py +++ b/python/pyspark/sql/connect/client/core.py @@ -90,6 +90,7 @@ from pyspark.rdd import PythonEvalType from pyspark.storagelevel import StorageLevel from pyspark.errors import PySparkValueError, PySparkAssertionError, PySparkNotImplementedError +from pyspark.sql.connect.shell.progress import Progress if TYPE_CHECKING: from google.rpc.error_details_pb2 import ErrorInfo @@ -1173,7 +1174,10 @@ def handle_response(b: pb2.ExecutePlanResponse) -> None: self._handle_error(error) def _execute_and_fetch_as_iterator( - self, req: pb2.ExecutePlanRequest, observations: Dict[str, Observation] + self, + req: pb2.ExecutePlanRequest, + observations: Dict[str, Observation], + progress: Optional["Progress"] = None, ) -> Iterator[ Union[ "pa.RecordBatch", @@ -1252,6 +1256,13 @@ def handle_response( yield {"get_resources_command_result": resources} if b.HasField("extension"): yield b.extension + if b.HasField("execution_progress"): + if progress: + progress.update_ticks( + b.execution_progress.num_tasks, + b.execution_progress.num_completed_tasks, + b.execution_progress.input_bytes_read, + ) if b.HasField("arrow_batch"): logger.debug( f"Received arrow batch rows={b.arrow_batch.row_count} " @@ -1295,6 +1306,15 @@ def handle_response( with attempt: for b in self._stub.ExecutePlan(req, 
metadata=self._builder.metadata()): yield from handle_response(b) + except KeyboardInterrupt: + logger.debug(f"Interrupt request received for operation={req.operation_id}") + try: + self.interrupt_operation(req.operation_id) + except: + # Swallow all errors if aborted. + pass + if not progress is None: + progress.finish() except Exception as error: self._handle_error(error) @@ -1318,7 +1338,8 @@ def _execute_and_fetch( schema: Optional[StructType] = None properties: Dict[str, Any] = {} - for response in self._execute_and_fetch_as_iterator(req, observations): + progress = Progress() + for response in self._execute_and_fetch_as_iterator(req, observations, progress=progress): if isinstance(response, StructType): schema = response elif isinstance(response, pa.RecordBatch): @@ -1336,6 +1357,7 @@ def _execute_and_fetch( "response": response, }, ) + progress.finish() if len(batches) > 0: if self_destruct: diff --git a/python/pyspark/sql/connect/proto/base_pb2.py b/python/pyspark/sql/connect/proto/base_pb2.py index 8326ce511d56a..e2ed91e5d0d02 100644 --- a/python/pyspark/sql/connect/proto/base_pb2.py +++ b/python/pyspark/sql/connect/proto/base_pb2.py @@ -37,7 +37,7 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x18spark/connect/base.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1cspark/connect/commands.proto\x1a\x1aspark/connect/common.proto\x1a\x1fspark/connect/expressions.proto\x1a\x1dspark/connect/relations.proto\x1a\x19spark/connect/types.proto"t\n\x04Plan\x12-\n\x04root\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationH\x00R\x04root\x12\x32\n\x07\x63ommand\x18\x02 \x01(\x0b\x32\x16.spark.connect.CommandH\x00R\x07\x63ommandB\t\n\x07op_type"z\n\x0bUserContext\x12\x17\n\x07user_id\x18\x01 \x01(\tR\x06userId\x12\x1b\n\tuser_name\x18\x02 \x01(\tR\x08userName\x12\x35\n\nextensions\x18\xe7\x07 \x03(\x0b\x32\x14.google.protobuf.AnyR\nextensions"\xf5\x12\n\x12\x41nalyzePlanRequest\x12\x1d\n\nsession_id\x18\x01 
\x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x01R\nclientType\x88\x01\x01\x12\x42\n\x06schema\x18\x04 \x01(\x0b\x32(.spark.connect.AnalyzePlanRequest.SchemaH\x00R\x06schema\x12\x45\n\x07\x65xplain\x18\x05 \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.ExplainH\x00R\x07\x65xplain\x12O\n\x0btree_string\x18\x06 \x01(\x0b\x32,.spark.connect.AnalyzePlanRequest.TreeStringH\x00R\ntreeString\x12\x46\n\x08is_local\x18\x07 \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.IsLocalH\x00R\x07isLocal\x12R\n\x0cis_streaming\x18\x08 \x01(\x0b\x32-.spark.connect.AnalyzePlanRequest.IsStreamingH\x00R\x0bisStreaming\x12O\n\x0binput_files\x18\t \x01(\x0b\x32,.spark.connect.AnalyzePlanRequest.InputFilesH\x00R\ninputFiles\x12U\n\rspark_version\x18\n \x01(\x0b\x32..spark.connect.AnalyzePlanRequest.SparkVersionH\x00R\x0csparkVersion\x12I\n\tddl_parse\x18\x0b \x01(\x0b\x32*.spark.connect.AnalyzePlanRequest.DDLParseH\x00R\x08\x64\x64lParse\x12X\n\x0esame_semantics\x18\x0c \x01(\x0b\x32/.spark.connect.AnalyzePlanRequest.SameSemanticsH\x00R\rsameSemantics\x12U\n\rsemantic_hash\x18\r \x01(\x0b\x32..spark.connect.AnalyzePlanRequest.SemanticHashH\x00R\x0csemanticHash\x12\x45\n\x07persist\x18\x0e \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.PersistH\x00R\x07persist\x12K\n\tunpersist\x18\x0f \x01(\x0b\x32+.spark.connect.AnalyzePlanRequest.UnpersistH\x00R\tunpersist\x12_\n\x11get_storage_level\x18\x10 \x01(\x0b\x32\x31.spark.connect.AnalyzePlanRequest.GetStorageLevelH\x00R\x0fgetStorageLevel\x1a\x31\n\x06Schema\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\xbb\x02\n\x07\x45xplain\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12X\n\x0c\x65xplain_mode\x18\x02 
\x01(\x0e\x32\x35.spark.connect.AnalyzePlanRequest.Explain.ExplainModeR\x0b\x65xplainMode"\xac\x01\n\x0b\x45xplainMode\x12\x1c\n\x18\x45XPLAIN_MODE_UNSPECIFIED\x10\x00\x12\x17\n\x13\x45XPLAIN_MODE_SIMPLE\x10\x01\x12\x19\n\x15\x45XPLAIN_MODE_EXTENDED\x10\x02\x12\x18\n\x14\x45XPLAIN_MODE_CODEGEN\x10\x03\x12\x15\n\x11\x45XPLAIN_MODE_COST\x10\x04\x12\x1a\n\x16\x45XPLAIN_MODE_FORMATTED\x10\x05\x1aZ\n\nTreeString\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12\x19\n\x05level\x18\x02 \x01(\x05H\x00R\x05level\x88\x01\x01\x42\x08\n\x06_level\x1a\x32\n\x07IsLocal\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x36\n\x0bIsStreaming\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x35\n\nInputFiles\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x0e\n\x0cSparkVersion\x1a)\n\x08\x44\x44LParse\x12\x1d\n\nddl_string\x18\x01 \x01(\tR\tddlString\x1ay\n\rSameSemantics\x12\x34\n\x0btarget_plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\ntargetPlan\x12\x32\n\nother_plan\x18\x02 \x01(\x0b\x32\x13.spark.connect.PlanR\totherPlan\x1a\x37\n\x0cSemanticHash\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x97\x01\n\x07Persist\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x12\x45\n\rstorage_level\x18\x02 \x01(\x0b\x32\x1b.spark.connect.StorageLevelH\x00R\x0cstorageLevel\x88\x01\x01\x42\x10\n\x0e_storage_level\x1an\n\tUnpersist\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x12\x1f\n\x08\x62locking\x18\x02 \x01(\x08H\x00R\x08\x62locking\x88\x01\x01\x42\x0b\n\t_blocking\x1a\x46\n\x0fGetStorageLevel\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relationB\t\n\x07\x61nalyzeB\x0e\n\x0c_client_type"\xce\r\n\x13\x41nalyzePlanResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x0f 
\x01(\tR\x13serverSideSessionId\x12\x43\n\x06schema\x18\x02 \x01(\x0b\x32).spark.connect.AnalyzePlanResponse.SchemaH\x00R\x06schema\x12\x46\n\x07\x65xplain\x18\x03 \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.ExplainH\x00R\x07\x65xplain\x12P\n\x0btree_string\x18\x04 \x01(\x0b\x32-.spark.connect.AnalyzePlanResponse.TreeStringH\x00R\ntreeString\x12G\n\x08is_local\x18\x05 \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.IsLocalH\x00R\x07isLocal\x12S\n\x0cis_streaming\x18\x06 \x01(\x0b\x32..spark.connect.AnalyzePlanResponse.IsStreamingH\x00R\x0bisStreaming\x12P\n\x0binput_files\x18\x07 \x01(\x0b\x32-.spark.connect.AnalyzePlanResponse.InputFilesH\x00R\ninputFiles\x12V\n\rspark_version\x18\x08 \x01(\x0b\x32/.spark.connect.AnalyzePlanResponse.SparkVersionH\x00R\x0csparkVersion\x12J\n\tddl_parse\x18\t \x01(\x0b\x32+.spark.connect.AnalyzePlanResponse.DDLParseH\x00R\x08\x64\x64lParse\x12Y\n\x0esame_semantics\x18\n \x01(\x0b\x32\x30.spark.connect.AnalyzePlanResponse.SameSemanticsH\x00R\rsameSemantics\x12V\n\rsemantic_hash\x18\x0b \x01(\x0b\x32/.spark.connect.AnalyzePlanResponse.SemanticHashH\x00R\x0csemanticHash\x12\x46\n\x07persist\x18\x0c \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.PersistH\x00R\x07persist\x12L\n\tunpersist\x18\r \x01(\x0b\x32,.spark.connect.AnalyzePlanResponse.UnpersistH\x00R\tunpersist\x12`\n\x11get_storage_level\x18\x0e \x01(\x0b\x32\x32.spark.connect.AnalyzePlanResponse.GetStorageLevelH\x00R\x0fgetStorageLevel\x1a\x39\n\x06Schema\x12/\n\x06schema\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema\x1a\x30\n\x07\x45xplain\x12%\n\x0e\x65xplain_string\x18\x01 \x01(\tR\rexplainString\x1a-\n\nTreeString\x12\x1f\n\x0btree_string\x18\x01 \x01(\tR\ntreeString\x1a$\n\x07IsLocal\x12\x19\n\x08is_local\x18\x01 \x01(\x08R\x07isLocal\x1a\x30\n\x0bIsStreaming\x12!\n\x0cis_streaming\x18\x01 \x01(\x08R\x0bisStreaming\x1a"\n\nInputFiles\x12\x14\n\x05\x66iles\x18\x01 \x03(\tR\x05\x66iles\x1a(\n\x0cSparkVersion\x12\x18\n\x07version\x18\x01 
\x01(\tR\x07version\x1a;\n\x08\x44\x44LParse\x12/\n\x06parsed\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06parsed\x1a\'\n\rSameSemantics\x12\x16\n\x06result\x18\x01 \x01(\x08R\x06result\x1a&\n\x0cSemanticHash\x12\x16\n\x06result\x18\x01 \x01(\x05R\x06result\x1a\t\n\x07Persist\x1a\x0b\n\tUnpersist\x1aS\n\x0fGetStorageLevel\x12@\n\rstorage_level\x18\x01 \x01(\x0b\x32\x1b.spark.connect.StorageLevelR\x0cstorageLevelB\x08\n\x06result"\xa0\x04\n\x12\x45xecutePlanRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12&\n\x0coperation_id\x18\x06 \x01(\tH\x00R\x0boperationId\x88\x01\x01\x12\'\n\x04plan\x18\x03 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x01R\nclientType\x88\x01\x01\x12X\n\x0frequest_options\x18\x05 \x03(\x0b\x32/.spark.connect.ExecutePlanRequest.RequestOptionR\x0erequestOptions\x12\x12\n\x04tags\x18\x07 \x03(\tR\x04tags\x1a\xa5\x01\n\rRequestOption\x12K\n\x10reattach_options\x18\x01 \x01(\x0b\x32\x1e.spark.connect.ReattachOptionsH\x00R\x0freattachOptions\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textensionB\x10\n\x0erequest_optionB\x0f\n\r_operation_idB\x0e\n\x0c_client_type"\x9b\x10\n\x13\x45xecutePlanResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x0f \x01(\tR\x13serverSideSessionId\x12!\n\x0coperation_id\x18\x0c \x01(\tR\x0boperationId\x12\x1f\n\x0bresponse_id\x18\r \x01(\tR\nresponseId\x12P\n\x0b\x61rrow_batch\x18\x02 \x01(\x0b\x32-.spark.connect.ExecutePlanResponse.ArrowBatchH\x00R\narrowBatch\x12\x63\n\x12sql_command_result\x18\x05 \x01(\x0b\x32\x33.spark.connect.ExecutePlanResponse.SqlCommandResultH\x00R\x10sqlCommandResult\x12~\n#write_stream_operation_start_result\x18\x08 \x01(\x0b\x32..spark.connect.WriteStreamOperationStartResultH\x00R\x1fwriteStreamOperationStartResult\x12q\n\x1estreaming_query_command_result\x18\t 
\x01(\x0b\x32*.spark.connect.StreamingQueryCommandResultH\x00R\x1bstreamingQueryCommandResult\x12k\n\x1cget_resources_command_result\x18\n \x01(\x0b\x32(.spark.connect.GetResourcesCommandResultH\x00R\x19getResourcesCommandResult\x12\x87\x01\n&streaming_query_manager_command_result\x18\x0b \x01(\x0b\x32\x31.spark.connect.StreamingQueryManagerCommandResultH\x00R"streamingQueryManagerCommandResult\x12\\\n\x0fresult_complete\x18\x0e \x01(\x0b\x32\x31.spark.connect.ExecutePlanResponse.ResultCompleteH\x00R\x0eresultComplete\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x12\x44\n\x07metrics\x18\x04 \x01(\x0b\x32*.spark.connect.ExecutePlanResponse.MetricsR\x07metrics\x12]\n\x10observed_metrics\x18\x06 \x03(\x0b\x32\x32.spark.connect.ExecutePlanResponse.ObservedMetricsR\x0fobservedMetrics\x12/\n\x06schema\x18\x07 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema\x1aG\n\x10SqlCommandResult\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x1av\n\nArrowBatch\x12\x1b\n\trow_count\x18\x01 \x01(\x03R\x08rowCount\x12\x12\n\x04\x64\x61ta\x18\x02 \x01(\x0cR\x04\x64\x61ta\x12&\n\x0cstart_offset\x18\x03 \x01(\x03H\x00R\x0bstartOffset\x88\x01\x01\x42\x0f\n\r_start_offset\x1a\x85\x04\n\x07Metrics\x12Q\n\x07metrics\x18\x01 \x03(\x0b\x32\x37.spark.connect.ExecutePlanResponse.Metrics.MetricObjectR\x07metrics\x1a\xcc\x02\n\x0cMetricObject\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x17\n\x07plan_id\x18\x02 \x01(\x03R\x06planId\x12\x16\n\x06parent\x18\x03 \x01(\x03R\x06parent\x12z\n\x11\x65xecution_metrics\x18\x04 \x03(\x0b\x32M.spark.connect.ExecutePlanResponse.Metrics.MetricObject.ExecutionMetricsEntryR\x10\x65xecutionMetrics\x1a{\n\x15\x45xecutionMetricsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12L\n\x05value\x18\x02 \x01(\x0b\x32\x36.spark.connect.ExecutePlanResponse.Metrics.MetricValueR\x05value:\x02\x38\x01\x1aX\n\x0bMetricValue\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x14\n\x05value\x18\x02 
\x01(\x03R\x05value\x12\x1f\n\x0bmetric_type\x18\x03 \x01(\tR\nmetricType\x1at\n\x0fObservedMetrics\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x39\n\x06values\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values\x12\x12\n\x04keys\x18\x03 \x03(\tR\x04keys\x1a\x10\n\x0eResultCompleteB\x0f\n\rresponse_type"A\n\x08KeyValue\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x19\n\x05value\x18\x02 \x01(\tH\x00R\x05value\x88\x01\x01\x42\x08\n\x06_value"\x84\x08\n\rConfigRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12\x44\n\toperation\x18\x03 \x01(\x0b\x32&.spark.connect.ConfigRequest.OperationR\toperation\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x00R\nclientType\x88\x01\x01\x1a\xf2\x03\n\tOperation\x12\x34\n\x03set\x18\x01 \x01(\x0b\x32 .spark.connect.ConfigRequest.SetH\x00R\x03set\x12\x34\n\x03get\x18\x02 \x01(\x0b\x32 .spark.connect.ConfigRequest.GetH\x00R\x03get\x12W\n\x10get_with_default\x18\x03 \x01(\x0b\x32+.spark.connect.ConfigRequest.GetWithDefaultH\x00R\x0egetWithDefault\x12G\n\nget_option\x18\x04 \x01(\x0b\x32&.spark.connect.ConfigRequest.GetOptionH\x00R\tgetOption\x12>\n\x07get_all\x18\x05 \x01(\x0b\x32#.spark.connect.ConfigRequest.GetAllH\x00R\x06getAll\x12:\n\x05unset\x18\x06 \x01(\x0b\x32".spark.connect.ConfigRequest.UnsetH\x00R\x05unset\x12P\n\ris_modifiable\x18\x07 \x01(\x0b\x32).spark.connect.ConfigRequest.IsModifiableH\x00R\x0cisModifiableB\t\n\x07op_type\x1a\x34\n\x03Set\x12-\n\x05pairs\x18\x01 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x1a\x19\n\x03Get\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a?\n\x0eGetWithDefault\x12-\n\x05pairs\x18\x01 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x1a\x1f\n\tGetOption\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a\x30\n\x06GetAll\x12\x1b\n\x06prefix\x18\x01 \x01(\tH\x00R\x06prefix\x88\x01\x01\x42\t\n\x07_prefix\x1a\x1b\n\x05Unset\x12\x12\n\x04keys\x18\x01 
\x03(\tR\x04keys\x1a"\n\x0cIsModifiable\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keysB\x0e\n\x0c_client_type"\xaf\x01\n\x0e\x43onfigResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x04 \x01(\tR\x13serverSideSessionId\x12-\n\x05pairs\x18\x02 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x12\x1a\n\x08warnings\x18\x03 \x03(\tR\x08warnings"\xe7\x06\n\x13\x41\x64\x64\x41rtifactsRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x06 \x01(\tH\x01R\nclientType\x88\x01\x01\x12@\n\x05\x62\x61tch\x18\x03 \x01(\x0b\x32(.spark.connect.AddArtifactsRequest.BatchH\x00R\x05\x62\x61tch\x12Z\n\x0b\x62\x65gin_chunk\x18\x04 \x01(\x0b\x32\x37.spark.connect.AddArtifactsRequest.BeginChunkedArtifactH\x00R\nbeginChunk\x12H\n\x05\x63hunk\x18\x05 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkH\x00R\x05\x63hunk\x1a\x35\n\rArtifactChunk\x12\x12\n\x04\x64\x61ta\x18\x01 \x01(\x0cR\x04\x64\x61ta\x12\x10\n\x03\x63rc\x18\x02 \x01(\x03R\x03\x63rc\x1ao\n\x13SingleChunkArtifact\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x44\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkR\x04\x64\x61ta\x1a]\n\x05\x42\x61tch\x12T\n\tartifacts\x18\x01 \x03(\x0b\x32\x36.spark.connect.AddArtifactsRequest.SingleChunkArtifactR\tartifacts\x1a\xc1\x01\n\x14\x42\x65ginChunkedArtifact\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x1f\n\x0btotal_bytes\x18\x02 \x01(\x03R\ntotalBytes\x12\x1d\n\nnum_chunks\x18\x03 \x01(\x03R\tnumChunks\x12U\n\rinitial_chunk\x18\x04 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkR\x0cinitialChunkB\t\n\x07payloadB\x0e\n\x0c_client_type"\x90\x02\n\x14\x41\x64\x64\x41rtifactsResponse\x12\x1d\n\nsession_id\x18\x02 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12Q\n\tartifacts\x18\x01 
\x03(\x0b\x32\x33.spark.connect.AddArtifactsResponse.ArtifactSummaryR\tartifacts\x1aQ\n\x0f\x41rtifactSummary\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12*\n\x11is_crc_successful\x18\x02 \x01(\x08R\x0fisCrcSuccessful"\xc3\x01\n\x17\x41rtifactStatusesRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x00R\nclientType\x88\x01\x01\x12\x14\n\x05names\x18\x04 \x03(\tR\x05namesB\x0e\n\x0c_client_type"\xe0\x02\n\x18\x41rtifactStatusesResponse\x12\x1d\n\nsession_id\x18\x02 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12Q\n\x08statuses\x18\x01 \x03(\x0b\x32\x35.spark.connect.ArtifactStatusesResponse.StatusesEntryR\x08statuses\x1as\n\rStatusesEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12L\n\x05value\x18\x02 \x01(\x0b\x32\x36.spark.connect.ArtifactStatusesResponse.ArtifactStatusR\x05value:\x02\x38\x01\x1a(\n\x0e\x41rtifactStatus\x12\x16\n\x06\x65xists\x18\x01 \x01(\x08R\x06\x65xists"\xd8\x03\n\x10InterruptRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x01R\nclientType\x88\x01\x01\x12T\n\x0einterrupt_type\x18\x04 \x01(\x0e\x32-.spark.connect.InterruptRequest.InterruptTypeR\rinterruptType\x12%\n\roperation_tag\x18\x05 \x01(\tH\x00R\x0coperationTag\x12#\n\x0coperation_id\x18\x06 \x01(\tH\x00R\x0boperationId"\x80\x01\n\rInterruptType\x12\x1e\n\x1aINTERRUPT_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12INTERRUPT_TYPE_ALL\x10\x01\x12\x16\n\x12INTERRUPT_TYPE_TAG\x10\x02\x12\x1f\n\x1bINTERRUPT_TYPE_OPERATION_ID\x10\x03\x42\x0b\n\tinterruptB\x0e\n\x0c_client_type"\x90\x01\n\x11InterruptResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12\'\n\x0finterrupted_ids\x18\x02 
\x03(\tR\x0einterruptedIds"5\n\x0fReattachOptions\x12"\n\x0creattachable\x18\x01 \x01(\x08R\x0creattachable"\x93\x02\n\x16ReattachExecuteRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12!\n\x0coperation_id\x18\x03 \x01(\tR\x0boperationId\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x00R\nclientType\x88\x01\x01\x12-\n\x10last_response_id\x18\x05 \x01(\tH\x01R\x0elastResponseId\x88\x01\x01\x42\x0e\n\x0c_client_typeB\x13\n\x11_last_response_id"\xc6\x03\n\x15ReleaseExecuteRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12!\n\x0coperation_id\x18\x03 \x01(\tR\x0boperationId\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x01R\nclientType\x88\x01\x01\x12R\n\x0brelease_all\x18\x05 \x01(\x0b\x32/.spark.connect.ReleaseExecuteRequest.ReleaseAllH\x00R\nreleaseAll\x12X\n\rrelease_until\x18\x06 \x01(\x0b\x32\x31.spark.connect.ReleaseExecuteRequest.ReleaseUntilH\x00R\x0creleaseUntil\x1a\x0c\n\nReleaseAll\x1a/\n\x0cReleaseUntil\x12\x1f\n\x0bresponse_id\x18\x01 \x01(\tR\nresponseIdB\t\n\x07releaseB\x0e\n\x0c_client_type"\xa5\x01\n\x16ReleaseExecuteResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12&\n\x0coperation_id\x18\x02 \x01(\tH\x00R\x0boperationId\x88\x01\x01\x42\x0f\n\r_operation_id"\xab\x01\n\x15ReleaseSessionRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x00R\nclientType\x88\x01\x01\x42\x0e\n\x0c_client_type"l\n\x16ReleaseSessionResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x02 \x01(\tR\x13serverSideSessionId"\xc9\x01\n\x18\x46\x65tchErrorDetailsRequest\x12\x1d\n\nsession_id\x18\x01 
\x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12\x19\n\x08\x65rror_id\x18\x03 \x01(\tR\x07\x65rrorId\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x00R\nclientType\x88\x01\x01\x42\x0e\n\x0c_client_type"\x93\x0c\n\x19\x46\x65tchErrorDetailsResponse\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12\x1d\n\nsession_id\x18\x04 \x01(\tR\tsessionId\x12)\n\x0eroot_error_idx\x18\x01 \x01(\x05H\x00R\x0crootErrorIdx\x88\x01\x01\x12\x46\n\x06\x65rrors\x18\x02 \x03(\x0b\x32..spark.connect.FetchErrorDetailsResponse.ErrorR\x06\x65rrors\x1a\xae\x01\n\x11StackTraceElement\x12\'\n\x0f\x64\x65\x63laring_class\x18\x01 \x01(\tR\x0e\x64\x65\x63laringClass\x12\x1f\n\x0bmethod_name\x18\x02 \x01(\tR\nmethodName\x12 \n\tfile_name\x18\x03 \x01(\tH\x00R\x08\x66ileName\x88\x01\x01\x12\x1f\n\x0bline_number\x18\x04 \x01(\x05R\nlineNumberB\x0c\n\n_file_name\x1a\xf0\x02\n\x0cQueryContext\x12\x64\n\x0c\x63ontext_type\x18\n \x01(\x0e\x32\x41.spark.connect.FetchErrorDetailsResponse.QueryContext.ContextTypeR\x0b\x63ontextType\x12\x1f\n\x0bobject_type\x18\x01 \x01(\tR\nobjectType\x12\x1f\n\x0bobject_name\x18\x02 \x01(\tR\nobjectName\x12\x1f\n\x0bstart_index\x18\x03 \x01(\x05R\nstartIndex\x12\x1d\n\nstop_index\x18\x04 \x01(\x05R\tstopIndex\x12\x1a\n\x08\x66ragment\x18\x05 \x01(\tR\x08\x66ragment\x12\x1b\n\tcall_site\x18\x06 \x01(\tR\x08\x63\x61llSite\x12\x18\n\x07summary\x18\x07 \x01(\tR\x07summary"%\n\x0b\x43ontextType\x12\x07\n\x03SQL\x10\x00\x12\r\n\tDATAFRAME\x10\x01\x1a\x99\x03\n\x0eSparkThrowable\x12$\n\x0b\x65rror_class\x18\x01 \x01(\tH\x00R\nerrorClass\x88\x01\x01\x12}\n\x12message_parameters\x18\x02 \x03(\x0b\x32N.spark.connect.FetchErrorDetailsResponse.SparkThrowable.MessageParametersEntryR\x11messageParameters\x12\\\n\x0equery_contexts\x18\x03 \x03(\x0b\x32\x35.spark.connect.FetchErrorDetailsResponse.QueryContextR\rqueryContexts\x12 \n\tsql_state\x18\x04 
\x01(\tH\x01R\x08sqlState\x88\x01\x01\x1a\x44\n\x16MessageParametersEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\x0e\n\x0c_error_classB\x0c\n\n_sql_state\x1a\xdb\x02\n\x05\x45rror\x12\x30\n\x14\x65rror_type_hierarchy\x18\x01 \x03(\tR\x12\x65rrorTypeHierarchy\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12[\n\x0bstack_trace\x18\x03 \x03(\x0b\x32:.spark.connect.FetchErrorDetailsResponse.StackTraceElementR\nstackTrace\x12 \n\tcause_idx\x18\x04 \x01(\x05H\x00R\x08\x63\x61useIdx\x88\x01\x01\x12\x65\n\x0fspark_throwable\x18\x05 \x01(\x0b\x32\x37.spark.connect.FetchErrorDetailsResponse.SparkThrowableH\x01R\x0esparkThrowable\x88\x01\x01\x42\x0c\n\n_cause_idxB\x12\n\x10_spark_throwableB\x11\n\x0f_root_error_idx2\xb2\x07\n\x13SparkConnectService\x12X\n\x0b\x45xecutePlan\x12!.spark.connect.ExecutePlanRequest\x1a".spark.connect.ExecutePlanResponse"\x00\x30\x01\x12V\n\x0b\x41nalyzePlan\x12!.spark.connect.AnalyzePlanRequest\x1a".spark.connect.AnalyzePlanResponse"\x00\x12G\n\x06\x43onfig\x12\x1c.spark.connect.ConfigRequest\x1a\x1d.spark.connect.ConfigResponse"\x00\x12[\n\x0c\x41\x64\x64\x41rtifacts\x12".spark.connect.AddArtifactsRequest\x1a#.spark.connect.AddArtifactsResponse"\x00(\x01\x12\x63\n\x0e\x41rtifactStatus\x12&.spark.connect.ArtifactStatusesRequest\x1a\'.spark.connect.ArtifactStatusesResponse"\x00\x12P\n\tInterrupt\x12\x1f.spark.connect.InterruptRequest\x1a 
.spark.connect.InterruptResponse"\x00\x12`\n\x0fReattachExecute\x12%.spark.connect.ReattachExecuteRequest\x1a".spark.connect.ExecutePlanResponse"\x00\x30\x01\x12_\n\x0eReleaseExecute\x12$.spark.connect.ReleaseExecuteRequest\x1a%.spark.connect.ReleaseExecuteResponse"\x00\x12_\n\x0eReleaseSession\x12$.spark.connect.ReleaseSessionRequest\x1a%.spark.connect.ReleaseSessionResponse"\x00\x12h\n\x11\x46\x65tchErrorDetails\x12\'.spark.connect.FetchErrorDetailsRequest\x1a(.spark.connect.FetchErrorDetailsResponse"\x00\x42\x36\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' + b'\n\x18spark/connect/base.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1cspark/connect/commands.proto\x1a\x1aspark/connect/common.proto\x1a\x1fspark/connect/expressions.proto\x1a\x1dspark/connect/relations.proto\x1a\x19spark/connect/types.proto"t\n\x04Plan\x12-\n\x04root\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationH\x00R\x04root\x12\x32\n\x07\x63ommand\x18\x02 \x01(\x0b\x32\x16.spark.connect.CommandH\x00R\x07\x63ommandB\t\n\x07op_type"z\n\x0bUserContext\x12\x17\n\x07user_id\x18\x01 \x01(\tR\x06userId\x12\x1b\n\tuser_name\x18\x02 \x01(\tR\x08userName\x12\x35\n\nextensions\x18\xe7\x07 \x03(\x0b\x32\x14.google.protobuf.AnyR\nextensions"\xf5\x12\n\x12\x41nalyzePlanRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x01R\nclientType\x88\x01\x01\x12\x42\n\x06schema\x18\x04 \x01(\x0b\x32(.spark.connect.AnalyzePlanRequest.SchemaH\x00R\x06schema\x12\x45\n\x07\x65xplain\x18\x05 \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.ExplainH\x00R\x07\x65xplain\x12O\n\x0btree_string\x18\x06 \x01(\x0b\x32,.spark.connect.AnalyzePlanRequest.TreeStringH\x00R\ntreeString\x12\x46\n\x08is_local\x18\x07 \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.IsLocalH\x00R\x07isLocal\x12R\n\x0cis_streaming\x18\x08 
\x01(\x0b\x32-.spark.connect.AnalyzePlanRequest.IsStreamingH\x00R\x0bisStreaming\x12O\n\x0binput_files\x18\t \x01(\x0b\x32,.spark.connect.AnalyzePlanRequest.InputFilesH\x00R\ninputFiles\x12U\n\rspark_version\x18\n \x01(\x0b\x32..spark.connect.AnalyzePlanRequest.SparkVersionH\x00R\x0csparkVersion\x12I\n\tddl_parse\x18\x0b \x01(\x0b\x32*.spark.connect.AnalyzePlanRequest.DDLParseH\x00R\x08\x64\x64lParse\x12X\n\x0esame_semantics\x18\x0c \x01(\x0b\x32/.spark.connect.AnalyzePlanRequest.SameSemanticsH\x00R\rsameSemantics\x12U\n\rsemantic_hash\x18\r \x01(\x0b\x32..spark.connect.AnalyzePlanRequest.SemanticHashH\x00R\x0csemanticHash\x12\x45\n\x07persist\x18\x0e \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.PersistH\x00R\x07persist\x12K\n\tunpersist\x18\x0f \x01(\x0b\x32+.spark.connect.AnalyzePlanRequest.UnpersistH\x00R\tunpersist\x12_\n\x11get_storage_level\x18\x10 \x01(\x0b\x32\x31.spark.connect.AnalyzePlanRequest.GetStorageLevelH\x00R\x0fgetStorageLevel\x1a\x31\n\x06Schema\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\xbb\x02\n\x07\x45xplain\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12X\n\x0c\x65xplain_mode\x18\x02 \x01(\x0e\x32\x35.spark.connect.AnalyzePlanRequest.Explain.ExplainModeR\x0b\x65xplainMode"\xac\x01\n\x0b\x45xplainMode\x12\x1c\n\x18\x45XPLAIN_MODE_UNSPECIFIED\x10\x00\x12\x17\n\x13\x45XPLAIN_MODE_SIMPLE\x10\x01\x12\x19\n\x15\x45XPLAIN_MODE_EXTENDED\x10\x02\x12\x18\n\x14\x45XPLAIN_MODE_CODEGEN\x10\x03\x12\x15\n\x11\x45XPLAIN_MODE_COST\x10\x04\x12\x1a\n\x16\x45XPLAIN_MODE_FORMATTED\x10\x05\x1aZ\n\nTreeString\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12\x19\n\x05level\x18\x02 \x01(\x05H\x00R\x05level\x88\x01\x01\x42\x08\n\x06_level\x1a\x32\n\x07IsLocal\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x36\n\x0bIsStreaming\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x35\n\nInputFiles\x12\'\n\x04plan\x18\x01 
\x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x0e\n\x0cSparkVersion\x1a)\n\x08\x44\x44LParse\x12\x1d\n\nddl_string\x18\x01 \x01(\tR\tddlString\x1ay\n\rSameSemantics\x12\x34\n\x0btarget_plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\ntargetPlan\x12\x32\n\nother_plan\x18\x02 \x01(\x0b\x32\x13.spark.connect.PlanR\totherPlan\x1a\x37\n\x0cSemanticHash\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x97\x01\n\x07Persist\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x12\x45\n\rstorage_level\x18\x02 \x01(\x0b\x32\x1b.spark.connect.StorageLevelH\x00R\x0cstorageLevel\x88\x01\x01\x42\x10\n\x0e_storage_level\x1an\n\tUnpersist\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x12\x1f\n\x08\x62locking\x18\x02 \x01(\x08H\x00R\x08\x62locking\x88\x01\x01\x42\x0b\n\t_blocking\x1a\x46\n\x0fGetStorageLevel\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relationB\t\n\x07\x61nalyzeB\x0e\n\x0c_client_type"\xce\r\n\x13\x41nalyzePlanResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x0f \x01(\tR\x13serverSideSessionId\x12\x43\n\x06schema\x18\x02 \x01(\x0b\x32).spark.connect.AnalyzePlanResponse.SchemaH\x00R\x06schema\x12\x46\n\x07\x65xplain\x18\x03 \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.ExplainH\x00R\x07\x65xplain\x12P\n\x0btree_string\x18\x04 \x01(\x0b\x32-.spark.connect.AnalyzePlanResponse.TreeStringH\x00R\ntreeString\x12G\n\x08is_local\x18\x05 \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.IsLocalH\x00R\x07isLocal\x12S\n\x0cis_streaming\x18\x06 \x01(\x0b\x32..spark.connect.AnalyzePlanResponse.IsStreamingH\x00R\x0bisStreaming\x12P\n\x0binput_files\x18\x07 \x01(\x0b\x32-.spark.connect.AnalyzePlanResponse.InputFilesH\x00R\ninputFiles\x12V\n\rspark_version\x18\x08 \x01(\x0b\x32/.spark.connect.AnalyzePlanResponse.SparkVersionH\x00R\x0csparkVersion\x12J\n\tddl_parse\x18\t 
\x01(\x0b\x32+.spark.connect.AnalyzePlanResponse.DDLParseH\x00R\x08\x64\x64lParse\x12Y\n\x0esame_semantics\x18\n \x01(\x0b\x32\x30.spark.connect.AnalyzePlanResponse.SameSemanticsH\x00R\rsameSemantics\x12V\n\rsemantic_hash\x18\x0b \x01(\x0b\x32/.spark.connect.AnalyzePlanResponse.SemanticHashH\x00R\x0csemanticHash\x12\x46\n\x07persist\x18\x0c \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.PersistH\x00R\x07persist\x12L\n\tunpersist\x18\r \x01(\x0b\x32,.spark.connect.AnalyzePlanResponse.UnpersistH\x00R\tunpersist\x12`\n\x11get_storage_level\x18\x0e \x01(\x0b\x32\x32.spark.connect.AnalyzePlanResponse.GetStorageLevelH\x00R\x0fgetStorageLevel\x1a\x39\n\x06Schema\x12/\n\x06schema\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema\x1a\x30\n\x07\x45xplain\x12%\n\x0e\x65xplain_string\x18\x01 \x01(\tR\rexplainString\x1a-\n\nTreeString\x12\x1f\n\x0btree_string\x18\x01 \x01(\tR\ntreeString\x1a$\n\x07IsLocal\x12\x19\n\x08is_local\x18\x01 \x01(\x08R\x07isLocal\x1a\x30\n\x0bIsStreaming\x12!\n\x0cis_streaming\x18\x01 \x01(\x08R\x0bisStreaming\x1a"\n\nInputFiles\x12\x14\n\x05\x66iles\x18\x01 \x03(\tR\x05\x66iles\x1a(\n\x0cSparkVersion\x12\x18\n\x07version\x18\x01 \x01(\tR\x07version\x1a;\n\x08\x44\x44LParse\x12/\n\x06parsed\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06parsed\x1a\'\n\rSameSemantics\x12\x16\n\x06result\x18\x01 \x01(\x08R\x06result\x1a&\n\x0cSemanticHash\x12\x16\n\x06result\x18\x01 \x01(\x05R\x06result\x1a\t\n\x07Persist\x1a\x0b\n\tUnpersist\x1aS\n\x0fGetStorageLevel\x12@\n\rstorage_level\x18\x01 \x01(\x0b\x32\x1b.spark.connect.StorageLevelR\x0cstorageLevelB\x08\n\x06result"\xa0\x04\n\x12\x45xecutePlanRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12&\n\x0coperation_id\x18\x06 \x01(\tH\x00R\x0boperationId\x88\x01\x01\x12\'\n\x04plan\x18\x03 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12$\n\x0b\x63lient_type\x18\x04 
\x01(\tH\x01R\nclientType\x88\x01\x01\x12X\n\x0frequest_options\x18\x05 \x03(\x0b\x32/.spark.connect.ExecutePlanRequest.RequestOptionR\x0erequestOptions\x12\x12\n\x04tags\x18\x07 \x03(\tR\x04tags\x1a\xa5\x01\n\rRequestOption\x12K\n\x10reattach_options\x18\x01 \x01(\x0b\x32\x1e.spark.connect.ReattachOptionsH\x00R\x0freattachOptions\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textensionB\x10\n\x0erequest_optionB\x0f\n\r_operation_idB\x0e\n\x0c_client_type"\xe0\x12\n\x13\x45xecutePlanResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x0f \x01(\tR\x13serverSideSessionId\x12!\n\x0coperation_id\x18\x0c \x01(\tR\x0boperationId\x12\x1f\n\x0bresponse_id\x18\r \x01(\tR\nresponseId\x12P\n\x0b\x61rrow_batch\x18\x02 \x01(\x0b\x32-.spark.connect.ExecutePlanResponse.ArrowBatchH\x00R\narrowBatch\x12\x63\n\x12sql_command_result\x18\x05 \x01(\x0b\x32\x33.spark.connect.ExecutePlanResponse.SqlCommandResultH\x00R\x10sqlCommandResult\x12~\n#write_stream_operation_start_result\x18\x08 \x01(\x0b\x32..spark.connect.WriteStreamOperationStartResultH\x00R\x1fwriteStreamOperationStartResult\x12q\n\x1estreaming_query_command_result\x18\t \x01(\x0b\x32*.spark.connect.StreamingQueryCommandResultH\x00R\x1bstreamingQueryCommandResult\x12k\n\x1cget_resources_command_result\x18\n \x01(\x0b\x32(.spark.connect.GetResourcesCommandResultH\x00R\x19getResourcesCommandResult\x12\x87\x01\n&streaming_query_manager_command_result\x18\x0b \x01(\x0b\x32\x31.spark.connect.StreamingQueryManagerCommandResultH\x00R"streamingQueryManagerCommandResult\x12\\\n\x0fresult_complete\x18\x0e \x01(\x0b\x32\x31.spark.connect.ExecutePlanResponse.ResultCompleteH\x00R\x0eresultComplete\x12\x65\n\x12\x65xecution_progress\x18\x10 \x01(\x0b\x32\x34.spark.connect.ExecutePlanResponse.ExecutionProgressH\x00R\x11\x65xecutionProgress\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x12\x44\n\x07metrics\x18\x04 
\x01(\x0b\x32*.spark.connect.ExecutePlanResponse.MetricsR\x07metrics\x12]\n\x10observed_metrics\x18\x06 \x03(\x0b\x32\x32.spark.connect.ExecutePlanResponse.ObservedMetricsR\x0fobservedMetrics\x12/\n\x06schema\x18\x07 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema\x1aG\n\x10SqlCommandResult\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x1av\n\nArrowBatch\x12\x1b\n\trow_count\x18\x01 \x01(\x03R\x08rowCount\x12\x12\n\x04\x64\x61ta\x18\x02 \x01(\x0cR\x04\x64\x61ta\x12&\n\x0cstart_offset\x18\x03 \x01(\x03H\x00R\x0bstartOffset\x88\x01\x01\x42\x0f\n\r_start_offset\x1a\x85\x04\n\x07Metrics\x12Q\n\x07metrics\x18\x01 \x03(\x0b\x32\x37.spark.connect.ExecutePlanResponse.Metrics.MetricObjectR\x07metrics\x1a\xcc\x02\n\x0cMetricObject\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x17\n\x07plan_id\x18\x02 \x01(\x03R\x06planId\x12\x16\n\x06parent\x18\x03 \x01(\x03R\x06parent\x12z\n\x11\x65xecution_metrics\x18\x04 \x03(\x0b\x32M.spark.connect.ExecutePlanResponse.Metrics.MetricObject.ExecutionMetricsEntryR\x10\x65xecutionMetrics\x1a{\n\x15\x45xecutionMetricsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12L\n\x05value\x18\x02 \x01(\x0b\x32\x36.spark.connect.ExecutePlanResponse.Metrics.MetricValueR\x05value:\x02\x38\x01\x1aX\n\x0bMetricValue\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x14\n\x05value\x18\x02 \x01(\x03R\x05value\x12\x1f\n\x0bmetric_type\x18\x03 \x01(\tR\nmetricType\x1at\n\x0fObservedMetrics\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x39\n\x06values\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values\x12\x12\n\x04keys\x18\x03 \x03(\tR\x04keys\x1a\x10\n\x0eResultComplete\x1a\xdb\x01\n\x11\x45xecutionProgress\x12\x1b\n\tnum_tasks\x18\x01 \x01(\x03R\x08numTasks\x12.\n\x13num_completed_tasks\x18\x02 \x01(\x03R\x11numCompletedTasks\x12\x1d\n\nnum_stages\x18\x03 \x01(\x03R\tnumStages\x12\x30\n\x14num_completed_stages\x18\x04 \x01(\x03R\x12numCompletedStages\x12(\n\x10input_bytes_read\x18\x05 
\x01(\x03R\x0einputBytesReadB\x0f\n\rresponse_type"A\n\x08KeyValue\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x19\n\x05value\x18\x02 \x01(\tH\x00R\x05value\x88\x01\x01\x42\x08\n\x06_value"\x84\x08\n\rConfigRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12\x44\n\toperation\x18\x03 \x01(\x0b\x32&.spark.connect.ConfigRequest.OperationR\toperation\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x00R\nclientType\x88\x01\x01\x1a\xf2\x03\n\tOperation\x12\x34\n\x03set\x18\x01 \x01(\x0b\x32 .spark.connect.ConfigRequest.SetH\x00R\x03set\x12\x34\n\x03get\x18\x02 \x01(\x0b\x32 .spark.connect.ConfigRequest.GetH\x00R\x03get\x12W\n\x10get_with_default\x18\x03 \x01(\x0b\x32+.spark.connect.ConfigRequest.GetWithDefaultH\x00R\x0egetWithDefault\x12G\n\nget_option\x18\x04 \x01(\x0b\x32&.spark.connect.ConfigRequest.GetOptionH\x00R\tgetOption\x12>\n\x07get_all\x18\x05 \x01(\x0b\x32#.spark.connect.ConfigRequest.GetAllH\x00R\x06getAll\x12:\n\x05unset\x18\x06 \x01(\x0b\x32".spark.connect.ConfigRequest.UnsetH\x00R\x05unset\x12P\n\ris_modifiable\x18\x07 \x01(\x0b\x32).spark.connect.ConfigRequest.IsModifiableH\x00R\x0cisModifiableB\t\n\x07op_type\x1a\x34\n\x03Set\x12-\n\x05pairs\x18\x01 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x1a\x19\n\x03Get\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a?\n\x0eGetWithDefault\x12-\n\x05pairs\x18\x01 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x1a\x1f\n\tGetOption\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a\x30\n\x06GetAll\x12\x1b\n\x06prefix\x18\x01 \x01(\tH\x00R\x06prefix\x88\x01\x01\x42\t\n\x07_prefix\x1a\x1b\n\x05Unset\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a"\n\x0cIsModifiable\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keysB\x0e\n\x0c_client_type"\xaf\x01\n\x0e\x43onfigResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x04 \x01(\tR\x13serverSideSessionId\x12-\n\x05pairs\x18\x02 
\x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x12\x1a\n\x08warnings\x18\x03 \x03(\tR\x08warnings"\xe7\x06\n\x13\x41\x64\x64\x41rtifactsRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x06 \x01(\tH\x01R\nclientType\x88\x01\x01\x12@\n\x05\x62\x61tch\x18\x03 \x01(\x0b\x32(.spark.connect.AddArtifactsRequest.BatchH\x00R\x05\x62\x61tch\x12Z\n\x0b\x62\x65gin_chunk\x18\x04 \x01(\x0b\x32\x37.spark.connect.AddArtifactsRequest.BeginChunkedArtifactH\x00R\nbeginChunk\x12H\n\x05\x63hunk\x18\x05 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkH\x00R\x05\x63hunk\x1a\x35\n\rArtifactChunk\x12\x12\n\x04\x64\x61ta\x18\x01 \x01(\x0cR\x04\x64\x61ta\x12\x10\n\x03\x63rc\x18\x02 \x01(\x03R\x03\x63rc\x1ao\n\x13SingleChunkArtifact\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x44\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkR\x04\x64\x61ta\x1a]\n\x05\x42\x61tch\x12T\n\tartifacts\x18\x01 \x03(\x0b\x32\x36.spark.connect.AddArtifactsRequest.SingleChunkArtifactR\tartifacts\x1a\xc1\x01\n\x14\x42\x65ginChunkedArtifact\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x1f\n\x0btotal_bytes\x18\x02 \x01(\x03R\ntotalBytes\x12\x1d\n\nnum_chunks\x18\x03 \x01(\x03R\tnumChunks\x12U\n\rinitial_chunk\x18\x04 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkR\x0cinitialChunkB\t\n\x07payloadB\x0e\n\x0c_client_type"\x90\x02\n\x14\x41\x64\x64\x41rtifactsResponse\x12\x1d\n\nsession_id\x18\x02 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12Q\n\tartifacts\x18\x01 \x03(\x0b\x32\x33.spark.connect.AddArtifactsResponse.ArtifactSummaryR\tartifacts\x1aQ\n\x0f\x41rtifactSummary\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12*\n\x11is_crc_successful\x18\x02 \x01(\x08R\x0fisCrcSuccessful"\xc3\x01\n\x17\x41rtifactStatusesRequest\x12\x1d\n\nsession_id\x18\x01 
\x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x00R\nclientType\x88\x01\x01\x12\x14\n\x05names\x18\x04 \x03(\tR\x05namesB\x0e\n\x0c_client_type"\xe0\x02\n\x18\x41rtifactStatusesResponse\x12\x1d\n\nsession_id\x18\x02 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12Q\n\x08statuses\x18\x01 \x03(\x0b\x32\x35.spark.connect.ArtifactStatusesResponse.StatusesEntryR\x08statuses\x1as\n\rStatusesEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12L\n\x05value\x18\x02 \x01(\x0b\x32\x36.spark.connect.ArtifactStatusesResponse.ArtifactStatusR\x05value:\x02\x38\x01\x1a(\n\x0e\x41rtifactStatus\x12\x16\n\x06\x65xists\x18\x01 \x01(\x08R\x06\x65xists"\xd8\x03\n\x10InterruptRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x01R\nclientType\x88\x01\x01\x12T\n\x0einterrupt_type\x18\x04 \x01(\x0e\x32-.spark.connect.InterruptRequest.InterruptTypeR\rinterruptType\x12%\n\roperation_tag\x18\x05 \x01(\tH\x00R\x0coperationTag\x12#\n\x0coperation_id\x18\x06 \x01(\tH\x00R\x0boperationId"\x80\x01\n\rInterruptType\x12\x1e\n\x1aINTERRUPT_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12INTERRUPT_TYPE_ALL\x10\x01\x12\x16\n\x12INTERRUPT_TYPE_TAG\x10\x02\x12\x1f\n\x1bINTERRUPT_TYPE_OPERATION_ID\x10\x03\x42\x0b\n\tinterruptB\x0e\n\x0c_client_type"\x90\x01\n\x11InterruptResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12\'\n\x0finterrupted_ids\x18\x02 \x03(\tR\x0einterruptedIds"5\n\x0fReattachOptions\x12"\n\x0creattachable\x18\x01 \x01(\x08R\x0creattachable"\x93\x02\n\x16ReattachExecuteRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 
\x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12!\n\x0coperation_id\x18\x03 \x01(\tR\x0boperationId\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x00R\nclientType\x88\x01\x01\x12-\n\x10last_response_id\x18\x05 \x01(\tH\x01R\x0elastResponseId\x88\x01\x01\x42\x0e\n\x0c_client_typeB\x13\n\x11_last_response_id"\xc6\x03\n\x15ReleaseExecuteRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12!\n\x0coperation_id\x18\x03 \x01(\tR\x0boperationId\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x01R\nclientType\x88\x01\x01\x12R\n\x0brelease_all\x18\x05 \x01(\x0b\x32/.spark.connect.ReleaseExecuteRequest.ReleaseAllH\x00R\nreleaseAll\x12X\n\rrelease_until\x18\x06 \x01(\x0b\x32\x31.spark.connect.ReleaseExecuteRequest.ReleaseUntilH\x00R\x0creleaseUntil\x1a\x0c\n\nReleaseAll\x1a/\n\x0cReleaseUntil\x12\x1f\n\x0bresponse_id\x18\x01 \x01(\tR\nresponseIdB\t\n\x07releaseB\x0e\n\x0c_client_type"\xa5\x01\n\x16ReleaseExecuteResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12&\n\x0coperation_id\x18\x02 \x01(\tH\x00R\x0boperationId\x88\x01\x01\x42\x0f\n\r_operation_id"\xab\x01\n\x15ReleaseSessionRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x00R\nclientType\x88\x01\x01\x42\x0e\n\x0c_client_type"l\n\x16ReleaseSessionResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x02 \x01(\tR\x13serverSideSessionId"\xc9\x01\n\x18\x46\x65tchErrorDetailsRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12\x19\n\x08\x65rror_id\x18\x03 \x01(\tR\x07\x65rrorId\x12$\n\x0b\x63lient_type\x18\x04 
\x01(\tH\x00R\nclientType\x88\x01\x01\x42\x0e\n\x0c_client_type"\x93\x0c\n\x19\x46\x65tchErrorDetailsResponse\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12\x1d\n\nsession_id\x18\x04 \x01(\tR\tsessionId\x12)\n\x0eroot_error_idx\x18\x01 \x01(\x05H\x00R\x0crootErrorIdx\x88\x01\x01\x12\x46\n\x06\x65rrors\x18\x02 \x03(\x0b\x32..spark.connect.FetchErrorDetailsResponse.ErrorR\x06\x65rrors\x1a\xae\x01\n\x11StackTraceElement\x12\'\n\x0f\x64\x65\x63laring_class\x18\x01 \x01(\tR\x0e\x64\x65\x63laringClass\x12\x1f\n\x0bmethod_name\x18\x02 \x01(\tR\nmethodName\x12 \n\tfile_name\x18\x03 \x01(\tH\x00R\x08\x66ileName\x88\x01\x01\x12\x1f\n\x0bline_number\x18\x04 \x01(\x05R\nlineNumberB\x0c\n\n_file_name\x1a\xf0\x02\n\x0cQueryContext\x12\x64\n\x0c\x63ontext_type\x18\n \x01(\x0e\x32\x41.spark.connect.FetchErrorDetailsResponse.QueryContext.ContextTypeR\x0b\x63ontextType\x12\x1f\n\x0bobject_type\x18\x01 \x01(\tR\nobjectType\x12\x1f\n\x0bobject_name\x18\x02 \x01(\tR\nobjectName\x12\x1f\n\x0bstart_index\x18\x03 \x01(\x05R\nstartIndex\x12\x1d\n\nstop_index\x18\x04 \x01(\x05R\tstopIndex\x12\x1a\n\x08\x66ragment\x18\x05 \x01(\tR\x08\x66ragment\x12\x1b\n\tcall_site\x18\x06 \x01(\tR\x08\x63\x61llSite\x12\x18\n\x07summary\x18\x07 \x01(\tR\x07summary"%\n\x0b\x43ontextType\x12\x07\n\x03SQL\x10\x00\x12\r\n\tDATAFRAME\x10\x01\x1a\x99\x03\n\x0eSparkThrowable\x12$\n\x0b\x65rror_class\x18\x01 \x01(\tH\x00R\nerrorClass\x88\x01\x01\x12}\n\x12message_parameters\x18\x02 \x03(\x0b\x32N.spark.connect.FetchErrorDetailsResponse.SparkThrowable.MessageParametersEntryR\x11messageParameters\x12\\\n\x0equery_contexts\x18\x03 \x03(\x0b\x32\x35.spark.connect.FetchErrorDetailsResponse.QueryContextR\rqueryContexts\x12 \n\tsql_state\x18\x04 \x01(\tH\x01R\x08sqlState\x88\x01\x01\x1a\x44\n\x16MessageParametersEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 
\x01(\tR\x05value:\x02\x38\x01\x42\x0e\n\x0c_error_classB\x0c\n\n_sql_state\x1a\xdb\x02\n\x05\x45rror\x12\x30\n\x14\x65rror_type_hierarchy\x18\x01 \x03(\tR\x12\x65rrorTypeHierarchy\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12[\n\x0bstack_trace\x18\x03 \x03(\x0b\x32:.spark.connect.FetchErrorDetailsResponse.StackTraceElementR\nstackTrace\x12 \n\tcause_idx\x18\x04 \x01(\x05H\x00R\x08\x63\x61useIdx\x88\x01\x01\x12\x65\n\x0fspark_throwable\x18\x05 \x01(\x0b\x32\x37.spark.connect.FetchErrorDetailsResponse.SparkThrowableH\x01R\x0esparkThrowable\x88\x01\x01\x42\x0c\n\n_cause_idxB\x12\n\x10_spark_throwableB\x11\n\x0f_root_error_idx2\xb2\x07\n\x13SparkConnectService\x12X\n\x0b\x45xecutePlan\x12!.spark.connect.ExecutePlanRequest\x1a".spark.connect.ExecutePlanResponse"\x00\x30\x01\x12V\n\x0b\x41nalyzePlan\x12!.spark.connect.AnalyzePlanRequest\x1a".spark.connect.AnalyzePlanResponse"\x00\x12G\n\x06\x43onfig\x12\x1c.spark.connect.ConfigRequest\x1a\x1d.spark.connect.ConfigResponse"\x00\x12[\n\x0c\x41\x64\x64\x41rtifacts\x12".spark.connect.AddArtifactsRequest\x1a#.spark.connect.AddArtifactsResponse"\x00(\x01\x12\x63\n\x0e\x41rtifactStatus\x12&.spark.connect.ArtifactStatusesRequest\x1a\'.spark.connect.ArtifactStatusesResponse"\x00\x12P\n\tInterrupt\x12\x1f.spark.connect.InterruptRequest\x1a .spark.connect.InterruptResponse"\x00\x12`\n\x0fReattachExecute\x12%.spark.connect.ReattachExecuteRequest\x1a".spark.connect.ExecutePlanResponse"\x00\x30\x01\x12_\n\x0eReleaseExecute\x12$.spark.connect.ReleaseExecuteRequest\x1a%.spark.connect.ReleaseExecuteResponse"\x00\x12_\n\x0eReleaseSession\x12$.spark.connect.ReleaseSessionRequest\x1a%.spark.connect.ReleaseSessionResponse"\x00\x12h\n\x11\x46\x65tchErrorDetails\x12\'.spark.connect.FetchErrorDetailsRequest\x1a(.spark.connect.FetchErrorDetailsResponse"\x00\x42\x36\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' ) _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) @@ -120,105 +120,107 @@ 
_EXECUTEPLANREQUEST_REQUESTOPTION._serialized_start = 4977 _EXECUTEPLANREQUEST_REQUESTOPTION._serialized_end = 5142 _EXECUTEPLANRESPONSE._serialized_start = 5178 - _EXECUTEPLANRESPONSE._serialized_end = 7253 - _EXECUTEPLANRESPONSE_SQLCOMMANDRESULT._serialized_start = 6389 - _EXECUTEPLANRESPONSE_SQLCOMMANDRESULT._serialized_end = 6460 - _EXECUTEPLANRESPONSE_ARROWBATCH._serialized_start = 6462 - _EXECUTEPLANRESPONSE_ARROWBATCH._serialized_end = 6580 - _EXECUTEPLANRESPONSE_METRICS._serialized_start = 6583 - _EXECUTEPLANRESPONSE_METRICS._serialized_end = 7100 - _EXECUTEPLANRESPONSE_METRICS_METRICOBJECT._serialized_start = 6678 - _EXECUTEPLANRESPONSE_METRICS_METRICOBJECT._serialized_end = 7010 - _EXECUTEPLANRESPONSE_METRICS_METRICOBJECT_EXECUTIONMETRICSENTRY._serialized_start = 6887 - _EXECUTEPLANRESPONSE_METRICS_METRICOBJECT_EXECUTIONMETRICSENTRY._serialized_end = 7010 - _EXECUTEPLANRESPONSE_METRICS_METRICVALUE._serialized_start = 7012 - _EXECUTEPLANRESPONSE_METRICS_METRICVALUE._serialized_end = 7100 - _EXECUTEPLANRESPONSE_OBSERVEDMETRICS._serialized_start = 7102 - _EXECUTEPLANRESPONSE_OBSERVEDMETRICS._serialized_end = 7218 - _EXECUTEPLANRESPONSE_RESULTCOMPLETE._serialized_start = 7220 - _EXECUTEPLANRESPONSE_RESULTCOMPLETE._serialized_end = 7236 - _KEYVALUE._serialized_start = 7255 - _KEYVALUE._serialized_end = 7320 - _CONFIGREQUEST._serialized_start = 7323 - _CONFIGREQUEST._serialized_end = 8351 - _CONFIGREQUEST_OPERATION._serialized_start = 7543 - _CONFIGREQUEST_OPERATION._serialized_end = 8041 - _CONFIGREQUEST_SET._serialized_start = 8043 - _CONFIGREQUEST_SET._serialized_end = 8095 - _CONFIGREQUEST_GET._serialized_start = 8097 - _CONFIGREQUEST_GET._serialized_end = 8122 - _CONFIGREQUEST_GETWITHDEFAULT._serialized_start = 8124 - _CONFIGREQUEST_GETWITHDEFAULT._serialized_end = 8187 - _CONFIGREQUEST_GETOPTION._serialized_start = 8189 - _CONFIGREQUEST_GETOPTION._serialized_end = 8220 - _CONFIGREQUEST_GETALL._serialized_start = 8222 - 
_CONFIGREQUEST_GETALL._serialized_end = 8270 - _CONFIGREQUEST_UNSET._serialized_start = 8272 - _CONFIGREQUEST_UNSET._serialized_end = 8299 - _CONFIGREQUEST_ISMODIFIABLE._serialized_start = 8301 - _CONFIGREQUEST_ISMODIFIABLE._serialized_end = 8335 - _CONFIGRESPONSE._serialized_start = 8354 - _CONFIGRESPONSE._serialized_end = 8529 - _ADDARTIFACTSREQUEST._serialized_start = 8532 - _ADDARTIFACTSREQUEST._serialized_end = 9403 - _ADDARTIFACTSREQUEST_ARTIFACTCHUNK._serialized_start = 8919 - _ADDARTIFACTSREQUEST_ARTIFACTCHUNK._serialized_end = 8972 - _ADDARTIFACTSREQUEST_SINGLECHUNKARTIFACT._serialized_start = 8974 - _ADDARTIFACTSREQUEST_SINGLECHUNKARTIFACT._serialized_end = 9085 - _ADDARTIFACTSREQUEST_BATCH._serialized_start = 9087 - _ADDARTIFACTSREQUEST_BATCH._serialized_end = 9180 - _ADDARTIFACTSREQUEST_BEGINCHUNKEDARTIFACT._serialized_start = 9183 - _ADDARTIFACTSREQUEST_BEGINCHUNKEDARTIFACT._serialized_end = 9376 - _ADDARTIFACTSRESPONSE._serialized_start = 9406 - _ADDARTIFACTSRESPONSE._serialized_end = 9678 - _ADDARTIFACTSRESPONSE_ARTIFACTSUMMARY._serialized_start = 9597 - _ADDARTIFACTSRESPONSE_ARTIFACTSUMMARY._serialized_end = 9678 - _ARTIFACTSTATUSESREQUEST._serialized_start = 9681 - _ARTIFACTSTATUSESREQUEST._serialized_end = 9876 - _ARTIFACTSTATUSESRESPONSE._serialized_start = 9879 - _ARTIFACTSTATUSESRESPONSE._serialized_end = 10231 - _ARTIFACTSTATUSESRESPONSE_STATUSESENTRY._serialized_start = 10074 - _ARTIFACTSTATUSESRESPONSE_STATUSESENTRY._serialized_end = 10189 - _ARTIFACTSTATUSESRESPONSE_ARTIFACTSTATUS._serialized_start = 10191 - _ARTIFACTSTATUSESRESPONSE_ARTIFACTSTATUS._serialized_end = 10231 - _INTERRUPTREQUEST._serialized_start = 10234 - _INTERRUPTREQUEST._serialized_end = 10706 - _INTERRUPTREQUEST_INTERRUPTTYPE._serialized_start = 10549 - _INTERRUPTREQUEST_INTERRUPTTYPE._serialized_end = 10677 - _INTERRUPTRESPONSE._serialized_start = 10709 - _INTERRUPTRESPONSE._serialized_end = 10853 - _REATTACHOPTIONS._serialized_start = 10855 - 
_REATTACHOPTIONS._serialized_end = 10908 - _REATTACHEXECUTEREQUEST._serialized_start = 10911 - _REATTACHEXECUTEREQUEST._serialized_end = 11186 - _RELEASEEXECUTEREQUEST._serialized_start = 11189 - _RELEASEEXECUTEREQUEST._serialized_end = 11643 - _RELEASEEXECUTEREQUEST_RELEASEALL._serialized_start = 11555 - _RELEASEEXECUTEREQUEST_RELEASEALL._serialized_end = 11567 - _RELEASEEXECUTEREQUEST_RELEASEUNTIL._serialized_start = 11569 - _RELEASEEXECUTEREQUEST_RELEASEUNTIL._serialized_end = 11616 - _RELEASEEXECUTERESPONSE._serialized_start = 11646 - _RELEASEEXECUTERESPONSE._serialized_end = 11811 - _RELEASESESSIONREQUEST._serialized_start = 11814 - _RELEASESESSIONREQUEST._serialized_end = 11985 - _RELEASESESSIONRESPONSE._serialized_start = 11987 - _RELEASESESSIONRESPONSE._serialized_end = 12095 - _FETCHERRORDETAILSREQUEST._serialized_start = 12098 - _FETCHERRORDETAILSREQUEST._serialized_end = 12299 - _FETCHERRORDETAILSRESPONSE._serialized_start = 12302 - _FETCHERRORDETAILSRESPONSE._serialized_end = 13857 - _FETCHERRORDETAILSRESPONSE_STACKTRACEELEMENT._serialized_start = 12531 - _FETCHERRORDETAILSRESPONSE_STACKTRACEELEMENT._serialized_end = 12705 - _FETCHERRORDETAILSRESPONSE_QUERYCONTEXT._serialized_start = 12708 - _FETCHERRORDETAILSRESPONSE_QUERYCONTEXT._serialized_end = 13076 - _FETCHERRORDETAILSRESPONSE_QUERYCONTEXT_CONTEXTTYPE._serialized_start = 13039 - _FETCHERRORDETAILSRESPONSE_QUERYCONTEXT_CONTEXTTYPE._serialized_end = 13076 - _FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE._serialized_start = 13079 - _FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE._serialized_end = 13488 - _FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE_MESSAGEPARAMETERSENTRY._serialized_start = 13390 - _FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE_MESSAGEPARAMETERSENTRY._serialized_end = 13458 - _FETCHERRORDETAILSRESPONSE_ERROR._serialized_start = 13491 - _FETCHERRORDETAILSRESPONSE_ERROR._serialized_end = 13838 - _SPARKCONNECTSERVICE._serialized_start = 13860 - _SPARKCONNECTSERVICE._serialized_end = 14806 + 
_EXECUTEPLANRESPONSE._serialized_end = 7578 + _EXECUTEPLANRESPONSE_SQLCOMMANDRESULT._serialized_start = 6492 + _EXECUTEPLANRESPONSE_SQLCOMMANDRESULT._serialized_end = 6563 + _EXECUTEPLANRESPONSE_ARROWBATCH._serialized_start = 6565 + _EXECUTEPLANRESPONSE_ARROWBATCH._serialized_end = 6683 + _EXECUTEPLANRESPONSE_METRICS._serialized_start = 6686 + _EXECUTEPLANRESPONSE_METRICS._serialized_end = 7203 + _EXECUTEPLANRESPONSE_METRICS_METRICOBJECT._serialized_start = 6781 + _EXECUTEPLANRESPONSE_METRICS_METRICOBJECT._serialized_end = 7113 + _EXECUTEPLANRESPONSE_METRICS_METRICOBJECT_EXECUTIONMETRICSENTRY._serialized_start = 6990 + _EXECUTEPLANRESPONSE_METRICS_METRICOBJECT_EXECUTIONMETRICSENTRY._serialized_end = 7113 + _EXECUTEPLANRESPONSE_METRICS_METRICVALUE._serialized_start = 7115 + _EXECUTEPLANRESPONSE_METRICS_METRICVALUE._serialized_end = 7203 + _EXECUTEPLANRESPONSE_OBSERVEDMETRICS._serialized_start = 7205 + _EXECUTEPLANRESPONSE_OBSERVEDMETRICS._serialized_end = 7321 + _EXECUTEPLANRESPONSE_RESULTCOMPLETE._serialized_start = 7323 + _EXECUTEPLANRESPONSE_RESULTCOMPLETE._serialized_end = 7339 + _EXECUTEPLANRESPONSE_EXECUTIONPROGRESS._serialized_start = 7342 + _EXECUTEPLANRESPONSE_EXECUTIONPROGRESS._serialized_end = 7561 + _KEYVALUE._serialized_start = 7580 + _KEYVALUE._serialized_end = 7645 + _CONFIGREQUEST._serialized_start = 7648 + _CONFIGREQUEST._serialized_end = 8676 + _CONFIGREQUEST_OPERATION._serialized_start = 7868 + _CONFIGREQUEST_OPERATION._serialized_end = 8366 + _CONFIGREQUEST_SET._serialized_start = 8368 + _CONFIGREQUEST_SET._serialized_end = 8420 + _CONFIGREQUEST_GET._serialized_start = 8422 + _CONFIGREQUEST_GET._serialized_end = 8447 + _CONFIGREQUEST_GETWITHDEFAULT._serialized_start = 8449 + _CONFIGREQUEST_GETWITHDEFAULT._serialized_end = 8512 + _CONFIGREQUEST_GETOPTION._serialized_start = 8514 + _CONFIGREQUEST_GETOPTION._serialized_end = 8545 + _CONFIGREQUEST_GETALL._serialized_start = 8547 + _CONFIGREQUEST_GETALL._serialized_end = 8595 + 
_CONFIGREQUEST_UNSET._serialized_start = 8597 + _CONFIGREQUEST_UNSET._serialized_end = 8624 + _CONFIGREQUEST_ISMODIFIABLE._serialized_start = 8626 + _CONFIGREQUEST_ISMODIFIABLE._serialized_end = 8660 + _CONFIGRESPONSE._serialized_start = 8679 + _CONFIGRESPONSE._serialized_end = 8854 + _ADDARTIFACTSREQUEST._serialized_start = 8857 + _ADDARTIFACTSREQUEST._serialized_end = 9728 + _ADDARTIFACTSREQUEST_ARTIFACTCHUNK._serialized_start = 9244 + _ADDARTIFACTSREQUEST_ARTIFACTCHUNK._serialized_end = 9297 + _ADDARTIFACTSREQUEST_SINGLECHUNKARTIFACT._serialized_start = 9299 + _ADDARTIFACTSREQUEST_SINGLECHUNKARTIFACT._serialized_end = 9410 + _ADDARTIFACTSREQUEST_BATCH._serialized_start = 9412 + _ADDARTIFACTSREQUEST_BATCH._serialized_end = 9505 + _ADDARTIFACTSREQUEST_BEGINCHUNKEDARTIFACT._serialized_start = 9508 + _ADDARTIFACTSREQUEST_BEGINCHUNKEDARTIFACT._serialized_end = 9701 + _ADDARTIFACTSRESPONSE._serialized_start = 9731 + _ADDARTIFACTSRESPONSE._serialized_end = 10003 + _ADDARTIFACTSRESPONSE_ARTIFACTSUMMARY._serialized_start = 9922 + _ADDARTIFACTSRESPONSE_ARTIFACTSUMMARY._serialized_end = 10003 + _ARTIFACTSTATUSESREQUEST._serialized_start = 10006 + _ARTIFACTSTATUSESREQUEST._serialized_end = 10201 + _ARTIFACTSTATUSESRESPONSE._serialized_start = 10204 + _ARTIFACTSTATUSESRESPONSE._serialized_end = 10556 + _ARTIFACTSTATUSESRESPONSE_STATUSESENTRY._serialized_start = 10399 + _ARTIFACTSTATUSESRESPONSE_STATUSESENTRY._serialized_end = 10514 + _ARTIFACTSTATUSESRESPONSE_ARTIFACTSTATUS._serialized_start = 10516 + _ARTIFACTSTATUSESRESPONSE_ARTIFACTSTATUS._serialized_end = 10556 + _INTERRUPTREQUEST._serialized_start = 10559 + _INTERRUPTREQUEST._serialized_end = 11031 + _INTERRUPTREQUEST_INTERRUPTTYPE._serialized_start = 10874 + _INTERRUPTREQUEST_INTERRUPTTYPE._serialized_end = 11002 + _INTERRUPTRESPONSE._serialized_start = 11034 + _INTERRUPTRESPONSE._serialized_end = 11178 + _REATTACHOPTIONS._serialized_start = 11180 + _REATTACHOPTIONS._serialized_end = 11233 + 
_REATTACHEXECUTEREQUEST._serialized_start = 11236 + _REATTACHEXECUTEREQUEST._serialized_end = 11511 + _RELEASEEXECUTEREQUEST._serialized_start = 11514 + _RELEASEEXECUTEREQUEST._serialized_end = 11968 + _RELEASEEXECUTEREQUEST_RELEASEALL._serialized_start = 11880 + _RELEASEEXECUTEREQUEST_RELEASEALL._serialized_end = 11892 + _RELEASEEXECUTEREQUEST_RELEASEUNTIL._serialized_start = 11894 + _RELEASEEXECUTEREQUEST_RELEASEUNTIL._serialized_end = 11941 + _RELEASEEXECUTERESPONSE._serialized_start = 11971 + _RELEASEEXECUTERESPONSE._serialized_end = 12136 + _RELEASESESSIONREQUEST._serialized_start = 12139 + _RELEASESESSIONREQUEST._serialized_end = 12310 + _RELEASESESSIONRESPONSE._serialized_start = 12312 + _RELEASESESSIONRESPONSE._serialized_end = 12420 + _FETCHERRORDETAILSREQUEST._serialized_start = 12423 + _FETCHERRORDETAILSREQUEST._serialized_end = 12624 + _FETCHERRORDETAILSRESPONSE._serialized_start = 12627 + _FETCHERRORDETAILSRESPONSE._serialized_end = 14182 + _FETCHERRORDETAILSRESPONSE_STACKTRACEELEMENT._serialized_start = 12856 + _FETCHERRORDETAILSRESPONSE_STACKTRACEELEMENT._serialized_end = 13030 + _FETCHERRORDETAILSRESPONSE_QUERYCONTEXT._serialized_start = 13033 + _FETCHERRORDETAILSRESPONSE_QUERYCONTEXT._serialized_end = 13401 + _FETCHERRORDETAILSRESPONSE_QUERYCONTEXT_CONTEXTTYPE._serialized_start = 13364 + _FETCHERRORDETAILSRESPONSE_QUERYCONTEXT_CONTEXTTYPE._serialized_end = 13401 + _FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE._serialized_start = 13404 + _FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE._serialized_end = 13813 + _FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE_MESSAGEPARAMETERSENTRY._serialized_start = 13715 + _FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE_MESSAGEPARAMETERSENTRY._serialized_end = 13783 + _FETCHERRORDETAILSRESPONSE_ERROR._serialized_start = 13816 + _FETCHERRORDETAILSRESPONSE_ERROR._serialized_end = 14163 + _SPARKCONNECTSERVICE._serialized_start = 14185 + _SPARKCONNECTSERVICE._serialized_end = 15131 # @@protoc_insertion_point(module_scope) diff --git 
a/python/pyspark/sql/connect/proto/base_pb2.pyi b/python/pyspark/sql/connect/proto/base_pb2.pyi index e4ed03dc6945b..879ec7e88b2e4 100644 --- a/python/pyspark/sql/connect/proto/base_pb2.pyi +++ b/python/pyspark/sql/connect/proto/base_pb2.pyi @@ -1178,7 +1178,7 @@ global___ExecutePlanRequest = ExecutePlanRequest class ExecutePlanResponse(google.protobuf.message.Message): """The response of a query, can be one or more for each request. Responses belonging to the same input query, carry the same `session_id`. - Next ID: 16 + Next ID: 17 """ DESCRIPTOR: google.protobuf.descriptor.Descriptor @@ -1400,6 +1400,46 @@ class ExecutePlanResponse(google.protobuf.message.Message): self, ) -> None: ... + class ExecutionProgress(google.protobuf.message.Message): + """This message is used to communicate progress about the query progress during the execution.""" + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + NUM_TASKS_FIELD_NUMBER: builtins.int + NUM_COMPLETED_TASKS_FIELD_NUMBER: builtins.int + NUM_STAGES_FIELD_NUMBER: builtins.int + NUM_COMPLETED_STAGES_FIELD_NUMBER: builtins.int + INPUT_BYTES_READ_FIELD_NUMBER: builtins.int + num_tasks: builtins.int + num_completed_tasks: builtins.int + num_stages: builtins.int + num_completed_stages: builtins.int + input_bytes_read: builtins.int + def __init__( + self, + *, + num_tasks: builtins.int = ..., + num_completed_tasks: builtins.int = ..., + num_stages: builtins.int = ..., + num_completed_stages: builtins.int = ..., + input_bytes_read: builtins.int = ..., + ) -> None: ... + def ClearField( + self, + field_name: typing_extensions.Literal[ + "input_bytes_read", + b"input_bytes_read", + "num_completed_stages", + b"num_completed_stages", + "num_completed_tasks", + b"num_completed_tasks", + "num_stages", + b"num_stages", + "num_tasks", + b"num_tasks", + ], + ) -> None: ... 
+ SESSION_ID_FIELD_NUMBER: builtins.int SERVER_SIDE_SESSION_ID_FIELD_NUMBER: builtins.int OPERATION_ID_FIELD_NUMBER: builtins.int @@ -1411,6 +1451,7 @@ class ExecutePlanResponse(google.protobuf.message.Message): GET_RESOURCES_COMMAND_RESULT_FIELD_NUMBER: builtins.int STREAMING_QUERY_MANAGER_COMMAND_RESULT_FIELD_NUMBER: builtins.int RESULT_COMPLETE_FIELD_NUMBER: builtins.int + EXECUTION_PROGRESS_FIELD_NUMBER: builtins.int EXTENSION_FIELD_NUMBER: builtins.int METRICS_FIELD_NUMBER: builtins.int OBSERVED_METRICS_FIELD_NUMBER: builtins.int @@ -1459,6 +1500,9 @@ class ExecutePlanResponse(google.protobuf.message.Message): def result_complete(self) -> global___ExecutePlanResponse.ResultComplete: """Response type informing if the stream is complete in reattachable execution.""" @property + def execution_progress(self) -> global___ExecutePlanResponse.ExecutionProgress: + """(Optional) Intermediate query progress reports.""" + @property def extension(self) -> google.protobuf.any_pb2.Any: """Support arbitrary result objects.""" @property @@ -1494,6 +1538,7 @@ class ExecutePlanResponse(google.protobuf.message.Message): streaming_query_manager_command_result: pyspark.sql.connect.proto.commands_pb2.StreamingQueryManagerCommandResult | None = ..., result_complete: global___ExecutePlanResponse.ResultComplete | None = ..., + execution_progress: global___ExecutePlanResponse.ExecutionProgress | None = ..., extension: google.protobuf.any_pb2.Any | None = ..., metrics: global___ExecutePlanResponse.Metrics | None = ..., observed_metrics: collections.abc.Iterable[global___ExecutePlanResponse.ObservedMetrics] @@ -1505,6 +1550,8 @@ class ExecutePlanResponse(google.protobuf.message.Message): field_name: typing_extensions.Literal[ "arrow_batch", b"arrow_batch", + "execution_progress", + b"execution_progress", "extension", b"extension", "get_resources_command_result", @@ -1532,6 +1579,8 @@ class ExecutePlanResponse(google.protobuf.message.Message): field_name: typing_extensions.Literal[ 
"arrow_batch", b"arrow_batch", + "execution_progress", + b"execution_progress", "extension", b"extension", "get_resources_command_result", @@ -1575,6 +1624,7 @@ class ExecutePlanResponse(google.protobuf.message.Message): "get_resources_command_result", "streaming_query_manager_command_result", "result_complete", + "execution_progress", "extension", ] | None diff --git a/python/pyspark/sql/connect/shell/__init__.py b/python/pyspark/sql/connect/shell/__init__.py new file mode 100644 index 0000000000000..eeb74c5ea7300 --- /dev/null +++ b/python/pyspark/sql/connect/shell/__init__.py @@ -0,0 +1,28 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +"""Helpers for integration with the IPython Shell""" + +import os + +from pyspark.sql.connect.dataframe import DataFrame + +PROGRESS_BAR_ENABLED = "SPARK_CONNECT_PROGRESS_BAR_ENABLED" + + +def progress_bar_enabled(): + return os.getenv(PROGRESS_BAR_ENABLED, "0") == "1" diff --git a/python/pyspark/sql/connect/shell/progress.py b/python/pyspark/sql/connect/shell/progress.py new file mode 100644 index 0000000000000..359fc277b4200 --- /dev/null +++ b/python/pyspark/sql/connect/shell/progress.py @@ -0,0 +1,92 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Implementation of a progress bar that is displayed while a query is running.""" + +import os, time +import sys + +from IPython.utils.terminal import get_terminal_size +from pyspark.sql.connect.shell import progress_bar_enabled + + +class Progress: + """This is a small helper class to visualize a textual progress bar. 
The interface is very simple and assumes + that nothing else prints to the standard output.""" + + SI_BYTE_SIZES = (1 << 60, 1 << 50, 1 << 40, 1 << 30, 1 << 20, 1 << 10, 1) + SI_BYTE_SUFFIXES = ("EiB", "PiB", "TiB", "GiB", "MiB", "KiB", "B") + + def __init__(self, char="*", min_width=80, output=sys.stdout, enabled=False): + """ + Constructs a new Progress bar. The progress bar is typically used in the blocking query execution path + to process the execution progress methods from the server. + Parameters + ---------- + char str the Default character to be used for printing the bar. + min_width numeric The minimum width of the progress bar + output file The output device to write the progress bar to. + """ + self._ticks = 0 + self._tick = 0 + x, y = get_terminal_size() + self._min_width = min_width + self._char = char + self._width = max(min(min_width, x), self._min_width) + self._max_printed = 0 + self._started = time.time() + self._enabled = enabled or progress_bar_enabled() + self._bytes_read = 0 + self._out = output + + def update_ticks(self, ticks: int, current: int, bytes_read: int) -> None: + """This method is called from the execution to update the progress bar with a new total + tick counter and the current position. 
This is necessary in case new stages get added with + new tasks and so the total task number will be udpated as well.""" + if ticks > 0 and current != self._tick: + self._ticks = ticks + self._tick = current + self._bytes_read = bytes_read + if self._tick > 0: + self.output() + + def finish(self): + """Clear the last line""" + if self._enabled: + print("\r" + " " * self._max_printed, end="", flush=True, file=self._out) + print("\r", end="", flush=True, file=self._out) + + def output(self): + """Writes the progress bar out.""" + if self._enabled: + val = int((self._tick / float(self._ticks)) * self._width) + bar = self._char * val + "-" * (self._width - val) + percent_complete = (self._tick / self._ticks) * 100 + elapsed = int(time.time() - self._started) + scanned = self._bytes_to_string(self._bytes_read) + buffer = f"\r[{bar}] {percent_complete:.2f}% Complete ({elapsed}s, Scanned {scanned})" + self._max_printed = max(len(buffer), self._max_printed) + print(buffer, end="", flush=True, file=self._out) + + @staticmethod + def _bytes_to_string(size: int) -> str: + """Helper method to convert a numeric bytes value into a human readable representation""" + i = 0 + while i < len(Progress.SI_BYTE_SIZES) - 1 and size < 2 * Progress.SI_BYTE_SIZES[i]: + i += 1 + result = float(size) / Progress.SI_BYTE_SIZES[i] + return f"{result:.1f} {Progress.SI_BYTE_SUFFIXES[i]}" diff --git a/python/pyspark/sql/tests/connect/shell/__init__.py b/python/pyspark/sql/tests/connect/shell/__init__.py new file mode 100644 index 0000000000000..cce3acad34a49 --- /dev/null +++ b/python/pyspark/sql/tests/connect/shell/__init__.py @@ -0,0 +1,16 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/python/pyspark/sql/tests/connect/shell/test_progress.py b/python/pyspark/sql/tests/connect/shell/test_progress.py new file mode 100644 index 0000000000000..c8909ab005859 --- /dev/null +++ b/python/pyspark/sql/tests/connect/shell/test_progress.py @@ -0,0 +1,76 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from io import StringIO +import unittest + +from pyspark.testing.connectutils import ( + PlanOnlyTestFixture, + should_test_connect, + connect_requirement_message, +) +from pyspark.testing.utils import PySparkErrorTestUtils +from pyspark.sql.connect.shell.progress import Progress + + +@unittest.skipIf(not should_test_connect, connect_requirement_message) +class ProgressBarTest(unittest.TestCase, PySparkErrorTestUtils): + def test_simple_progress(self): + buffer = StringIO() + p = Progress(output=buffer, enabled=True) + p.update_ticks(100, 50, 999) + val = buffer.getvalue() + self.assertIn("50.00%", val, "Current progress is 50%") + self.assertIn("****", val, "Should use the default char to print.") + self.assertIn("Scanned 999.0 B", val, "Should contain the bytes scanned metric.") + self.assertFalse(val.endswith("\r"), "Line should not be empty") + p.finish() + val = buffer.getvalue() + self.assertTrue(val.endswith("\r"), "Line should be empty") + + def test_configure_char(self): + buffer = StringIO() + p = Progress(char="+", output=buffer, enabled=True) + p.update_ticks(100, 50, 999) + val = buffer.getvalue() + self.assertIn("++++++", val, "Updating the char works.") + + def test_disabled_does_not_print(self): + buffer = StringIO() + p = Progress(char="+", output=buffer, enabled=False) + p.update_ticks(100, 50, 999) + p.update_ticks(100, 51, 999) + val = buffer.getvalue() + self.assertEqual(0, len(val), "If the printing is disabled, don't print.") + + def test_finish_progress(self): + buffer = StringIO() + p = Progress(char="+", output=buffer, enabled=True) + p.update_ticks(100, 50, 999) + p.finish() + self.assertTrue(buffer.getvalue().endswith("\r"), "Last line should be empty") + +if __name__ == "__main__": + from pyspark.sql.tests.connect.shell.test_progress import * # noqa: F401 + + try: + import xmlrunner # type: ignore + + testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2) + except ImportError: + testRunner = None + 
unittest.main(testRunner=testRunner, verbosity=2) \ No newline at end of file From 234b927ee0421a8c57aa2c644be14a5e28590f3d Mon Sep 17 00:00:00 2001 From: Martin Grund Date: Sat, 17 Feb 2024 21:07:23 +0100 Subject: [PATCH 02/27] update --- .../sql/connect/client/SparkResult.scala | 12 ++--- .../ConnectProgressExecutionListener.scala | 28 +++++++--- .../execution/ExecuteGrpcResponseSender.scala | 24 ++++----- ...onnectProgressExecutionListenerSuite.scala | 54 +++++++++++++++---- 4 files changed, 81 insertions(+), 37 deletions(-) diff --git a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkResult.scala b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkResult.scala index 32d1d256e6cdb..5f52c5c664fa4 100644 --- a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkResult.scala +++ b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkResult.scala @@ -43,12 +43,12 @@ private[sql] class SparkResult[T]( /** * Progress of the query execution. This information can be accessed from the iterator. 
*/ - case class Progress ( - totalTasks: Long = 0, - completedTasks: Long = 0, - totalStages: Long = 0, - completedStages: Long = 0, - inputBytesRead: Long = 0) + case class Progress( + totalTasks: Long = 0, + completedTasks: Long = 0, + totalStages: Long = 0, + completedStages: Long = 0, + inputBytesRead: Long = 0) var progress: Progress = new Progress() private[this] var opId: String = _ diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListener.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListener.scala index c3b38ca847d6f..02ff1ae295fb1 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListener.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListener.scala @@ -21,16 +21,17 @@ import org.apache.spark.internal.Logging import org.apache.spark.scheduler.{SparkListener, SparkListenerJobEnd, SparkListenerJobStart, SparkListenerStageCompleted, SparkListenerTaskEnd} /** - * A listener that tracks the execution of jobs and stages for a given set of tags. - * This is used to track the progress of a job that is being executed through the connect API. + * A listener that tracks the execution of jobs and stages for a given set of tags. This is used + * to track the progress of a job that is being executed through the connect API. * * The listener is instantiated once for the SparkConnectService and then used to track all the * current query executions. */ private[connect] class ConnectProgressExecutionListener extends SparkListener with Logging { + /** - * A tracker for a given tag. This is used to track the progress of an operation is being executed - * through the connect API. + * A tracker for a given tag. This is used to track the progress of an operation is being + * executed through the connect API. 
*/ class ExecutionTracker(var tag: String) { private[ConnectProgressExecutionListener] var jobs: Set[Int] = Set() @@ -45,15 +46,15 @@ private[connect] class ConnectProgressExecutionListener extends SparkListener wi @volatile private[ConnectProgressExecutionListener] var dirty = false /** - * Yield the current state of the tracker if it is dirty. A consumer of the tracker can provide - * a callback that will be called with the current state of the tracker if the tracker has new - * progress to report. + * Yield the current state of the tracker if it is dirty. A consumer of the tracker can + * provide a callback that will be called with the current state of the tracker if the tracker + * has new progress to report. * * If the tracker was marked as dirty, the state is reset after. */ def yieldWhenDirty(thunk: (Int, Int, Int, Int, Long) => Unit): Unit = { if (dirty) { - thunk(totalTasks, completedTasks, stages.size, totalTasks, inputBytesRead) + thunk(totalTasks, completedTasks, stages.size, completedStages, inputBytesRead) dirty = false } } @@ -65,6 +66,15 @@ private[connect] class ConnectProgressExecutionListener extends SparkListener wi jobs = jobs + job.jobId stages = stages ++ job.stageIds totalTasks += job.stageInfos.map(_.numTasks).sum + dirty = true + } + + def jobCount(): Int = { + jobs.size + } + + def stageCount(): Int = { + stages.size } } @@ -97,6 +107,7 @@ private[connect] class ConnectProgressExecutionListener extends SparkListener wi trackedTags.foreach({ case (tag, tracker) => if (tracker.stages.contains(stageCompleted.stageInfo.stageId)) { tracker.completedStages += 1 + tracker.dirty = true } }) } @@ -105,6 +116,7 @@ private[connect] class ConnectProgressExecutionListener extends SparkListener wi trackedTags.foreach({ case (tag, tracker) => if (tracker.jobs.contains(jobEnd.jobId)) { tracker.jobs -= jobEnd.jobId + tracker.dirty = true } }) } diff --git 
a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala index d2d55019e1783..ddce062008979 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala @@ -133,17 +133,18 @@ private[connect] class ExecuteGrpcResponseSender[T <: Message]( } /** - * This method is called repeatedly during the query execution to enqueue a new message to be send - * to the client about the current query progress. The message is not directly send to the client, - * but rather enqueued to in the response observer. + * This method is called repeatedly during the query execution to enqueue a new message to be + * send to the client about the current query progress. The message is not directly send to the + * client, but rather enqueued to in the response observer. */ private def enqueueProgressMessage(): Unit = { SparkConnectService.executionListener.foreach { listener => if (listener.trackedTags.contains(executeHolder.jobTag)) { val tracker = listener.trackedTags(executeHolder.jobTag) // Only send progress message if there is something new to report. 
- tracker.yieldWhenDirty { (tasks, tasksCompleted, stages, stagesCompleted, inputBytesRead) => - val response = ExecutePlanResponse + tracker.yieldWhenDirty { + (tasks, tasksCompleted, stages, stagesCompleted, inputBytesRead) => + val response = ExecutePlanResponse .newBuilder() .setExecutionProgress( ExecutePlanResponse.ExecutionProgress @@ -152,13 +153,12 @@ private[connect] class ExecuteGrpcResponseSender[T <: Message]( .setNumTasks(tasks) .setNumCompletedTasks(tasksCompleted) .setNumCompletedStages(stagesCompleted) - .setNumStages(stages) - ) - .build() - // There is a special case when the response observer has alreaady determined - // that the final message is send (and the stream will be closed) but we might want - // to send the progress message. In this case we ignore the result of the `onNext` call. - executeHolder.responseObserver.tryOnNext(response) + .setNumStages(stages)) + .build() + // There is a special case when the response observer has alreaady determined + // that the final message is send (and the stream will be closed) but we might want + // to send the progress message. In this case we ignore the result of the `onNext` call. 
+ executeHolder.responseObserver.tryOnNext(response) } } } diff --git a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListenerSuite.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListenerSuite.scala index bc0eeb5d4043f..8bb35d781610a 100644 --- a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListenerSuite.scala +++ b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListenerSuite.scala @@ -68,17 +68,20 @@ class ConnectProgressExecutionListenerSuite extends SparkFunSuite with MockitoSu // Trigger the event listener.onJobStart(testJobStart) val t = listener.trackedTags(testTag) - assert(t.jobs.size === 1) - assert(t.jobs(testJobStart.jobId)) - assert(t.stages.size == 2) - assert(t.totalTasks == 2) + + t.yieldWhenDirty((totalTasks, completedTasks, totalStages, completedStages, bytesRead) => { + assert(totalTasks == 2) + assert(completedTasks == 0) + assert(totalStages == 2) + assert(completedStages == 0) + assert(bytesRead == 0) + }) } test("taskDone") { val listener = new ConnectProgressExecutionListener listener.registerJobTag(testTag) listener.onJobStart(testJobStart) - val t = listener.trackedTags(testTag) // Finish the tasks val taskEnd = SparkListenerTaskEnd( @@ -90,16 +93,45 @@ class ConnectProgressExecutionListenerSuite extends SparkFunSuite with MockitoSu testStage1Task1ExecutorMetrics, testStage1Task1Metrics) - assert(t.completedTasks == 0) + val t = listener.trackedTags(testTag) + var yielded = false + t.yieldWhenDirty { (totalTasks, completedTasks, totalStages, completedStages, bytesRead) => + assert(totalTasks == 2) + assert(completedTasks == 0) + assert(totalStages == 2) + assert(completedStages == 0) + yielded = true + } + assert(yielded, "Must updated with results") + + yielded = false listener.onTaskEnd(taskEnd) - 
assert(t.inputBytesRead == 500) - assert(t.completedTasks == 1) - assert(t.completedStages == 0) + t.yieldWhenDirty { (totalTasks, completedTasks, totalStages, completedStages, bytesRead) => + assert(totalTasks == 2) + assert(completedTasks == 1) + assert(totalStages == 2) + assert(completedStages == 0) + assert(bytesRead == 500) + yielded = true + } + assert(yielded, "Must updated with results") + yielded = false + t.yieldWhenDirty { (totalTasks, completedTasks, totalStages, completedStages, bytesRead) => + yielded = true + } + assert(!yielded, "Must not update if not dirty") val stageEnd = SparkListenerStageCompleted(testStage1) listener.onStageCompleted(stageEnd) - assert(t.completedStages == 1) - + t.yieldWhenDirty { (totalTasks, completedTasks, totalStages, completedStages, bytesRead) => + assert(totalTasks == 2) + assert(completedTasks == 1) + assert(totalStages == 2) + assert(completedStages == 1) + assert(bytesRead == 500) + yielded = true + } + assert(yielded, "Must updated with results") } } From f78519e0ad131b9c5ae09ab7afeae538c557893d Mon Sep 17 00:00:00 2001 From: Martin Grund Date: Sun, 18 Feb 2024 08:00:26 +0100 Subject: [PATCH 03/27] update --- python/pyspark/sql/connect/shell/progress.py | 9 ++++++++- python/pyspark/sql/tests/connect/shell/test_progress.py | 3 ++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/python/pyspark/sql/connect/shell/progress.py b/python/pyspark/sql/connect/shell/progress.py index 359fc277b4200..6d5992fb9173c 100644 --- a/python/pyspark/sql/connect/shell/progress.py +++ b/python/pyspark/sql/connect/shell/progress.py @@ -20,7 +20,14 @@ import os, time import sys -from IPython.utils.terminal import get_terminal_size +try: + from IPython.utils.terminal import get_terminal_size +except ImportError: + + def get_terminal_size(): + return (80, 25) + + from pyspark.sql.connect.shell import progress_bar_enabled diff --git a/python/pyspark/sql/tests/connect/shell/test_progress.py 
b/python/pyspark/sql/tests/connect/shell/test_progress.py index c8909ab005859..e3a29642fbd53 100644 --- a/python/pyspark/sql/tests/connect/shell/test_progress.py +++ b/python/pyspark/sql/tests/connect/shell/test_progress.py @@ -64,6 +64,7 @@ def test_finish_progress(self): p.finish() self.assertTrue(buffer.getvalue().endswith("\r"), "Last line should be empty") + if __name__ == "__main__": from pyspark.sql.tests.connect.shell.test_progress import * # noqa: F401 @@ -73,4 +74,4 @@ def test_finish_progress(self): testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2) except ImportError: testRunner = None - unittest.main(testRunner=testRunner, verbosity=2) \ No newline at end of file + unittest.main(testRunner=testRunner, verbosity=2) From 962dfd4036f297554db5347d20e71b2b97e6c48b Mon Sep 17 00:00:00 2001 From: Martin Grund Date: Sun, 18 Feb 2024 18:14:43 +0100 Subject: [PATCH 04/27] fix race condition --- .../execution/ConnectProgressExecutionListener.scala | 4 ++++ .../sql/connect/execution/ExecuteGrpcResponseSender.scala | 6 ++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListener.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListener.scala index 02ff1ae295fb1..4231dd6294554 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListener.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListener.scala @@ -121,6 +121,10 @@ private[connect] class ConnectProgressExecutionListener extends SparkListener wi }) } + def tryGetTracker(tag: String): Option[ExecutionTracker] = { + trackedTags.get(tag) + } + def registerJobTag(tag: String): Unit = { trackedTags += tag -> new ExecutionTracker(tag) } diff --git 
a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala index ddce062008979..4c82521e5c5a3 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala @@ -139,8 +139,10 @@ private[connect] class ExecuteGrpcResponseSender[T <: Message]( */ private def enqueueProgressMessage(): Unit = { SparkConnectService.executionListener.foreach { listener => - if (listener.trackedTags.contains(executeHolder.jobTag)) { - val tracker = listener.trackedTags(executeHolder.jobTag) + // It is possible, that the tracker is no longer available and in this + // case we simply ignore it and do not send any progress message. This avoids + // having to synchronize on the listener. + listener.tryGetTracker(executeHolder.jobTag).foreach { tracker => // Only send progress message if there is something new to report. 
tracker.yieldWhenDirty { (tasks, tasksCompleted, stages, stagesCompleted, inputBytesRead) => From 4947e79e461a840d782dc11e9adb22b26ab7e749 Mon Sep 17 00:00:00 2001 From: Martin Grund Date: Sun, 18 Feb 2024 21:39:17 +0100 Subject: [PATCH 05/27] fix lint --- .../main/scala/org/apache/spark/deploy/SparkSubmit.scala | 7 +++++-- python/pyspark/shell.py | 9 +++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 5e7dc799ab071..1b15c36ddd2c7 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -727,8 +727,11 @@ private[spark] class SparkSubmit extends Logging { } } - // In case of shells, spark.ui.showConsoleProgress can be true by default or by user. - if (isShell(args.primaryResource) && !sparkConf.contains(UI_SHOW_CONSOLE_PROGRESS)) { + // In case of shells, spark.ui.showConsoleProgress can be true by default or by user. Except, + // when Spark Connect is in local mode, because Spark Connect support its own progress + // reporting. 
+ if (isShell(args.primaryResource) && !sparkConf.contains(UI_SHOW_CONSOLE_PROGRESS) && + !sparkConf.contains("spark.local.connect")) { sparkConf.set(UI_SHOW_CONSOLE_PROGRESS, true) } diff --git a/python/pyspark/shell.py b/python/pyspark/shell.py index 2d03e9fa7dc08..da0840a925355 100644 --- a/python/pyspark/shell.py +++ b/python/pyspark/shell.py @@ -57,15 +57,16 @@ os.environ[PROGRESS_BAR_ENABLED] = "1" else: val = os.getenv(PROGRESS_BAR_ENABLED) - if val.lower().strip() == 'false': + if val.lower().strip() == "false": os.environ[PROGRESS_BAR_ENABLED] = "0" - elif val.lower().strip() == 'true': + elif val.lower().strip() == "true": os.environ[PROGRESS_BAR_ENABLED] = "1" val = os.environ[PROGRESS_BAR_ENABLED] - if not val in ('1', '0'): + if not val in ("1", "0"): raise ValueError( - f"Environment variable '{PROGRESS_BAR_ENABLED}' must be set to either 1 or 0, found: {val}") + f"Environment variable '{PROGRESS_BAR_ENABLED}' must be set to either 1 or 0, found: {val}" + ) except Exception: import sys From 228717f39c92d60536f168fbd5b92ce6fb2e4d47 Mon Sep 17 00:00:00 2001 From: Martin Grund Date: Mon, 19 Feb 2024 11:03:06 +0100 Subject: [PATCH 06/27] lint --- python/pyspark/shell.py | 7 ++++--- python/pyspark/sql/connect/client/core.py | 3 ++- python/pyspark/sql/connect/shell/__init__.py | 2 -- python/pyspark/sql/connect/shell/progress.py | 12 +++++++----- .../pyspark/sql/tests/connect/shell/test_progress.py | 1 - 5 files changed, 13 insertions(+), 12 deletions(-) diff --git a/python/pyspark/shell.py b/python/pyspark/shell.py index da0840a925355..a1ea99181c3ed 100644 --- a/python/pyspark/shell.py +++ b/python/pyspark/shell.py @@ -53,7 +53,7 @@ from pyspark.sql.connect.shell import PROGRESS_BAR_ENABLED # Check if th eprogress bar needs to be disabled. 
- if not PROGRESS_BAR_ENABLED in os.environ: + if PROGRESS_BAR_ENABLED not in os.environ: os.environ[PROGRESS_BAR_ENABLED] = "1" else: val = os.getenv(PROGRESS_BAR_ENABLED) @@ -63,9 +63,10 @@ os.environ[PROGRESS_BAR_ENABLED] = "1" val = os.environ[PROGRESS_BAR_ENABLED] - if not val in ("1", "0"): + if val not in ("1", "0"): raise ValueError( - f"Environment variable '{PROGRESS_BAR_ENABLED}' must be set to either 1 or 0, found: {val}" + f"Environment variable '{PROGRESS_BAR_ENABLED}' must " + f"be set to either 1 or 0, found: {val}" ) except Exception: diff --git a/python/pyspark/sql/connect/client/core.py b/python/pyspark/sql/connect/client/core.py index 95064f6b03330..749473caf5f23 100644 --- a/python/pyspark/sql/connect/client/core.py +++ b/python/pyspark/sql/connect/client/core.py @@ -1310,8 +1310,9 @@ def handle_response( logger.debug(f"Interrupt request received for operation={req.operation_id}") try: self.interrupt_operation(req.operation_id) - except: + except Exception as e: # Swallow all errors if aborted. 
+ logger.debug(f"Caught an error during interrupt handling, silenced: {e}") pass if not progress is None: progress.finish() diff --git a/python/pyspark/sql/connect/shell/__init__.py b/python/pyspark/sql/connect/shell/__init__.py index eeb74c5ea7300..f757f8af4175c 100644 --- a/python/pyspark/sql/connect/shell/__init__.py +++ b/python/pyspark/sql/connect/shell/__init__.py @@ -19,8 +19,6 @@ import os -from pyspark.sql.connect.dataframe import DataFrame - PROGRESS_BAR_ENABLED = "SPARK_CONNECT_PROGRESS_BAR_ENABLED" diff --git a/python/pyspark/sql/connect/shell/progress.py b/python/pyspark/sql/connect/shell/progress.py index 6d5992fb9173c..86828b7cffada 100644 --- a/python/pyspark/sql/connect/shell/progress.py +++ b/python/pyspark/sql/connect/shell/progress.py @@ -17,7 +17,7 @@ """Implementation of a progress bar that is displayed while a query is running.""" -import os, time +import time import sys try: @@ -32,16 +32,18 @@ def get_terminal_size(): class Progress: - """This is a small helper class to visualize a textual progress bar. The interface is very simple and assumes - that nothing else prints to the standard output.""" + """This is a small helper class to visualize a textual progress bar. + he interface is very simple and assumes that nothing else prints to the + standard output.""" SI_BYTE_SIZES = (1 << 60, 1 << 50, 1 << 40, 1 << 30, 1 << 20, 1 << 10, 1) SI_BYTE_SUFFIXES = ("EiB", "PiB", "TiB", "GiB", "MiB", "KiB", "B") def __init__(self, char="*", min_width=80, output=sys.stdout, enabled=False): """ - Constructs a new Progress bar. The progress bar is typically used in the blocking query execution path - to process the execution progress methods from the server. + Constructs a new Progress bar. The progress bar is typically used in + the blocking query execution path to process the execution progress + methods from the server. Parameters ---------- char str the Default character to be used for printing the bar. 
diff --git a/python/pyspark/sql/tests/connect/shell/test_progress.py b/python/pyspark/sql/tests/connect/shell/test_progress.py index e3a29642fbd53..55667a9adb08e 100644 --- a/python/pyspark/sql/tests/connect/shell/test_progress.py +++ b/python/pyspark/sql/tests/connect/shell/test_progress.py @@ -19,7 +19,6 @@ import unittest from pyspark.testing.connectutils import ( - PlanOnlyTestFixture, should_test_connect, connect_requirement_message, ) From 36d7924bfae5165f6aa301d4c755dee0ced6ff40 Mon Sep 17 00:00:00 2001 From: Martin Grund Date: Mon, 19 Feb 2024 13:22:47 +0100 Subject: [PATCH 07/27] lint --- python/pyspark/sql/connect/client/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/sql/connect/client/core.py b/python/pyspark/sql/connect/client/core.py index 749473caf5f23..e89389cdfa74c 100644 --- a/python/pyspark/sql/connect/client/core.py +++ b/python/pyspark/sql/connect/client/core.py @@ -1314,7 +1314,7 @@ def handle_response( # Swallow all errors if aborted. 
logger.debug(f"Caught an error during interrupt handling, silenced: {e}") pass - if not progress is None: + if progress is not None: progress.finish() except Exception as error: self._handle_error(error) From dfb29e45b6122813a1e972ba415d9b006b20880b Mon Sep 17 00:00:00 2001 From: Martin Grund Date: Wed, 21 Feb 2024 15:01:34 +0100 Subject: [PATCH 08/27] fix --- .../ConnectProgressExecutionListener.scala | 56 +++++++++++-------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListener.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListener.scala index 4231dd6294554..68c0a7d0f247e 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListener.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListener.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.connect.execution +import java.util.concurrent.atomic.{AtomicBoolean, AtomicInteger, AtomicLong} + import org.apache.spark.internal.Logging import org.apache.spark.scheduler.{SparkListener, SparkListenerJobEnd, SparkListenerJobStart, SparkListenerStageCompleted, SparkListenerTaskEnd} @@ -34,16 +36,18 @@ private[connect] class ConnectProgressExecutionListener extends SparkListener wi * executed through the connect API. */ class ExecutionTracker(var tag: String) { + // The set of jobs that are being tracked by this tracker. We always only add to this list + // but never remove. This is to avoid concurrency issues. private[ConnectProgressExecutionListener] var jobs: Set[Int] = Set() + // The set of stages that are being tracked by this tracker. We always only add to this list + // but never remove. This is to avoid concurrency issues. 
private[ConnectProgressExecutionListener] var stages: Set[Int] = Set() - private[ConnectProgressExecutionListener] var totalTasks = 0 - private[ConnectProgressExecutionListener] var completedTasks = 0 - private[ConnectProgressExecutionListener] var completedStages = 0 - private[ConnectProgressExecutionListener] var inputBytesRead = 0L - // The tracker is marked as dirty if it has new progress to report. This variable does - // not need to be protected by a mutex even if multiple threads would read the same dirty - // state the output is expected to be identical. - @volatile private[ConnectProgressExecutionListener] var dirty = false + private[ConnectProgressExecutionListener] val totalTasks = new AtomicInteger(0) + private[ConnectProgressExecutionListener] val completedTasks = new AtomicInteger(0) + private[ConnectProgressExecutionListener] val completedStages = new AtomicInteger(0) + private[ConnectProgressExecutionListener] val inputBytesRead = new AtomicLong(0) + // The tracker is marked as dirty if it has new progress to report. + private[ConnectProgressExecutionListener] val dirty = new AtomicBoolean(false) /** * Yield the current state of the tracker if it is dirty. A consumer of the tracker can @@ -53,20 +57,25 @@ private[connect] class ConnectProgressExecutionListener extends SparkListener wi * If the tracker was marked as dirty, the state is reset after. */ def yieldWhenDirty(thunk: (Int, Int, Int, Int, Long) => Unit): Unit = { - if (dirty) { - thunk(totalTasks, completedTasks, stages.size, completedStages, inputBytesRead) - dirty = false + if (dirty.get()) { + thunk( + totalTasks.get(), + completedTasks.get(), + stages.size, + completedStages.get(), + inputBytesRead.get()) + dirty.set(false) } } /** * Add a job to the tracker. 
This will add the job to the list of jobs that are being tracked */ - def addJob(job: SparkListenerJobStart): Unit = { + def addJob(job: SparkListenerJobStart): Unit = synchronized { jobs = jobs + job.jobId stages = stages ++ job.stageIds - totalTasks += job.stageInfos.map(_.numTasks).sum - dirty = true + totalTasks.updateAndGet(_ + job.stageInfos.map(_.numTasks).sum) + dirty.set(true) } def jobCount(): Int = { @@ -78,15 +87,15 @@ private[connect] class ConnectProgressExecutionListener extends SparkListener wi } } - val trackedTags = collection.mutable.Map[String, ExecutionTracker]() + val trackedTags = collection.concurrent.TrieMap[String, ExecutionTracker]() override def onJobStart(jobStart: SparkListenerJobStart): Unit = { val tags = jobStart.properties.getProperty("spark.job.tags") if (tags != null) { val thisJobTags = tags.split(",").map(_.trim).toSet thisJobTags.foreach { tag => - if (trackedTags.contains(tag)) { - trackedTags(tag).addJob(jobStart) + trackedTags.get(tag).foreach { tracker => + tracker.addJob(jobStart) } } } @@ -96,9 +105,9 @@ private[connect] class ConnectProgressExecutionListener extends SparkListener wi // Check if the task belongs to a job that we are tracking. 
trackedTags.foreach({ case (tag, tracker) => if (tracker.stages.contains(taskEnd.stageId)) { - tracker.completedTasks += 1 - tracker.inputBytesRead += taskEnd.taskMetrics.inputMetrics.bytesRead - tracker.dirty = true + tracker.completedTasks.incrementAndGet() + tracker.inputBytesRead.updateAndGet(_ + taskEnd.taskMetrics.inputMetrics.bytesRead) + tracker.dirty.set(true) } }) } @@ -106,8 +115,8 @@ private[connect] class ConnectProgressExecutionListener extends SparkListener wi override def onStageCompleted(stageCompleted: SparkListenerStageCompleted): Unit = { trackedTags.foreach({ case (tag, tracker) => if (tracker.stages.contains(stageCompleted.stageInfo.stageId)) { - tracker.completedStages += 1 - tracker.dirty = true + tracker.completedStages.incrementAndGet() + tracker.dirty.set(true) } }) } @@ -115,8 +124,7 @@ private[connect] class ConnectProgressExecutionListener extends SparkListener wi override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = { trackedTags.foreach({ case (tag, tracker) => if (tracker.jobs.contains(jobEnd.jobId)) { - tracker.jobs -= jobEnd.jobId - tracker.dirty = true + tracker.dirty.set(true) } }) } From be08f539fddf1e2ea3c198f0659d34a9083fe3e6 Mon Sep 17 00:00:00 2001 From: Martin Grund Date: Wed, 20 Mar 2024 12:42:08 +0100 Subject: [PATCH 09/27] more progress stuff --- .../main/protobuf/spark/connect/base.proto | 1 + .../spark/sql/connect/config/Connect.scala | 7 + .../ConnectProgressExecutionListener.scala | 22 ++- .../execution/ExecuteGrpcResponseSender.scala | 9 +- ...onnectProgressExecutionListenerSuite.scala | 67 ++++--- python/pyspark/sql/connect/client/core.py | 29 ++- python/pyspark/sql/connect/proto/base_pb2.py | 174 +++++++++--------- python/pyspark/sql/connect/proto/base_pb2.pyi | 5 + python/pyspark/sql/connect/session.py | 7 + python/pyspark/sql/connect/shell/progress.py | 37 +++- .../sql/tests/connect/shell/test_progress.py | 30 ++- 11 files changed, 253 insertions(+), 135 deletions(-) diff --git 
a/connector/connect/common/src/main/protobuf/spark/connect/base.proto b/connector/connect/common/src/main/protobuf/spark/connect/base.proto index ff2537f2bda4f..80dfeb15e0e78 100644 --- a/connector/connect/common/src/main/protobuf/spark/connect/base.proto +++ b/connector/connect/common/src/main/protobuf/spark/connect/base.proto @@ -431,6 +431,7 @@ message ExecutePlanResponse { int64 num_stages = 3; int64 num_completed_stages = 4; int64 input_bytes_read = 5; + int64 num_inflight_tasks = 6; } } diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/config/Connect.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/config/Connect.scala index 39bf1a630af62..74c8458019fce 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/config/Connect.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/config/Connect.scala @@ -265,4 +265,11 @@ object Connect { .version("4.0.0") .bytesConf(ByteUnit.BYTE) .createWithDefault(1024) + + val CONNECT_PROGRESS_REPORT_INTERVAL = + buildStaticConf("spark.connect.progress.reportInterval") + .doc("The interval at which the progress of a query is reported to the client.") + .version("4.0.0") + .timeConf(TimeUnit.MILLISECONDS) + .createWithDefaultString("2s") } diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListener.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListener.scala index 68c0a7d0f247e..913b60b127cfa 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListener.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListener.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.connect.execution import java.util.concurrent.atomic.{AtomicBoolean, AtomicInteger, AtomicLong} import 
org.apache.spark.internal.Logging -import org.apache.spark.scheduler.{SparkListener, SparkListenerJobEnd, SparkListenerJobStart, SparkListenerStageCompleted, SparkListenerTaskEnd} +import org.apache.spark.scheduler.{SparkListener, SparkListenerJobEnd, SparkListenerJobStart, SparkListenerStageCompleted, SparkListenerTaskEnd, SparkListenerTaskStart} /** * A listener that tracks the execution of jobs and stages for a given set of tags. This is used @@ -48,6 +48,8 @@ private[connect] class ConnectProgressExecutionListener extends SparkListener wi private[ConnectProgressExecutionListener] val inputBytesRead = new AtomicLong(0) // The tracker is marked as dirty if it has new progress to report. private[ConnectProgressExecutionListener] val dirty = new AtomicBoolean(false) + // Tracks all currently running tasks for a particular tracker. + private[ConnectProgressExecutionListener] val inFlightTasks = new AtomicInteger(0) /** * Yield the current state of the tracker if it is dirty. A consumer of the tracker can @@ -56,13 +58,14 @@ private[connect] class ConnectProgressExecutionListener extends SparkListener wi * * If the tracker was marked as dirty, the state is reset after. */ - def yieldWhenDirty(thunk: (Int, Int, Int, Int, Long) => Unit): Unit = { + def yieldWhenDirty(thunk: (Int, Int, Int, Int, Int, Long) => Unit): Unit = { if (dirty.get()) { thunk( totalTasks.get(), completedTasks.get(), stages.size, completedStages.get(), + inFlightTasks.get(), inputBytesRead.get()) dirty.set(false) } @@ -101,12 +104,27 @@ private[connect] class ConnectProgressExecutionListener extends SparkListener wi } } + override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = { + // Check if the task belongs to a job that we are tracking. 
+ trackedTags.foreach({ case (tag, tracker) => + if (tracker.stages.contains(taskStart.stageId)) { + tracker.inFlightTasks.incrementAndGet() + tracker.dirty.set(true) + } + }) + } + override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = { // Check if the task belongs to a job that we are tracking. trackedTags.foreach({ case (tag, tracker) => if (tracker.stages.contains(taskEnd.stageId)) { tracker.completedTasks.incrementAndGet() tracker.inputBytesRead.updateAndGet(_ + taskEnd.taskMetrics.inputMetrics.bytesRead) + // This should never become negative, simply reset to zero if it does. + tracker.inFlightTasks.decrementAndGet() + if (tracker.inFlightTasks.get() < 0) { + tracker.inFlightTasks.set(0) + } tracker.dirty.set(true) } }) diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala index 4c82521e5c5a3..553162f858474 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala @@ -24,7 +24,7 @@ import org.apache.spark.{SparkEnv, SparkSQLException} import org.apache.spark.connect.proto.ExecutePlanResponse import org.apache.spark.internal.Logging import org.apache.spark.sql.connect.common.ProtoUtils -import org.apache.spark.sql.connect.config.Connect.{CONNECT_EXECUTE_REATTACHABLE_SENDER_MAX_STREAM_DURATION, CONNECT_EXECUTE_REATTACHABLE_SENDER_MAX_STREAM_SIZE} +import org.apache.spark.sql.connect.config.Connect.{CONNECT_EXECUTE_REATTACHABLE_SENDER_MAX_STREAM_DURATION, CONNECT_EXECUTE_REATTACHABLE_SENDER_MAX_STREAM_SIZE, CONNECT_PROGRESS_REPORT_INTERVAL} import org.apache.spark.sql.connect.service.{ExecuteHolder, SparkConnectService} import org.apache.spark.sql.connect.utils.ErrorUtils @@ -145,7 +145,7 @@ 
private[connect] class ExecuteGrpcResponseSender[T <: Message]( listener.tryGetTracker(executeHolder.jobTag).foreach { tracker => // Only send progress message if there is something new to report. tracker.yieldWhenDirty { - (tasks, tasksCompleted, stages, stagesCompleted, inputBytesRead) => + (tasks, tasksCompleted, stages, stagesCompleted, inflightTasks, inputBytesRead) => val response = ExecutePlanResponse .newBuilder() .setExecutionProgress( @@ -155,7 +155,8 @@ private[connect] class ExecuteGrpcResponseSender[T <: Message]( .setNumTasks(tasks) .setNumCompletedTasks(tasksCompleted) .setNumCompletedStages(stagesCompleted) - .setNumStages(stages)) + .setNumStages(stages) + .setNumInflightTasks(inflightTasks)) .build() // There is a special case when the response observer has alreaady determined // that the final message is send (and the stream will be closed) but we might want @@ -238,7 +239,7 @@ private[connect] class ExecuteGrpcResponseSender[T <: Message]( // monitor, and will notify upon state change. if (response.isEmpty) { // Wake up more frequently to send the progress updates. 
- val timeout = 2000 + val timeout = SparkEnv.get.conf.get(CONNECT_PROGRESS_REPORT_INTERVAL) logTrace(s"Wait for response to become available with timeout=$timeout ms.") executionObserver.responseLock.wait(timeout) enqueueProgressMessage() diff --git a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListenerSuite.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListenerSuite.scala index 8bb35d781610a..ee6b9664286d4 100644 --- a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListenerSuite.scala +++ b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListenerSuite.scala @@ -24,7 +24,7 @@ import org.scalatestplus.mockito.MockitoSugar import org.apache.spark.{SparkFunSuite, Success} import org.apache.spark.executor.{ExecutorMetrics, InputMetrics, TaskMetrics} -import org.apache.spark.scheduler.{SparkListenerJobStart, SparkListenerStageCompleted, SparkListenerTaskEnd, StageInfo, TaskInfo} +import org.apache.spark.scheduler.{SparkListenerJobStart, SparkListenerStageCompleted, SparkListenerTaskEnd, SparkListenerTaskStart, StageInfo, TaskInfo} class ConnectProgressExecutionListenerSuite extends SparkFunSuite with MockitoSugar { @@ -53,6 +53,7 @@ class ConnectProgressExecutionListenerSuite extends SparkFunSuite with MockitoSu testProperties.setProperty("spark.job.tags", s"otherTag,$testTag,anotherTag") val testJobStart = SparkListenerJobStart(1, 1, Seq(testStage1, testStage2), testProperties) + val testTaskStart = SparkListenerTaskStart(1, 1, testStage1Task1) test("onJobStart with no matching tags") { val listener = new ConnectProgressExecutionListener @@ -69,13 +70,15 @@ class ConnectProgressExecutionListenerSuite extends SparkFunSuite with MockitoSu listener.onJobStart(testJobStart) val t = listener.trackedTags(testTag) - 
t.yieldWhenDirty((totalTasks, completedTasks, totalStages, completedStages, bytesRead) => { - assert(totalTasks == 2) - assert(completedTasks == 0) - assert(totalStages == 2) - assert(completedStages == 0) - assert(bytesRead == 0) - }) + t.yieldWhenDirty( + (totalTasks, completedTasks, totalStages, completedStages, inflight, bytesRead) => { + assert(totalTasks == 2) + assert(completedTasks == 0) + assert(totalStages == 2) + assert(completedStages == 0) + assert(bytesRead == 0) + assert(inflight == 0) + }) } test("taskDone") { @@ -95,41 +98,45 @@ class ConnectProgressExecutionListenerSuite extends SparkFunSuite with MockitoSu val t = listener.trackedTags(testTag) var yielded = false - t.yieldWhenDirty { (totalTasks, completedTasks, totalStages, completedStages, bytesRead) => - assert(totalTasks == 2) - assert(completedTasks == 0) - assert(totalStages == 2) - assert(completedStages == 0) - yielded = true + t.yieldWhenDirty { + (totalTasks, completedTasks, totalStages, completedStages, inflight, bytesRead) => + assert(totalTasks == 2) + assert(completedTasks == 0) + assert(totalStages == 2) + assert(completedStages == 0) + yielded = true } assert(yielded, "Must updated with results") yielded = false listener.onTaskEnd(taskEnd) - t.yieldWhenDirty { (totalTasks, completedTasks, totalStages, completedStages, bytesRead) => - assert(totalTasks == 2) - assert(completedTasks == 1) - assert(totalStages == 2) - assert(completedStages == 0) - assert(bytesRead == 500) - yielded = true + t.yieldWhenDirty { + (totalTasks, completedTasks, totalStages, completedStages, inflight, bytesRead) => + assert(totalTasks == 2) + assert(completedTasks == 1) + assert(totalStages == 2) + assert(completedStages == 0) + assert(bytesRead == 500) + yielded = true } assert(yielded, "Must updated with results") yielded = false - t.yieldWhenDirty { (totalTasks, completedTasks, totalStages, completedStages, bytesRead) => - yielded = true + t.yieldWhenDirty { + (totalTasks, completedTasks, totalStages, 
completedStages, inflight, bytesRead) => + yielded = true } assert(!yielded, "Must not update if not dirty") val stageEnd = SparkListenerStageCompleted(testStage1) listener.onStageCompleted(stageEnd) - t.yieldWhenDirty { (totalTasks, completedTasks, totalStages, completedStages, bytesRead) => - assert(totalTasks == 2) - assert(completedTasks == 1) - assert(totalStages == 2) - assert(completedStages == 1) - assert(bytesRead == 500) - yielded = true + t.yieldWhenDirty { + (totalTasks, completedTasks, totalStages, completedStages, inflight, bytesRead) => + assert(totalTasks == 2) + assert(completedTasks == 1) + assert(totalStages == 2) + assert(completedStages == 1) + assert(bytesRead == 500) + yielded = true } assert(yielded, "Must updated with results") } diff --git a/python/pyspark/sql/connect/client/core.py b/python/pyspark/sql/connect/client/core.py index e89389cdfa74c..51a1cc78da770 100644 --- a/python/pyspark/sql/connect/client/core.py +++ b/python/pyspark/sql/connect/client/core.py @@ -44,6 +44,7 @@ cast, TYPE_CHECKING, Sequence, + Callable, ) import pandas as pd @@ -90,7 +91,7 @@ from pyspark.rdd import PythonEvalType from pyspark.storagelevel import StorageLevel from pyspark.errors import PySparkValueError, PySparkAssertionError, PySparkNotImplementedError -from pyspark.sql.connect.shell.progress import Progress +from pyspark.sql.connect.shell.progress import Progress, ProgressHandler if TYPE_CHECKING: from google.rpc.error_details_pb2 import ErrorInfo @@ -676,6 +677,29 @@ class ClientThreadLocals(threading.local): self._profiler_collector = ConnectProfilerCollector() + self._progress_handlers: Iterable[ProgressHandler] = [] + + def register_progress_handler(self, handler: Callable) -> None: + """ + Register a progress handler to be called when a progress message is received. 
+ Parameters + ---------- + handler + + Returns + ------- + + """ + if handler in self._progress_handlers: + return + self._progress_handlers.append(handler) + + def clear_handlers(self) -> None: + self._progress_handlers.clear() + + def remove_progress_handler(self, handler: Callable) -> None: + self._progress_handlers.remove(handler) + def _retrying(self) -> "Retrying": return Retrying(self._retry_policies) @@ -1262,6 +1286,7 @@ def handle_response( b.execution_progress.num_tasks, b.execution_progress.num_completed_tasks, b.execution_progress.input_bytes_read, + b.execution_progress.num_inflight_tasks, ) if b.HasField("arrow_batch"): logger.debug( @@ -1339,7 +1364,7 @@ def _execute_and_fetch( schema: Optional[StructType] = None properties: Dict[str, Any] = {} - progress = Progress() + progress = Progress(handlers=self._progress_handlers) for response in self._execute_and_fetch_as_iterator(req, observations, progress=progress): if isinstance(response, StructType): schema = response diff --git a/python/pyspark/sql/connect/proto/base_pb2.py b/python/pyspark/sql/connect/proto/base_pb2.py index e2ed91e5d0d02..fdf6115f408aa 100644 --- a/python/pyspark/sql/connect/proto/base_pb2.py +++ b/python/pyspark/sql/connect/proto/base_pb2.py @@ -37,7 +37,7 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x18spark/connect/base.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1cspark/connect/commands.proto\x1a\x1aspark/connect/common.proto\x1a\x1fspark/connect/expressions.proto\x1a\x1dspark/connect/relations.proto\x1a\x19spark/connect/types.proto"t\n\x04Plan\x12-\n\x04root\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationH\x00R\x04root\x12\x32\n\x07\x63ommand\x18\x02 \x01(\x0b\x32\x16.spark.connect.CommandH\x00R\x07\x63ommandB\t\n\x07op_type"z\n\x0bUserContext\x12\x17\n\x07user_id\x18\x01 \x01(\tR\x06userId\x12\x1b\n\tuser_name\x18\x02 \x01(\tR\x08userName\x12\x35\n\nextensions\x18\xe7\x07 
\x03(\x0b\x32\x14.google.protobuf.AnyR\nextensions"\xf5\x12\n\x12\x41nalyzePlanRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x01R\nclientType\x88\x01\x01\x12\x42\n\x06schema\x18\x04 \x01(\x0b\x32(.spark.connect.AnalyzePlanRequest.SchemaH\x00R\x06schema\x12\x45\n\x07\x65xplain\x18\x05 \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.ExplainH\x00R\x07\x65xplain\x12O\n\x0btree_string\x18\x06 \x01(\x0b\x32,.spark.connect.AnalyzePlanRequest.TreeStringH\x00R\ntreeString\x12\x46\n\x08is_local\x18\x07 \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.IsLocalH\x00R\x07isLocal\x12R\n\x0cis_streaming\x18\x08 \x01(\x0b\x32-.spark.connect.AnalyzePlanRequest.IsStreamingH\x00R\x0bisStreaming\x12O\n\x0binput_files\x18\t \x01(\x0b\x32,.spark.connect.AnalyzePlanRequest.InputFilesH\x00R\ninputFiles\x12U\n\rspark_version\x18\n \x01(\x0b\x32..spark.connect.AnalyzePlanRequest.SparkVersionH\x00R\x0csparkVersion\x12I\n\tddl_parse\x18\x0b \x01(\x0b\x32*.spark.connect.AnalyzePlanRequest.DDLParseH\x00R\x08\x64\x64lParse\x12X\n\x0esame_semantics\x18\x0c \x01(\x0b\x32/.spark.connect.AnalyzePlanRequest.SameSemanticsH\x00R\rsameSemantics\x12U\n\rsemantic_hash\x18\r \x01(\x0b\x32..spark.connect.AnalyzePlanRequest.SemanticHashH\x00R\x0csemanticHash\x12\x45\n\x07persist\x18\x0e \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.PersistH\x00R\x07persist\x12K\n\tunpersist\x18\x0f \x01(\x0b\x32+.spark.connect.AnalyzePlanRequest.UnpersistH\x00R\tunpersist\x12_\n\x11get_storage_level\x18\x10 \x01(\x0b\x32\x31.spark.connect.AnalyzePlanRequest.GetStorageLevelH\x00R\x0fgetStorageLevel\x1a\x31\n\x06Schema\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\xbb\x02\n\x07\x45xplain\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12X\n\x0c\x65xplain_mode\x18\x02 
\x01(\x0e\x32\x35.spark.connect.AnalyzePlanRequest.Explain.ExplainModeR\x0b\x65xplainMode"\xac\x01\n\x0b\x45xplainMode\x12\x1c\n\x18\x45XPLAIN_MODE_UNSPECIFIED\x10\x00\x12\x17\n\x13\x45XPLAIN_MODE_SIMPLE\x10\x01\x12\x19\n\x15\x45XPLAIN_MODE_EXTENDED\x10\x02\x12\x18\n\x14\x45XPLAIN_MODE_CODEGEN\x10\x03\x12\x15\n\x11\x45XPLAIN_MODE_COST\x10\x04\x12\x1a\n\x16\x45XPLAIN_MODE_FORMATTED\x10\x05\x1aZ\n\nTreeString\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12\x19\n\x05level\x18\x02 \x01(\x05H\x00R\x05level\x88\x01\x01\x42\x08\n\x06_level\x1a\x32\n\x07IsLocal\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x36\n\x0bIsStreaming\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x35\n\nInputFiles\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x0e\n\x0cSparkVersion\x1a)\n\x08\x44\x44LParse\x12\x1d\n\nddl_string\x18\x01 \x01(\tR\tddlString\x1ay\n\rSameSemantics\x12\x34\n\x0btarget_plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\ntargetPlan\x12\x32\n\nother_plan\x18\x02 \x01(\x0b\x32\x13.spark.connect.PlanR\totherPlan\x1a\x37\n\x0cSemanticHash\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x97\x01\n\x07Persist\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x12\x45\n\rstorage_level\x18\x02 \x01(\x0b\x32\x1b.spark.connect.StorageLevelH\x00R\x0cstorageLevel\x88\x01\x01\x42\x10\n\x0e_storage_level\x1an\n\tUnpersist\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x12\x1f\n\x08\x62locking\x18\x02 \x01(\x08H\x00R\x08\x62locking\x88\x01\x01\x42\x0b\n\t_blocking\x1a\x46\n\x0fGetStorageLevel\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relationB\t\n\x07\x61nalyzeB\x0e\n\x0c_client_type"\xce\r\n\x13\x41nalyzePlanResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x0f 
\x01(\tR\x13serverSideSessionId\x12\x43\n\x06schema\x18\x02 \x01(\x0b\x32).spark.connect.AnalyzePlanResponse.SchemaH\x00R\x06schema\x12\x46\n\x07\x65xplain\x18\x03 \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.ExplainH\x00R\x07\x65xplain\x12P\n\x0btree_string\x18\x04 \x01(\x0b\x32-.spark.connect.AnalyzePlanResponse.TreeStringH\x00R\ntreeString\x12G\n\x08is_local\x18\x05 \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.IsLocalH\x00R\x07isLocal\x12S\n\x0cis_streaming\x18\x06 \x01(\x0b\x32..spark.connect.AnalyzePlanResponse.IsStreamingH\x00R\x0bisStreaming\x12P\n\x0binput_files\x18\x07 \x01(\x0b\x32-.spark.connect.AnalyzePlanResponse.InputFilesH\x00R\ninputFiles\x12V\n\rspark_version\x18\x08 \x01(\x0b\x32/.spark.connect.AnalyzePlanResponse.SparkVersionH\x00R\x0csparkVersion\x12J\n\tddl_parse\x18\t \x01(\x0b\x32+.spark.connect.AnalyzePlanResponse.DDLParseH\x00R\x08\x64\x64lParse\x12Y\n\x0esame_semantics\x18\n \x01(\x0b\x32\x30.spark.connect.AnalyzePlanResponse.SameSemanticsH\x00R\rsameSemantics\x12V\n\rsemantic_hash\x18\x0b \x01(\x0b\x32/.spark.connect.AnalyzePlanResponse.SemanticHashH\x00R\x0csemanticHash\x12\x46\n\x07persist\x18\x0c \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.PersistH\x00R\x07persist\x12L\n\tunpersist\x18\r \x01(\x0b\x32,.spark.connect.AnalyzePlanResponse.UnpersistH\x00R\tunpersist\x12`\n\x11get_storage_level\x18\x0e \x01(\x0b\x32\x32.spark.connect.AnalyzePlanResponse.GetStorageLevelH\x00R\x0fgetStorageLevel\x1a\x39\n\x06Schema\x12/\n\x06schema\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema\x1a\x30\n\x07\x45xplain\x12%\n\x0e\x65xplain_string\x18\x01 \x01(\tR\rexplainString\x1a-\n\nTreeString\x12\x1f\n\x0btree_string\x18\x01 \x01(\tR\ntreeString\x1a$\n\x07IsLocal\x12\x19\n\x08is_local\x18\x01 \x01(\x08R\x07isLocal\x1a\x30\n\x0bIsStreaming\x12!\n\x0cis_streaming\x18\x01 \x01(\x08R\x0bisStreaming\x1a"\n\nInputFiles\x12\x14\n\x05\x66iles\x18\x01 \x03(\tR\x05\x66iles\x1a(\n\x0cSparkVersion\x12\x18\n\x07version\x18\x01 
\x01(\tR\x07version\x1a;\n\x08\x44\x44LParse\x12/\n\x06parsed\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06parsed\x1a\'\n\rSameSemantics\x12\x16\n\x06result\x18\x01 \x01(\x08R\x06result\x1a&\n\x0cSemanticHash\x12\x16\n\x06result\x18\x01 \x01(\x05R\x06result\x1a\t\n\x07Persist\x1a\x0b\n\tUnpersist\x1aS\n\x0fGetStorageLevel\x12@\n\rstorage_level\x18\x01 \x01(\x0b\x32\x1b.spark.connect.StorageLevelR\x0cstorageLevelB\x08\n\x06result"\xa0\x04\n\x12\x45xecutePlanRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12&\n\x0coperation_id\x18\x06 \x01(\tH\x00R\x0boperationId\x88\x01\x01\x12\'\n\x04plan\x18\x03 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x01R\nclientType\x88\x01\x01\x12X\n\x0frequest_options\x18\x05 \x03(\x0b\x32/.spark.connect.ExecutePlanRequest.RequestOptionR\x0erequestOptions\x12\x12\n\x04tags\x18\x07 \x03(\tR\x04tags\x1a\xa5\x01\n\rRequestOption\x12K\n\x10reattach_options\x18\x01 \x01(\x0b\x32\x1e.spark.connect.ReattachOptionsH\x00R\x0freattachOptions\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textensionB\x10\n\x0erequest_optionB\x0f\n\r_operation_idB\x0e\n\x0c_client_type"\xe0\x12\n\x13\x45xecutePlanResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x0f \x01(\tR\x13serverSideSessionId\x12!\n\x0coperation_id\x18\x0c \x01(\tR\x0boperationId\x12\x1f\n\x0bresponse_id\x18\r \x01(\tR\nresponseId\x12P\n\x0b\x61rrow_batch\x18\x02 \x01(\x0b\x32-.spark.connect.ExecutePlanResponse.ArrowBatchH\x00R\narrowBatch\x12\x63\n\x12sql_command_result\x18\x05 \x01(\x0b\x32\x33.spark.connect.ExecutePlanResponse.SqlCommandResultH\x00R\x10sqlCommandResult\x12~\n#write_stream_operation_start_result\x18\x08 \x01(\x0b\x32..spark.connect.WriteStreamOperationStartResultH\x00R\x1fwriteStreamOperationStartResult\x12q\n\x1estreaming_query_command_result\x18\t 
\x01(\x0b\x32*.spark.connect.StreamingQueryCommandResultH\x00R\x1bstreamingQueryCommandResult\x12k\n\x1cget_resources_command_result\x18\n \x01(\x0b\x32(.spark.connect.GetResourcesCommandResultH\x00R\x19getResourcesCommandResult\x12\x87\x01\n&streaming_query_manager_command_result\x18\x0b \x01(\x0b\x32\x31.spark.connect.StreamingQueryManagerCommandResultH\x00R"streamingQueryManagerCommandResult\x12\\\n\x0fresult_complete\x18\x0e \x01(\x0b\x32\x31.spark.connect.ExecutePlanResponse.ResultCompleteH\x00R\x0eresultComplete\x12\x65\n\x12\x65xecution_progress\x18\x10 \x01(\x0b\x32\x34.spark.connect.ExecutePlanResponse.ExecutionProgressH\x00R\x11\x65xecutionProgress\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x12\x44\n\x07metrics\x18\x04 \x01(\x0b\x32*.spark.connect.ExecutePlanResponse.MetricsR\x07metrics\x12]\n\x10observed_metrics\x18\x06 \x03(\x0b\x32\x32.spark.connect.ExecutePlanResponse.ObservedMetricsR\x0fobservedMetrics\x12/\n\x06schema\x18\x07 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema\x1aG\n\x10SqlCommandResult\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x1av\n\nArrowBatch\x12\x1b\n\trow_count\x18\x01 \x01(\x03R\x08rowCount\x12\x12\n\x04\x64\x61ta\x18\x02 \x01(\x0cR\x04\x64\x61ta\x12&\n\x0cstart_offset\x18\x03 \x01(\x03H\x00R\x0bstartOffset\x88\x01\x01\x42\x0f\n\r_start_offset\x1a\x85\x04\n\x07Metrics\x12Q\n\x07metrics\x18\x01 \x03(\x0b\x32\x37.spark.connect.ExecutePlanResponse.Metrics.MetricObjectR\x07metrics\x1a\xcc\x02\n\x0cMetricObject\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x17\n\x07plan_id\x18\x02 \x01(\x03R\x06planId\x12\x16\n\x06parent\x18\x03 \x01(\x03R\x06parent\x12z\n\x11\x65xecution_metrics\x18\x04 \x03(\x0b\x32M.spark.connect.ExecutePlanResponse.Metrics.MetricObject.ExecutionMetricsEntryR\x10\x65xecutionMetrics\x1a{\n\x15\x45xecutionMetricsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12L\n\x05value\x18\x02 
\x01(\x0b\x32\x36.spark.connect.ExecutePlanResponse.Metrics.MetricValueR\x05value:\x02\x38\x01\x1aX\n\x0bMetricValue\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x14\n\x05value\x18\x02 \x01(\x03R\x05value\x12\x1f\n\x0bmetric_type\x18\x03 \x01(\tR\nmetricType\x1at\n\x0fObservedMetrics\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x39\n\x06values\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values\x12\x12\n\x04keys\x18\x03 \x03(\tR\x04keys\x1a\x10\n\x0eResultComplete\x1a\xdb\x01\n\x11\x45xecutionProgress\x12\x1b\n\tnum_tasks\x18\x01 \x01(\x03R\x08numTasks\x12.\n\x13num_completed_tasks\x18\x02 \x01(\x03R\x11numCompletedTasks\x12\x1d\n\nnum_stages\x18\x03 \x01(\x03R\tnumStages\x12\x30\n\x14num_completed_stages\x18\x04 \x01(\x03R\x12numCompletedStages\x12(\n\x10input_bytes_read\x18\x05 \x01(\x03R\x0einputBytesReadB\x0f\n\rresponse_type"A\n\x08KeyValue\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x19\n\x05value\x18\x02 \x01(\tH\x00R\x05value\x88\x01\x01\x42\x08\n\x06_value"\x84\x08\n\rConfigRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12\x44\n\toperation\x18\x03 \x01(\x0b\x32&.spark.connect.ConfigRequest.OperationR\toperation\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x00R\nclientType\x88\x01\x01\x1a\xf2\x03\n\tOperation\x12\x34\n\x03set\x18\x01 \x01(\x0b\x32 .spark.connect.ConfigRequest.SetH\x00R\x03set\x12\x34\n\x03get\x18\x02 \x01(\x0b\x32 .spark.connect.ConfigRequest.GetH\x00R\x03get\x12W\n\x10get_with_default\x18\x03 \x01(\x0b\x32+.spark.connect.ConfigRequest.GetWithDefaultH\x00R\x0egetWithDefault\x12G\n\nget_option\x18\x04 \x01(\x0b\x32&.spark.connect.ConfigRequest.GetOptionH\x00R\tgetOption\x12>\n\x07get_all\x18\x05 \x01(\x0b\x32#.spark.connect.ConfigRequest.GetAllH\x00R\x06getAll\x12:\n\x05unset\x18\x06 \x01(\x0b\x32".spark.connect.ConfigRequest.UnsetH\x00R\x05unset\x12P\n\ris_modifiable\x18\x07 
\x01(\x0b\x32).spark.connect.ConfigRequest.IsModifiableH\x00R\x0cisModifiableB\t\n\x07op_type\x1a\x34\n\x03Set\x12-\n\x05pairs\x18\x01 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x1a\x19\n\x03Get\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a?\n\x0eGetWithDefault\x12-\n\x05pairs\x18\x01 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x1a\x1f\n\tGetOption\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a\x30\n\x06GetAll\x12\x1b\n\x06prefix\x18\x01 \x01(\tH\x00R\x06prefix\x88\x01\x01\x42\t\n\x07_prefix\x1a\x1b\n\x05Unset\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a"\n\x0cIsModifiable\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keysB\x0e\n\x0c_client_type"\xaf\x01\n\x0e\x43onfigResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x04 \x01(\tR\x13serverSideSessionId\x12-\n\x05pairs\x18\x02 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x12\x1a\n\x08warnings\x18\x03 \x03(\tR\x08warnings"\xe7\x06\n\x13\x41\x64\x64\x41rtifactsRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x06 \x01(\tH\x01R\nclientType\x88\x01\x01\x12@\n\x05\x62\x61tch\x18\x03 \x01(\x0b\x32(.spark.connect.AddArtifactsRequest.BatchH\x00R\x05\x62\x61tch\x12Z\n\x0b\x62\x65gin_chunk\x18\x04 \x01(\x0b\x32\x37.spark.connect.AddArtifactsRequest.BeginChunkedArtifactH\x00R\nbeginChunk\x12H\n\x05\x63hunk\x18\x05 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkH\x00R\x05\x63hunk\x1a\x35\n\rArtifactChunk\x12\x12\n\x04\x64\x61ta\x18\x01 \x01(\x0cR\x04\x64\x61ta\x12\x10\n\x03\x63rc\x18\x02 \x01(\x03R\x03\x63rc\x1ao\n\x13SingleChunkArtifact\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x44\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkR\x04\x64\x61ta\x1a]\n\x05\x42\x61tch\x12T\n\tartifacts\x18\x01 
\x03(\x0b\x32\x36.spark.connect.AddArtifactsRequest.SingleChunkArtifactR\tartifacts\x1a\xc1\x01\n\x14\x42\x65ginChunkedArtifact\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x1f\n\x0btotal_bytes\x18\x02 \x01(\x03R\ntotalBytes\x12\x1d\n\nnum_chunks\x18\x03 \x01(\x03R\tnumChunks\x12U\n\rinitial_chunk\x18\x04 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkR\x0cinitialChunkB\t\n\x07payloadB\x0e\n\x0c_client_type"\x90\x02\n\x14\x41\x64\x64\x41rtifactsResponse\x12\x1d\n\nsession_id\x18\x02 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12Q\n\tartifacts\x18\x01 \x03(\x0b\x32\x33.spark.connect.AddArtifactsResponse.ArtifactSummaryR\tartifacts\x1aQ\n\x0f\x41rtifactSummary\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12*\n\x11is_crc_successful\x18\x02 \x01(\x08R\x0fisCrcSuccessful"\xc3\x01\n\x17\x41rtifactStatusesRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x00R\nclientType\x88\x01\x01\x12\x14\n\x05names\x18\x04 \x03(\tR\x05namesB\x0e\n\x0c_client_type"\xe0\x02\n\x18\x41rtifactStatusesResponse\x12\x1d\n\nsession_id\x18\x02 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12Q\n\x08statuses\x18\x01 \x03(\x0b\x32\x35.spark.connect.ArtifactStatusesResponse.StatusesEntryR\x08statuses\x1as\n\rStatusesEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12L\n\x05value\x18\x02 \x01(\x0b\x32\x36.spark.connect.ArtifactStatusesResponse.ArtifactStatusR\x05value:\x02\x38\x01\x1a(\n\x0e\x41rtifactStatus\x12\x16\n\x06\x65xists\x18\x01 \x01(\x08R\x06\x65xists"\xd8\x03\n\x10InterruptRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x01R\nclientType\x88\x01\x01\x12T\n\x0einterrupt_type\x18\x04 
\x01(\x0e\x32-.spark.connect.InterruptRequest.InterruptTypeR\rinterruptType\x12%\n\roperation_tag\x18\x05 \x01(\tH\x00R\x0coperationTag\x12#\n\x0coperation_id\x18\x06 \x01(\tH\x00R\x0boperationId"\x80\x01\n\rInterruptType\x12\x1e\n\x1aINTERRUPT_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12INTERRUPT_TYPE_ALL\x10\x01\x12\x16\n\x12INTERRUPT_TYPE_TAG\x10\x02\x12\x1f\n\x1bINTERRUPT_TYPE_OPERATION_ID\x10\x03\x42\x0b\n\tinterruptB\x0e\n\x0c_client_type"\x90\x01\n\x11InterruptResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12\'\n\x0finterrupted_ids\x18\x02 \x03(\tR\x0einterruptedIds"5\n\x0fReattachOptions\x12"\n\x0creattachable\x18\x01 \x01(\x08R\x0creattachable"\x93\x02\n\x16ReattachExecuteRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12!\n\x0coperation_id\x18\x03 \x01(\tR\x0boperationId\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x00R\nclientType\x88\x01\x01\x12-\n\x10last_response_id\x18\x05 \x01(\tH\x01R\x0elastResponseId\x88\x01\x01\x42\x0e\n\x0c_client_typeB\x13\n\x11_last_response_id"\xc6\x03\n\x15ReleaseExecuteRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12!\n\x0coperation_id\x18\x03 \x01(\tR\x0boperationId\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x01R\nclientType\x88\x01\x01\x12R\n\x0brelease_all\x18\x05 \x01(\x0b\x32/.spark.connect.ReleaseExecuteRequest.ReleaseAllH\x00R\nreleaseAll\x12X\n\rrelease_until\x18\x06 \x01(\x0b\x32\x31.spark.connect.ReleaseExecuteRequest.ReleaseUntilH\x00R\x0creleaseUntil\x1a\x0c\n\nReleaseAll\x1a/\n\x0cReleaseUntil\x12\x1f\n\x0bresponse_id\x18\x01 \x01(\tR\nresponseIdB\t\n\x07releaseB\x0e\n\x0c_client_type"\xa5\x01\n\x16ReleaseExecuteResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 
\x01(\tR\x13serverSideSessionId\x12&\n\x0coperation_id\x18\x02 \x01(\tH\x00R\x0boperationId\x88\x01\x01\x42\x0f\n\r_operation_id"\xab\x01\n\x15ReleaseSessionRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x00R\nclientType\x88\x01\x01\x42\x0e\n\x0c_client_type"l\n\x16ReleaseSessionResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x02 \x01(\tR\x13serverSideSessionId"\xc9\x01\n\x18\x46\x65tchErrorDetailsRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12\x19\n\x08\x65rror_id\x18\x03 \x01(\tR\x07\x65rrorId\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x00R\nclientType\x88\x01\x01\x42\x0e\n\x0c_client_type"\x93\x0c\n\x19\x46\x65tchErrorDetailsResponse\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12\x1d\n\nsession_id\x18\x04 \x01(\tR\tsessionId\x12)\n\x0eroot_error_idx\x18\x01 \x01(\x05H\x00R\x0crootErrorIdx\x88\x01\x01\x12\x46\n\x06\x65rrors\x18\x02 \x03(\x0b\x32..spark.connect.FetchErrorDetailsResponse.ErrorR\x06\x65rrors\x1a\xae\x01\n\x11StackTraceElement\x12\'\n\x0f\x64\x65\x63laring_class\x18\x01 \x01(\tR\x0e\x64\x65\x63laringClass\x12\x1f\n\x0bmethod_name\x18\x02 \x01(\tR\nmethodName\x12 \n\tfile_name\x18\x03 \x01(\tH\x00R\x08\x66ileName\x88\x01\x01\x12\x1f\n\x0bline_number\x18\x04 \x01(\x05R\nlineNumberB\x0c\n\n_file_name\x1a\xf0\x02\n\x0cQueryContext\x12\x64\n\x0c\x63ontext_type\x18\n \x01(\x0e\x32\x41.spark.connect.FetchErrorDetailsResponse.QueryContext.ContextTypeR\x0b\x63ontextType\x12\x1f\n\x0bobject_type\x18\x01 \x01(\tR\nobjectType\x12\x1f\n\x0bobject_name\x18\x02 \x01(\tR\nobjectName\x12\x1f\n\x0bstart_index\x18\x03 \x01(\x05R\nstartIndex\x12\x1d\n\nstop_index\x18\x04 \x01(\x05R\tstopIndex\x12\x1a\n\x08\x66ragment\x18\x05 
\x01(\tR\x08\x66ragment\x12\x1b\n\tcall_site\x18\x06 \x01(\tR\x08\x63\x61llSite\x12\x18\n\x07summary\x18\x07 \x01(\tR\x07summary"%\n\x0b\x43ontextType\x12\x07\n\x03SQL\x10\x00\x12\r\n\tDATAFRAME\x10\x01\x1a\x99\x03\n\x0eSparkThrowable\x12$\n\x0b\x65rror_class\x18\x01 \x01(\tH\x00R\nerrorClass\x88\x01\x01\x12}\n\x12message_parameters\x18\x02 \x03(\x0b\x32N.spark.connect.FetchErrorDetailsResponse.SparkThrowable.MessageParametersEntryR\x11messageParameters\x12\\\n\x0equery_contexts\x18\x03 \x03(\x0b\x32\x35.spark.connect.FetchErrorDetailsResponse.QueryContextR\rqueryContexts\x12 \n\tsql_state\x18\x04 \x01(\tH\x01R\x08sqlState\x88\x01\x01\x1a\x44\n\x16MessageParametersEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\x0e\n\x0c_error_classB\x0c\n\n_sql_state\x1a\xdb\x02\n\x05\x45rror\x12\x30\n\x14\x65rror_type_hierarchy\x18\x01 \x03(\tR\x12\x65rrorTypeHierarchy\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12[\n\x0bstack_trace\x18\x03 \x03(\x0b\x32:.spark.connect.FetchErrorDetailsResponse.StackTraceElementR\nstackTrace\x12 \n\tcause_idx\x18\x04 \x01(\x05H\x00R\x08\x63\x61useIdx\x88\x01\x01\x12\x65\n\x0fspark_throwable\x18\x05 
\x01(\x0b\x32\x37.spark.connect.FetchErrorDetailsResponse.SparkThrowableH\x01R\x0esparkThrowable\x88\x01\x01\x42\x0c\n\n_cause_idxB\x12\n\x10_spark_throwableB\x11\n\x0f_root_error_idx2\xb2\x07\n\x13SparkConnectService\x12X\n\x0b\x45xecutePlan\x12!.spark.connect.ExecutePlanRequest\x1a".spark.connect.ExecutePlanResponse"\x00\x30\x01\x12V\n\x0b\x41nalyzePlan\x12!.spark.connect.AnalyzePlanRequest\x1a".spark.connect.AnalyzePlanResponse"\x00\x12G\n\x06\x43onfig\x12\x1c.spark.connect.ConfigRequest\x1a\x1d.spark.connect.ConfigResponse"\x00\x12[\n\x0c\x41\x64\x64\x41rtifacts\x12".spark.connect.AddArtifactsRequest\x1a#.spark.connect.AddArtifactsResponse"\x00(\x01\x12\x63\n\x0e\x41rtifactStatus\x12&.spark.connect.ArtifactStatusesRequest\x1a\'.spark.connect.ArtifactStatusesResponse"\x00\x12P\n\tInterrupt\x12\x1f.spark.connect.InterruptRequest\x1a .spark.connect.InterruptResponse"\x00\x12`\n\x0fReattachExecute\x12%.spark.connect.ReattachExecuteRequest\x1a".spark.connect.ExecutePlanResponse"\x00\x30\x01\x12_\n\x0eReleaseExecute\x12$.spark.connect.ReleaseExecuteRequest\x1a%.spark.connect.ReleaseExecuteResponse"\x00\x12_\n\x0eReleaseSession\x12$.spark.connect.ReleaseSessionRequest\x1a%.spark.connect.ReleaseSessionResponse"\x00\x12h\n\x11\x46\x65tchErrorDetails\x12\'.spark.connect.FetchErrorDetailsRequest\x1a(.spark.connect.FetchErrorDetailsResponse"\x00\x42\x36\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' + b'\n\x18spark/connect/base.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1cspark/connect/commands.proto\x1a\x1aspark/connect/common.proto\x1a\x1fspark/connect/expressions.proto\x1a\x1dspark/connect/relations.proto\x1a\x19spark/connect/types.proto"t\n\x04Plan\x12-\n\x04root\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationH\x00R\x04root\x12\x32\n\x07\x63ommand\x18\x02 \x01(\x0b\x32\x16.spark.connect.CommandH\x00R\x07\x63ommandB\t\n\x07op_type"z\n\x0bUserContext\x12\x17\n\x07user_id\x18\x01 
\x01(\tR\x06userId\x12\x1b\n\tuser_name\x18\x02 \x01(\tR\x08userName\x12\x35\n\nextensions\x18\xe7\x07 \x03(\x0b\x32\x14.google.protobuf.AnyR\nextensions"\xf5\x12\n\x12\x41nalyzePlanRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x01R\nclientType\x88\x01\x01\x12\x42\n\x06schema\x18\x04 \x01(\x0b\x32(.spark.connect.AnalyzePlanRequest.SchemaH\x00R\x06schema\x12\x45\n\x07\x65xplain\x18\x05 \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.ExplainH\x00R\x07\x65xplain\x12O\n\x0btree_string\x18\x06 \x01(\x0b\x32,.spark.connect.AnalyzePlanRequest.TreeStringH\x00R\ntreeString\x12\x46\n\x08is_local\x18\x07 \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.IsLocalH\x00R\x07isLocal\x12R\n\x0cis_streaming\x18\x08 \x01(\x0b\x32-.spark.connect.AnalyzePlanRequest.IsStreamingH\x00R\x0bisStreaming\x12O\n\x0binput_files\x18\t \x01(\x0b\x32,.spark.connect.AnalyzePlanRequest.InputFilesH\x00R\ninputFiles\x12U\n\rspark_version\x18\n \x01(\x0b\x32..spark.connect.AnalyzePlanRequest.SparkVersionH\x00R\x0csparkVersion\x12I\n\tddl_parse\x18\x0b \x01(\x0b\x32*.spark.connect.AnalyzePlanRequest.DDLParseH\x00R\x08\x64\x64lParse\x12X\n\x0esame_semantics\x18\x0c \x01(\x0b\x32/.spark.connect.AnalyzePlanRequest.SameSemanticsH\x00R\rsameSemantics\x12U\n\rsemantic_hash\x18\r \x01(\x0b\x32..spark.connect.AnalyzePlanRequest.SemanticHashH\x00R\x0csemanticHash\x12\x45\n\x07persist\x18\x0e \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.PersistH\x00R\x07persist\x12K\n\tunpersist\x18\x0f \x01(\x0b\x32+.spark.connect.AnalyzePlanRequest.UnpersistH\x00R\tunpersist\x12_\n\x11get_storage_level\x18\x10 \x01(\x0b\x32\x31.spark.connect.AnalyzePlanRequest.GetStorageLevelH\x00R\x0fgetStorageLevel\x1a\x31\n\x06Schema\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\xbb\x02\n\x07\x45xplain\x12\'\n\x04plan\x18\x01 
\x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12X\n\x0c\x65xplain_mode\x18\x02 \x01(\x0e\x32\x35.spark.connect.AnalyzePlanRequest.Explain.ExplainModeR\x0b\x65xplainMode"\xac\x01\n\x0b\x45xplainMode\x12\x1c\n\x18\x45XPLAIN_MODE_UNSPECIFIED\x10\x00\x12\x17\n\x13\x45XPLAIN_MODE_SIMPLE\x10\x01\x12\x19\n\x15\x45XPLAIN_MODE_EXTENDED\x10\x02\x12\x18\n\x14\x45XPLAIN_MODE_CODEGEN\x10\x03\x12\x15\n\x11\x45XPLAIN_MODE_COST\x10\x04\x12\x1a\n\x16\x45XPLAIN_MODE_FORMATTED\x10\x05\x1aZ\n\nTreeString\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12\x19\n\x05level\x18\x02 \x01(\x05H\x00R\x05level\x88\x01\x01\x42\x08\n\x06_level\x1a\x32\n\x07IsLocal\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x36\n\x0bIsStreaming\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x35\n\nInputFiles\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x0e\n\x0cSparkVersion\x1a)\n\x08\x44\x44LParse\x12\x1d\n\nddl_string\x18\x01 \x01(\tR\tddlString\x1ay\n\rSameSemantics\x12\x34\n\x0btarget_plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\ntargetPlan\x12\x32\n\nother_plan\x18\x02 \x01(\x0b\x32\x13.spark.connect.PlanR\totherPlan\x1a\x37\n\x0cSemanticHash\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x97\x01\n\x07Persist\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x12\x45\n\rstorage_level\x18\x02 \x01(\x0b\x32\x1b.spark.connect.StorageLevelH\x00R\x0cstorageLevel\x88\x01\x01\x42\x10\n\x0e_storage_level\x1an\n\tUnpersist\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x12\x1f\n\x08\x62locking\x18\x02 \x01(\x08H\x00R\x08\x62locking\x88\x01\x01\x42\x0b\n\t_blocking\x1a\x46\n\x0fGetStorageLevel\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relationB\t\n\x07\x61nalyzeB\x0e\n\x0c_client_type"\xce\r\n\x13\x41nalyzePlanResponse\x12\x1d\n\nsession_id\x18\x01 
\x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x0f \x01(\tR\x13serverSideSessionId\x12\x43\n\x06schema\x18\x02 \x01(\x0b\x32).spark.connect.AnalyzePlanResponse.SchemaH\x00R\x06schema\x12\x46\n\x07\x65xplain\x18\x03 \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.ExplainH\x00R\x07\x65xplain\x12P\n\x0btree_string\x18\x04 \x01(\x0b\x32-.spark.connect.AnalyzePlanResponse.TreeStringH\x00R\ntreeString\x12G\n\x08is_local\x18\x05 \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.IsLocalH\x00R\x07isLocal\x12S\n\x0cis_streaming\x18\x06 \x01(\x0b\x32..spark.connect.AnalyzePlanResponse.IsStreamingH\x00R\x0bisStreaming\x12P\n\x0binput_files\x18\x07 \x01(\x0b\x32-.spark.connect.AnalyzePlanResponse.InputFilesH\x00R\ninputFiles\x12V\n\rspark_version\x18\x08 \x01(\x0b\x32/.spark.connect.AnalyzePlanResponse.SparkVersionH\x00R\x0csparkVersion\x12J\n\tddl_parse\x18\t \x01(\x0b\x32+.spark.connect.AnalyzePlanResponse.DDLParseH\x00R\x08\x64\x64lParse\x12Y\n\x0esame_semantics\x18\n \x01(\x0b\x32\x30.spark.connect.AnalyzePlanResponse.SameSemanticsH\x00R\rsameSemantics\x12V\n\rsemantic_hash\x18\x0b \x01(\x0b\x32/.spark.connect.AnalyzePlanResponse.SemanticHashH\x00R\x0csemanticHash\x12\x46\n\x07persist\x18\x0c \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.PersistH\x00R\x07persist\x12L\n\tunpersist\x18\r \x01(\x0b\x32,.spark.connect.AnalyzePlanResponse.UnpersistH\x00R\tunpersist\x12`\n\x11get_storage_level\x18\x0e \x01(\x0b\x32\x32.spark.connect.AnalyzePlanResponse.GetStorageLevelH\x00R\x0fgetStorageLevel\x1a\x39\n\x06Schema\x12/\n\x06schema\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema\x1a\x30\n\x07\x45xplain\x12%\n\x0e\x65xplain_string\x18\x01 \x01(\tR\rexplainString\x1a-\n\nTreeString\x12\x1f\n\x0btree_string\x18\x01 \x01(\tR\ntreeString\x1a$\n\x07IsLocal\x12\x19\n\x08is_local\x18\x01 \x01(\x08R\x07isLocal\x1a\x30\n\x0bIsStreaming\x12!\n\x0cis_streaming\x18\x01 \x01(\x08R\x0bisStreaming\x1a"\n\nInputFiles\x12\x14\n\x05\x66iles\x18\x01 
\x03(\tR\x05\x66iles\x1a(\n\x0cSparkVersion\x12\x18\n\x07version\x18\x01 \x01(\tR\x07version\x1a;\n\x08\x44\x44LParse\x12/\n\x06parsed\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06parsed\x1a\'\n\rSameSemantics\x12\x16\n\x06result\x18\x01 \x01(\x08R\x06result\x1a&\n\x0cSemanticHash\x12\x16\n\x06result\x18\x01 \x01(\x05R\x06result\x1a\t\n\x07Persist\x1a\x0b\n\tUnpersist\x1aS\n\x0fGetStorageLevel\x12@\n\rstorage_level\x18\x01 \x01(\x0b\x32\x1b.spark.connect.StorageLevelR\x0cstorageLevelB\x08\n\x06result"\xa0\x04\n\x12\x45xecutePlanRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12&\n\x0coperation_id\x18\x06 \x01(\tH\x00R\x0boperationId\x88\x01\x01\x12\'\n\x04plan\x18\x03 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x01R\nclientType\x88\x01\x01\x12X\n\x0frequest_options\x18\x05 \x03(\x0b\x32/.spark.connect.ExecutePlanRequest.RequestOptionR\x0erequestOptions\x12\x12\n\x04tags\x18\x07 \x03(\tR\x04tags\x1a\xa5\x01\n\rRequestOption\x12K\n\x10reattach_options\x18\x01 \x01(\x0b\x32\x1e.spark.connect.ReattachOptionsH\x00R\x0freattachOptions\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textensionB\x10\n\x0erequest_optionB\x0f\n\r_operation_idB\x0e\n\x0c_client_type"\x8e\x13\n\x13\x45xecutePlanResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x0f \x01(\tR\x13serverSideSessionId\x12!\n\x0coperation_id\x18\x0c \x01(\tR\x0boperationId\x12\x1f\n\x0bresponse_id\x18\r \x01(\tR\nresponseId\x12P\n\x0b\x61rrow_batch\x18\x02 \x01(\x0b\x32-.spark.connect.ExecutePlanResponse.ArrowBatchH\x00R\narrowBatch\x12\x63\n\x12sql_command_result\x18\x05 \x01(\x0b\x32\x33.spark.connect.ExecutePlanResponse.SqlCommandResultH\x00R\x10sqlCommandResult\x12~\n#write_stream_operation_start_result\x18\x08 
\x01(\x0b\x32..spark.connect.WriteStreamOperationStartResultH\x00R\x1fwriteStreamOperationStartResult\x12q\n\x1estreaming_query_command_result\x18\t \x01(\x0b\x32*.spark.connect.StreamingQueryCommandResultH\x00R\x1bstreamingQueryCommandResult\x12k\n\x1cget_resources_command_result\x18\n \x01(\x0b\x32(.spark.connect.GetResourcesCommandResultH\x00R\x19getResourcesCommandResult\x12\x87\x01\n&streaming_query_manager_command_result\x18\x0b \x01(\x0b\x32\x31.spark.connect.StreamingQueryManagerCommandResultH\x00R"streamingQueryManagerCommandResult\x12\\\n\x0fresult_complete\x18\x0e \x01(\x0b\x32\x31.spark.connect.ExecutePlanResponse.ResultCompleteH\x00R\x0eresultComplete\x12\x65\n\x12\x65xecution_progress\x18\x10 \x01(\x0b\x32\x34.spark.connect.ExecutePlanResponse.ExecutionProgressH\x00R\x11\x65xecutionProgress\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x12\x44\n\x07metrics\x18\x04 \x01(\x0b\x32*.spark.connect.ExecutePlanResponse.MetricsR\x07metrics\x12]\n\x10observed_metrics\x18\x06 \x03(\x0b\x32\x32.spark.connect.ExecutePlanResponse.ObservedMetricsR\x0fobservedMetrics\x12/\n\x06schema\x18\x07 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema\x1aG\n\x10SqlCommandResult\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x1av\n\nArrowBatch\x12\x1b\n\trow_count\x18\x01 \x01(\x03R\x08rowCount\x12\x12\n\x04\x64\x61ta\x18\x02 \x01(\x0cR\x04\x64\x61ta\x12&\n\x0cstart_offset\x18\x03 \x01(\x03H\x00R\x0bstartOffset\x88\x01\x01\x42\x0f\n\r_start_offset\x1a\x85\x04\n\x07Metrics\x12Q\n\x07metrics\x18\x01 \x03(\x0b\x32\x37.spark.connect.ExecutePlanResponse.Metrics.MetricObjectR\x07metrics\x1a\xcc\x02\n\x0cMetricObject\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x17\n\x07plan_id\x18\x02 \x01(\x03R\x06planId\x12\x16\n\x06parent\x18\x03 \x01(\x03R\x06parent\x12z\n\x11\x65xecution_metrics\x18\x04 
\x03(\x0b\x32M.spark.connect.ExecutePlanResponse.Metrics.MetricObject.ExecutionMetricsEntryR\x10\x65xecutionMetrics\x1a{\n\x15\x45xecutionMetricsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12L\n\x05value\x18\x02 \x01(\x0b\x32\x36.spark.connect.ExecutePlanResponse.Metrics.MetricValueR\x05value:\x02\x38\x01\x1aX\n\x0bMetricValue\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x14\n\x05value\x18\x02 \x01(\x03R\x05value\x12\x1f\n\x0bmetric_type\x18\x03 \x01(\tR\nmetricType\x1at\n\x0fObservedMetrics\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x39\n\x06values\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values\x12\x12\n\x04keys\x18\x03 \x03(\tR\x04keys\x1a\x10\n\x0eResultComplete\x1a\x89\x02\n\x11\x45xecutionProgress\x12\x1b\n\tnum_tasks\x18\x01 \x01(\x03R\x08numTasks\x12.\n\x13num_completed_tasks\x18\x02 \x01(\x03R\x11numCompletedTasks\x12\x1d\n\nnum_stages\x18\x03 \x01(\x03R\tnumStages\x12\x30\n\x14num_completed_stages\x18\x04 \x01(\x03R\x12numCompletedStages\x12(\n\x10input_bytes_read\x18\x05 \x01(\x03R\x0einputBytesRead\x12,\n\x12num_inflight_tasks\x18\x06 \x01(\x03R\x10numInflightTasksB\x0f\n\rresponse_type"A\n\x08KeyValue\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x19\n\x05value\x18\x02 \x01(\tH\x00R\x05value\x88\x01\x01\x42\x08\n\x06_value"\x84\x08\n\rConfigRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12\x44\n\toperation\x18\x03 \x01(\x0b\x32&.spark.connect.ConfigRequest.OperationR\toperation\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x00R\nclientType\x88\x01\x01\x1a\xf2\x03\n\tOperation\x12\x34\n\x03set\x18\x01 \x01(\x0b\x32 .spark.connect.ConfigRequest.SetH\x00R\x03set\x12\x34\n\x03get\x18\x02 \x01(\x0b\x32 .spark.connect.ConfigRequest.GetH\x00R\x03get\x12W\n\x10get_with_default\x18\x03 \x01(\x0b\x32+.spark.connect.ConfigRequest.GetWithDefaultH\x00R\x0egetWithDefault\x12G\n\nget_option\x18\x04 
\x01(\x0b\x32&.spark.connect.ConfigRequest.GetOptionH\x00R\tgetOption\x12>\n\x07get_all\x18\x05 \x01(\x0b\x32#.spark.connect.ConfigRequest.GetAllH\x00R\x06getAll\x12:\n\x05unset\x18\x06 \x01(\x0b\x32".spark.connect.ConfigRequest.UnsetH\x00R\x05unset\x12P\n\ris_modifiable\x18\x07 \x01(\x0b\x32).spark.connect.ConfigRequest.IsModifiableH\x00R\x0cisModifiableB\t\n\x07op_type\x1a\x34\n\x03Set\x12-\n\x05pairs\x18\x01 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x1a\x19\n\x03Get\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a?\n\x0eGetWithDefault\x12-\n\x05pairs\x18\x01 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x1a\x1f\n\tGetOption\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a\x30\n\x06GetAll\x12\x1b\n\x06prefix\x18\x01 \x01(\tH\x00R\x06prefix\x88\x01\x01\x42\t\n\x07_prefix\x1a\x1b\n\x05Unset\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a"\n\x0cIsModifiable\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keysB\x0e\n\x0c_client_type"\xaf\x01\n\x0e\x43onfigResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x04 \x01(\tR\x13serverSideSessionId\x12-\n\x05pairs\x18\x02 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x12\x1a\n\x08warnings\x18\x03 \x03(\tR\x08warnings"\xe7\x06\n\x13\x41\x64\x64\x41rtifactsRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x06 \x01(\tH\x01R\nclientType\x88\x01\x01\x12@\n\x05\x62\x61tch\x18\x03 \x01(\x0b\x32(.spark.connect.AddArtifactsRequest.BatchH\x00R\x05\x62\x61tch\x12Z\n\x0b\x62\x65gin_chunk\x18\x04 \x01(\x0b\x32\x37.spark.connect.AddArtifactsRequest.BeginChunkedArtifactH\x00R\nbeginChunk\x12H\n\x05\x63hunk\x18\x05 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkH\x00R\x05\x63hunk\x1a\x35\n\rArtifactChunk\x12\x12\n\x04\x64\x61ta\x18\x01 \x01(\x0cR\x04\x64\x61ta\x12\x10\n\x03\x63rc\x18\x02 
\x01(\x03R\x03\x63rc\x1ao\n\x13SingleChunkArtifact\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x44\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkR\x04\x64\x61ta\x1a]\n\x05\x42\x61tch\x12T\n\tartifacts\x18\x01 \x03(\x0b\x32\x36.spark.connect.AddArtifactsRequest.SingleChunkArtifactR\tartifacts\x1a\xc1\x01\n\x14\x42\x65ginChunkedArtifact\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x1f\n\x0btotal_bytes\x18\x02 \x01(\x03R\ntotalBytes\x12\x1d\n\nnum_chunks\x18\x03 \x01(\x03R\tnumChunks\x12U\n\rinitial_chunk\x18\x04 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkR\x0cinitialChunkB\t\n\x07payloadB\x0e\n\x0c_client_type"\x90\x02\n\x14\x41\x64\x64\x41rtifactsResponse\x12\x1d\n\nsession_id\x18\x02 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12Q\n\tartifacts\x18\x01 \x03(\x0b\x32\x33.spark.connect.AddArtifactsResponse.ArtifactSummaryR\tartifacts\x1aQ\n\x0f\x41rtifactSummary\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12*\n\x11is_crc_successful\x18\x02 \x01(\x08R\x0fisCrcSuccessful"\xc3\x01\n\x17\x41rtifactStatusesRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x00R\nclientType\x88\x01\x01\x12\x14\n\x05names\x18\x04 \x03(\tR\x05namesB\x0e\n\x0c_client_type"\xe0\x02\n\x18\x41rtifactStatusesResponse\x12\x1d\n\nsession_id\x18\x02 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12Q\n\x08statuses\x18\x01 \x03(\x0b\x32\x35.spark.connect.ArtifactStatusesResponse.StatusesEntryR\x08statuses\x1as\n\rStatusesEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12L\n\x05value\x18\x02 \x01(\x0b\x32\x36.spark.connect.ArtifactStatusesResponse.ArtifactStatusR\x05value:\x02\x38\x01\x1a(\n\x0e\x41rtifactStatus\x12\x16\n\x06\x65xists\x18\x01 
\x01(\x08R\x06\x65xists"\xd8\x03\n\x10InterruptRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x01R\nclientType\x88\x01\x01\x12T\n\x0einterrupt_type\x18\x04 \x01(\x0e\x32-.spark.connect.InterruptRequest.InterruptTypeR\rinterruptType\x12%\n\roperation_tag\x18\x05 \x01(\tH\x00R\x0coperationTag\x12#\n\x0coperation_id\x18\x06 \x01(\tH\x00R\x0boperationId"\x80\x01\n\rInterruptType\x12\x1e\n\x1aINTERRUPT_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12INTERRUPT_TYPE_ALL\x10\x01\x12\x16\n\x12INTERRUPT_TYPE_TAG\x10\x02\x12\x1f\n\x1bINTERRUPT_TYPE_OPERATION_ID\x10\x03\x42\x0b\n\tinterruptB\x0e\n\x0c_client_type"\x90\x01\n\x11InterruptResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12\'\n\x0finterrupted_ids\x18\x02 \x03(\tR\x0einterruptedIds"5\n\x0fReattachOptions\x12"\n\x0creattachable\x18\x01 \x01(\x08R\x0creattachable"\x93\x02\n\x16ReattachExecuteRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12!\n\x0coperation_id\x18\x03 \x01(\tR\x0boperationId\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x00R\nclientType\x88\x01\x01\x12-\n\x10last_response_id\x18\x05 \x01(\tH\x01R\x0elastResponseId\x88\x01\x01\x42\x0e\n\x0c_client_typeB\x13\n\x11_last_response_id"\xc6\x03\n\x15ReleaseExecuteRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12!\n\x0coperation_id\x18\x03 \x01(\tR\x0boperationId\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x01R\nclientType\x88\x01\x01\x12R\n\x0brelease_all\x18\x05 \x01(\x0b\x32/.spark.connect.ReleaseExecuteRequest.ReleaseAllH\x00R\nreleaseAll\x12X\n\rrelease_until\x18\x06 
\x01(\x0b\x32\x31.spark.connect.ReleaseExecuteRequest.ReleaseUntilH\x00R\x0creleaseUntil\x1a\x0c\n\nReleaseAll\x1a/\n\x0cReleaseUntil\x12\x1f\n\x0bresponse_id\x18\x01 \x01(\tR\nresponseIdB\t\n\x07releaseB\x0e\n\x0c_client_type"\xa5\x01\n\x16ReleaseExecuteResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12&\n\x0coperation_id\x18\x02 \x01(\tH\x00R\x0boperationId\x88\x01\x01\x42\x0f\n\r_operation_id"\xab\x01\n\x15ReleaseSessionRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x00R\nclientType\x88\x01\x01\x42\x0e\n\x0c_client_type"l\n\x16ReleaseSessionResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x02 \x01(\tR\x13serverSideSessionId"\xc9\x01\n\x18\x46\x65tchErrorDetailsRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12\x19\n\x08\x65rror_id\x18\x03 \x01(\tR\x07\x65rrorId\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x00R\nclientType\x88\x01\x01\x42\x0e\n\x0c_client_type"\x93\x0c\n\x19\x46\x65tchErrorDetailsResponse\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12\x1d\n\nsession_id\x18\x04 \x01(\tR\tsessionId\x12)\n\x0eroot_error_idx\x18\x01 \x01(\x05H\x00R\x0crootErrorIdx\x88\x01\x01\x12\x46\n\x06\x65rrors\x18\x02 \x03(\x0b\x32..spark.connect.FetchErrorDetailsResponse.ErrorR\x06\x65rrors\x1a\xae\x01\n\x11StackTraceElement\x12\'\n\x0f\x64\x65\x63laring_class\x18\x01 \x01(\tR\x0e\x64\x65\x63laringClass\x12\x1f\n\x0bmethod_name\x18\x02 \x01(\tR\nmethodName\x12 \n\tfile_name\x18\x03 \x01(\tH\x00R\x08\x66ileName\x88\x01\x01\x12\x1f\n\x0bline_number\x18\x04 \x01(\x05R\nlineNumberB\x0c\n\n_file_name\x1a\xf0\x02\n\x0cQueryContext\x12\x64\n\x0c\x63ontext_type\x18\n 
\x01(\x0e\x32\x41.spark.connect.FetchErrorDetailsResponse.QueryContext.ContextTypeR\x0b\x63ontextType\x12\x1f\n\x0bobject_type\x18\x01 \x01(\tR\nobjectType\x12\x1f\n\x0bobject_name\x18\x02 \x01(\tR\nobjectName\x12\x1f\n\x0bstart_index\x18\x03 \x01(\x05R\nstartIndex\x12\x1d\n\nstop_index\x18\x04 \x01(\x05R\tstopIndex\x12\x1a\n\x08\x66ragment\x18\x05 \x01(\tR\x08\x66ragment\x12\x1b\n\tcall_site\x18\x06 \x01(\tR\x08\x63\x61llSite\x12\x18\n\x07summary\x18\x07 \x01(\tR\x07summary"%\n\x0b\x43ontextType\x12\x07\n\x03SQL\x10\x00\x12\r\n\tDATAFRAME\x10\x01\x1a\x99\x03\n\x0eSparkThrowable\x12$\n\x0b\x65rror_class\x18\x01 \x01(\tH\x00R\nerrorClass\x88\x01\x01\x12}\n\x12message_parameters\x18\x02 \x03(\x0b\x32N.spark.connect.FetchErrorDetailsResponse.SparkThrowable.MessageParametersEntryR\x11messageParameters\x12\\\n\x0equery_contexts\x18\x03 \x03(\x0b\x32\x35.spark.connect.FetchErrorDetailsResponse.QueryContextR\rqueryContexts\x12 \n\tsql_state\x18\x04 \x01(\tH\x01R\x08sqlState\x88\x01\x01\x1a\x44\n\x16MessageParametersEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\x0e\n\x0c_error_classB\x0c\n\n_sql_state\x1a\xdb\x02\n\x05\x45rror\x12\x30\n\x14\x65rror_type_hierarchy\x18\x01 \x03(\tR\x12\x65rrorTypeHierarchy\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12[\n\x0bstack_trace\x18\x03 \x03(\x0b\x32:.spark.connect.FetchErrorDetailsResponse.StackTraceElementR\nstackTrace\x12 \n\tcause_idx\x18\x04 \x01(\x05H\x00R\x08\x63\x61useIdx\x88\x01\x01\x12\x65\n\x0fspark_throwable\x18\x05 
\x01(\x0b\x32\x37.spark.connect.FetchErrorDetailsResponse.SparkThrowableH\x01R\x0esparkThrowable\x88\x01\x01\x42\x0c\n\n_cause_idxB\x12\n\x10_spark_throwableB\x11\n\x0f_root_error_idx2\xb2\x07\n\x13SparkConnectService\x12X\n\x0b\x45xecutePlan\x12!.spark.connect.ExecutePlanRequest\x1a".spark.connect.ExecutePlanResponse"\x00\x30\x01\x12V\n\x0b\x41nalyzePlan\x12!.spark.connect.AnalyzePlanRequest\x1a".spark.connect.AnalyzePlanResponse"\x00\x12G\n\x06\x43onfig\x12\x1c.spark.connect.ConfigRequest\x1a\x1d.spark.connect.ConfigResponse"\x00\x12[\n\x0c\x41\x64\x64\x41rtifacts\x12".spark.connect.AddArtifactsRequest\x1a#.spark.connect.AddArtifactsResponse"\x00(\x01\x12\x63\n\x0e\x41rtifactStatus\x12&.spark.connect.ArtifactStatusesRequest\x1a\'.spark.connect.ArtifactStatusesResponse"\x00\x12P\n\tInterrupt\x12\x1f.spark.connect.InterruptRequest\x1a .spark.connect.InterruptResponse"\x00\x12`\n\x0fReattachExecute\x12%.spark.connect.ReattachExecuteRequest\x1a".spark.connect.ExecutePlanResponse"\x00\x30\x01\x12_\n\x0eReleaseExecute\x12$.spark.connect.ReleaseExecuteRequest\x1a%.spark.connect.ReleaseExecuteResponse"\x00\x12_\n\x0eReleaseSession\x12$.spark.connect.ReleaseSessionRequest\x1a%.spark.connect.ReleaseSessionResponse"\x00\x12h\n\x11\x46\x65tchErrorDetails\x12\'.spark.connect.FetchErrorDetailsRequest\x1a(.spark.connect.FetchErrorDetailsResponse"\x00\x42\x36\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' ) _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) @@ -120,7 +120,7 @@ _EXECUTEPLANREQUEST_REQUESTOPTION._serialized_start = 4977 _EXECUTEPLANREQUEST_REQUESTOPTION._serialized_end = 5142 _EXECUTEPLANRESPONSE._serialized_start = 5178 - _EXECUTEPLANRESPONSE._serialized_end = 7578 + _EXECUTEPLANRESPONSE._serialized_end = 7624 _EXECUTEPLANRESPONSE_SQLCOMMANDRESULT._serialized_start = 6492 _EXECUTEPLANRESPONSE_SQLCOMMANDRESULT._serialized_end = 6563 _EXECUTEPLANRESPONSE_ARROWBATCH._serialized_start = 6565 @@ -138,89 +138,89 @@ 
_EXECUTEPLANRESPONSE_RESULTCOMPLETE._serialized_start = 7323 _EXECUTEPLANRESPONSE_RESULTCOMPLETE._serialized_end = 7339 _EXECUTEPLANRESPONSE_EXECUTIONPROGRESS._serialized_start = 7342 - _EXECUTEPLANRESPONSE_EXECUTIONPROGRESS._serialized_end = 7561 - _KEYVALUE._serialized_start = 7580 - _KEYVALUE._serialized_end = 7645 - _CONFIGREQUEST._serialized_start = 7648 - _CONFIGREQUEST._serialized_end = 8676 - _CONFIGREQUEST_OPERATION._serialized_start = 7868 - _CONFIGREQUEST_OPERATION._serialized_end = 8366 - _CONFIGREQUEST_SET._serialized_start = 8368 - _CONFIGREQUEST_SET._serialized_end = 8420 - _CONFIGREQUEST_GET._serialized_start = 8422 - _CONFIGREQUEST_GET._serialized_end = 8447 - _CONFIGREQUEST_GETWITHDEFAULT._serialized_start = 8449 - _CONFIGREQUEST_GETWITHDEFAULT._serialized_end = 8512 - _CONFIGREQUEST_GETOPTION._serialized_start = 8514 - _CONFIGREQUEST_GETOPTION._serialized_end = 8545 - _CONFIGREQUEST_GETALL._serialized_start = 8547 - _CONFIGREQUEST_GETALL._serialized_end = 8595 - _CONFIGREQUEST_UNSET._serialized_start = 8597 - _CONFIGREQUEST_UNSET._serialized_end = 8624 - _CONFIGREQUEST_ISMODIFIABLE._serialized_start = 8626 - _CONFIGREQUEST_ISMODIFIABLE._serialized_end = 8660 - _CONFIGRESPONSE._serialized_start = 8679 - _CONFIGRESPONSE._serialized_end = 8854 - _ADDARTIFACTSREQUEST._serialized_start = 8857 - _ADDARTIFACTSREQUEST._serialized_end = 9728 - _ADDARTIFACTSREQUEST_ARTIFACTCHUNK._serialized_start = 9244 - _ADDARTIFACTSREQUEST_ARTIFACTCHUNK._serialized_end = 9297 - _ADDARTIFACTSREQUEST_SINGLECHUNKARTIFACT._serialized_start = 9299 - _ADDARTIFACTSREQUEST_SINGLECHUNKARTIFACT._serialized_end = 9410 - _ADDARTIFACTSREQUEST_BATCH._serialized_start = 9412 - _ADDARTIFACTSREQUEST_BATCH._serialized_end = 9505 - _ADDARTIFACTSREQUEST_BEGINCHUNKEDARTIFACT._serialized_start = 9508 - _ADDARTIFACTSREQUEST_BEGINCHUNKEDARTIFACT._serialized_end = 9701 - _ADDARTIFACTSRESPONSE._serialized_start = 9731 - _ADDARTIFACTSRESPONSE._serialized_end = 10003 - 
_ADDARTIFACTSRESPONSE_ARTIFACTSUMMARY._serialized_start = 9922 - _ADDARTIFACTSRESPONSE_ARTIFACTSUMMARY._serialized_end = 10003 - _ARTIFACTSTATUSESREQUEST._serialized_start = 10006 - _ARTIFACTSTATUSESREQUEST._serialized_end = 10201 - _ARTIFACTSTATUSESRESPONSE._serialized_start = 10204 - _ARTIFACTSTATUSESRESPONSE._serialized_end = 10556 - _ARTIFACTSTATUSESRESPONSE_STATUSESENTRY._serialized_start = 10399 - _ARTIFACTSTATUSESRESPONSE_STATUSESENTRY._serialized_end = 10514 - _ARTIFACTSTATUSESRESPONSE_ARTIFACTSTATUS._serialized_start = 10516 - _ARTIFACTSTATUSESRESPONSE_ARTIFACTSTATUS._serialized_end = 10556 - _INTERRUPTREQUEST._serialized_start = 10559 - _INTERRUPTREQUEST._serialized_end = 11031 - _INTERRUPTREQUEST_INTERRUPTTYPE._serialized_start = 10874 - _INTERRUPTREQUEST_INTERRUPTTYPE._serialized_end = 11002 - _INTERRUPTRESPONSE._serialized_start = 11034 - _INTERRUPTRESPONSE._serialized_end = 11178 - _REATTACHOPTIONS._serialized_start = 11180 - _REATTACHOPTIONS._serialized_end = 11233 - _REATTACHEXECUTEREQUEST._serialized_start = 11236 - _REATTACHEXECUTEREQUEST._serialized_end = 11511 - _RELEASEEXECUTEREQUEST._serialized_start = 11514 - _RELEASEEXECUTEREQUEST._serialized_end = 11968 - _RELEASEEXECUTEREQUEST_RELEASEALL._serialized_start = 11880 - _RELEASEEXECUTEREQUEST_RELEASEALL._serialized_end = 11892 - _RELEASEEXECUTEREQUEST_RELEASEUNTIL._serialized_start = 11894 - _RELEASEEXECUTEREQUEST_RELEASEUNTIL._serialized_end = 11941 - _RELEASEEXECUTERESPONSE._serialized_start = 11971 - _RELEASEEXECUTERESPONSE._serialized_end = 12136 - _RELEASESESSIONREQUEST._serialized_start = 12139 - _RELEASESESSIONREQUEST._serialized_end = 12310 - _RELEASESESSIONRESPONSE._serialized_start = 12312 - _RELEASESESSIONRESPONSE._serialized_end = 12420 - _FETCHERRORDETAILSREQUEST._serialized_start = 12423 - _FETCHERRORDETAILSREQUEST._serialized_end = 12624 - _FETCHERRORDETAILSRESPONSE._serialized_start = 12627 - _FETCHERRORDETAILSRESPONSE._serialized_end = 14182 - 
_FETCHERRORDETAILSRESPONSE_STACKTRACEELEMENT._serialized_start = 12856 - _FETCHERRORDETAILSRESPONSE_STACKTRACEELEMENT._serialized_end = 13030 - _FETCHERRORDETAILSRESPONSE_QUERYCONTEXT._serialized_start = 13033 - _FETCHERRORDETAILSRESPONSE_QUERYCONTEXT._serialized_end = 13401 - _FETCHERRORDETAILSRESPONSE_QUERYCONTEXT_CONTEXTTYPE._serialized_start = 13364 - _FETCHERRORDETAILSRESPONSE_QUERYCONTEXT_CONTEXTTYPE._serialized_end = 13401 - _FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE._serialized_start = 13404 - _FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE._serialized_end = 13813 - _FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE_MESSAGEPARAMETERSENTRY._serialized_start = 13715 - _FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE_MESSAGEPARAMETERSENTRY._serialized_end = 13783 - _FETCHERRORDETAILSRESPONSE_ERROR._serialized_start = 13816 - _FETCHERRORDETAILSRESPONSE_ERROR._serialized_end = 14163 - _SPARKCONNECTSERVICE._serialized_start = 14185 - _SPARKCONNECTSERVICE._serialized_end = 15131 + _EXECUTEPLANRESPONSE_EXECUTIONPROGRESS._serialized_end = 7607 + _KEYVALUE._serialized_start = 7626 + _KEYVALUE._serialized_end = 7691 + _CONFIGREQUEST._serialized_start = 7694 + _CONFIGREQUEST._serialized_end = 8722 + _CONFIGREQUEST_OPERATION._serialized_start = 7914 + _CONFIGREQUEST_OPERATION._serialized_end = 8412 + _CONFIGREQUEST_SET._serialized_start = 8414 + _CONFIGREQUEST_SET._serialized_end = 8466 + _CONFIGREQUEST_GET._serialized_start = 8468 + _CONFIGREQUEST_GET._serialized_end = 8493 + _CONFIGREQUEST_GETWITHDEFAULT._serialized_start = 8495 + _CONFIGREQUEST_GETWITHDEFAULT._serialized_end = 8558 + _CONFIGREQUEST_GETOPTION._serialized_start = 8560 + _CONFIGREQUEST_GETOPTION._serialized_end = 8591 + _CONFIGREQUEST_GETALL._serialized_start = 8593 + _CONFIGREQUEST_GETALL._serialized_end = 8641 + _CONFIGREQUEST_UNSET._serialized_start = 8643 + _CONFIGREQUEST_UNSET._serialized_end = 8670 + _CONFIGREQUEST_ISMODIFIABLE._serialized_start = 8672 + _CONFIGREQUEST_ISMODIFIABLE._serialized_end = 8706 + 
_CONFIGRESPONSE._serialized_start = 8725 + _CONFIGRESPONSE._serialized_end = 8900 + _ADDARTIFACTSREQUEST._serialized_start = 8903 + _ADDARTIFACTSREQUEST._serialized_end = 9774 + _ADDARTIFACTSREQUEST_ARTIFACTCHUNK._serialized_start = 9290 + _ADDARTIFACTSREQUEST_ARTIFACTCHUNK._serialized_end = 9343 + _ADDARTIFACTSREQUEST_SINGLECHUNKARTIFACT._serialized_start = 9345 + _ADDARTIFACTSREQUEST_SINGLECHUNKARTIFACT._serialized_end = 9456 + _ADDARTIFACTSREQUEST_BATCH._serialized_start = 9458 + _ADDARTIFACTSREQUEST_BATCH._serialized_end = 9551 + _ADDARTIFACTSREQUEST_BEGINCHUNKEDARTIFACT._serialized_start = 9554 + _ADDARTIFACTSREQUEST_BEGINCHUNKEDARTIFACT._serialized_end = 9747 + _ADDARTIFACTSRESPONSE._serialized_start = 9777 + _ADDARTIFACTSRESPONSE._serialized_end = 10049 + _ADDARTIFACTSRESPONSE_ARTIFACTSUMMARY._serialized_start = 9968 + _ADDARTIFACTSRESPONSE_ARTIFACTSUMMARY._serialized_end = 10049 + _ARTIFACTSTATUSESREQUEST._serialized_start = 10052 + _ARTIFACTSTATUSESREQUEST._serialized_end = 10247 + _ARTIFACTSTATUSESRESPONSE._serialized_start = 10250 + _ARTIFACTSTATUSESRESPONSE._serialized_end = 10602 + _ARTIFACTSTATUSESRESPONSE_STATUSESENTRY._serialized_start = 10445 + _ARTIFACTSTATUSESRESPONSE_STATUSESENTRY._serialized_end = 10560 + _ARTIFACTSTATUSESRESPONSE_ARTIFACTSTATUS._serialized_start = 10562 + _ARTIFACTSTATUSESRESPONSE_ARTIFACTSTATUS._serialized_end = 10602 + _INTERRUPTREQUEST._serialized_start = 10605 + _INTERRUPTREQUEST._serialized_end = 11077 + _INTERRUPTREQUEST_INTERRUPTTYPE._serialized_start = 10920 + _INTERRUPTREQUEST_INTERRUPTTYPE._serialized_end = 11048 + _INTERRUPTRESPONSE._serialized_start = 11080 + _INTERRUPTRESPONSE._serialized_end = 11224 + _REATTACHOPTIONS._serialized_start = 11226 + _REATTACHOPTIONS._serialized_end = 11279 + _REATTACHEXECUTEREQUEST._serialized_start = 11282 + _REATTACHEXECUTEREQUEST._serialized_end = 11557 + _RELEASEEXECUTEREQUEST._serialized_start = 11560 + _RELEASEEXECUTEREQUEST._serialized_end = 12014 + 
_RELEASEEXECUTEREQUEST_RELEASEALL._serialized_start = 11926 + _RELEASEEXECUTEREQUEST_RELEASEALL._serialized_end = 11938 + _RELEASEEXECUTEREQUEST_RELEASEUNTIL._serialized_start = 11940 + _RELEASEEXECUTEREQUEST_RELEASEUNTIL._serialized_end = 11987 + _RELEASEEXECUTERESPONSE._serialized_start = 12017 + _RELEASEEXECUTERESPONSE._serialized_end = 12182 + _RELEASESESSIONREQUEST._serialized_start = 12185 + _RELEASESESSIONREQUEST._serialized_end = 12356 + _RELEASESESSIONRESPONSE._serialized_start = 12358 + _RELEASESESSIONRESPONSE._serialized_end = 12466 + _FETCHERRORDETAILSREQUEST._serialized_start = 12469 + _FETCHERRORDETAILSREQUEST._serialized_end = 12670 + _FETCHERRORDETAILSRESPONSE._serialized_start = 12673 + _FETCHERRORDETAILSRESPONSE._serialized_end = 14228 + _FETCHERRORDETAILSRESPONSE_STACKTRACEELEMENT._serialized_start = 12902 + _FETCHERRORDETAILSRESPONSE_STACKTRACEELEMENT._serialized_end = 13076 + _FETCHERRORDETAILSRESPONSE_QUERYCONTEXT._serialized_start = 13079 + _FETCHERRORDETAILSRESPONSE_QUERYCONTEXT._serialized_end = 13447 + _FETCHERRORDETAILSRESPONSE_QUERYCONTEXT_CONTEXTTYPE._serialized_start = 13410 + _FETCHERRORDETAILSRESPONSE_QUERYCONTEXT_CONTEXTTYPE._serialized_end = 13447 + _FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE._serialized_start = 13450 + _FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE._serialized_end = 13859 + _FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE_MESSAGEPARAMETERSENTRY._serialized_start = 13761 + _FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE_MESSAGEPARAMETERSENTRY._serialized_end = 13829 + _FETCHERRORDETAILSRESPONSE_ERROR._serialized_start = 13862 + _FETCHERRORDETAILSRESPONSE_ERROR._serialized_end = 14209 + _SPARKCONNECTSERVICE._serialized_start = 14231 + _SPARKCONNECTSERVICE._serialized_end = 15177 # @@protoc_insertion_point(module_scope) diff --git a/python/pyspark/sql/connect/proto/base_pb2.pyi b/python/pyspark/sql/connect/proto/base_pb2.pyi index 879ec7e88b2e4..777bb7ee3e11b 100644 --- a/python/pyspark/sql/connect/proto/base_pb2.pyi +++ 
b/python/pyspark/sql/connect/proto/base_pb2.pyi @@ -1410,11 +1410,13 @@ class ExecutePlanResponse(google.protobuf.message.Message): NUM_STAGES_FIELD_NUMBER: builtins.int NUM_COMPLETED_STAGES_FIELD_NUMBER: builtins.int INPUT_BYTES_READ_FIELD_NUMBER: builtins.int + NUM_INFLIGHT_TASKS_FIELD_NUMBER: builtins.int num_tasks: builtins.int num_completed_tasks: builtins.int num_stages: builtins.int num_completed_stages: builtins.int input_bytes_read: builtins.int + num_inflight_tasks: builtins.int def __init__( self, *, @@ -1423,6 +1425,7 @@ class ExecutePlanResponse(google.protobuf.message.Message): num_stages: builtins.int = ..., num_completed_stages: builtins.int = ..., input_bytes_read: builtins.int = ..., + num_inflight_tasks: builtins.int = ..., ) -> None: ... def ClearField( self, @@ -1433,6 +1436,8 @@ class ExecutePlanResponse(google.protobuf.message.Message): b"num_completed_stages", "num_completed_tasks", b"num_completed_tasks", + "num_inflight_tasks", + b"num_inflight_tasks", "num_stages", b"num_stages", "num_tasks", diff --git a/python/pyspark/sql/connect/session.py b/python/pyspark/sql/connect/session.py index 9a678c28a6ccc..a83bda575ce3a 100644 --- a/python/pyspark/sql/connect/session.py +++ b/python/pyspark/sql/connect/session.py @@ -37,6 +37,7 @@ Iterable, TYPE_CHECKING, ClassVar, + Callable, ) import numpy as np @@ -317,6 +318,12 @@ def readStream(self) -> "DataStreamReader": readStream.__doc__ = PySparkSession.readStream.__doc__ + def register_progress_handler(self, handler: Callable) -> None: + """ + Register a progress handler to be called when a progress update is received from the server. 
+ """ + self._client.register_progress_handler(handler) + def _inferSchemaFromList( self, data: Iterable[Any], names: Optional[List[str]] = None ) -> StructType: diff --git a/python/pyspark/sql/connect/shell/progress.py b/python/pyspark/sql/connect/shell/progress.py index 86828b7cffada..48d4948fe6221 100644 --- a/python/pyspark/sql/connect/shell/progress.py +++ b/python/pyspark/sql/connect/shell/progress.py @@ -16,9 +16,10 @@ # """Implementation of a progress bar that is displayed while a query is running.""" - +import abc import time import sys +from typing import Iterable, Callable try: from IPython.utils.terminal import get_terminal_size @@ -31,6 +32,14 @@ def get_terminal_size(): from pyspark.sql.connect.shell import progress_bar_enabled +class ProgressHandler(abc.ABC): + @abc.abstractmethod + def __call__( + self, total_tasks: int, tasks_completed: int, bytes_read: int, inflight_tasks: int + ) -> None: + pass + + class Progress: """This is a small helper class to visualize a textual progress bar. he interface is very simple and assumes that nothing else prints to the @@ -39,7 +48,14 @@ class Progress: SI_BYTE_SIZES = (1 << 60, 1 << 50, 1 << 40, 1 << 30, 1 << 20, 1 << 10, 1) SI_BYTE_SUFFIXES = ("EiB", "PiB", "TiB", "GiB", "MiB", "KiB", "B") - def __init__(self, char="*", min_width=80, output=sys.stdout, enabled=False): + def __init__( + self, + char="*", + min_width=80, + output=sys.stdout, + enabled=False, + handlers: Iterable[ProgressHandler] = [], + ): """ Constructs a new Progress bar. 
The progress bar is typically used in the blocking query execution path to process the execution progress @@ -61,17 +77,27 @@ def __init__(self, char="*", min_width=80, output=sys.stdout, enabled=False): self._enabled = enabled or progress_bar_enabled() self._bytes_read = 0 self._out = output + self._running = 0 + self._handlers = handlers - def update_ticks(self, ticks: int, current: int, bytes_read: int) -> None: + def update_ticks(self, ticks: int, current: int, bytes_read: int, inflight_tasks: int) -> None: """This method is called from the execution to update the progress bar with a new total tick counter and the current position. This is necessary in case new stages get added with - new tasks and so the total task number will be udpated as well.""" + new tasks and so the total task number will be updated as well.""" if ticks > 0 and current != self._tick: self._ticks = ticks self._tick = current self._bytes_read = bytes_read if self._tick > 0: self.output() + self._running = inflight_tasks + for handler in self._handlers: + handler( + total_tasks=ticks, + tasks_completed=current, + bytes_read=bytes_read, + inflight_tasks=inflight_tasks, + ) def finish(self): """Clear the last line""" @@ -87,7 +113,8 @@ def output(self): percent_complete = (self._tick / self._ticks) * 100 elapsed = int(time.time() - self._started) scanned = self._bytes_to_string(self._bytes_read) - buffer = f"\r[{bar}] {percent_complete:.2f}% Complete ({elapsed}s, Scanned {scanned})" + running = self._running + buffer = f"\r[{bar}] {percent_complete:.2f}% Complete ({running} Tasks running, {elapsed}s, Scanned {scanned})" self._max_printed = max(len(buffer), self._max_printed) print(buffer, end="", flush=True, file=self._out) diff --git a/python/pyspark/sql/tests/connect/shell/test_progress.py b/python/pyspark/sql/tests/connect/shell/test_progress.py index 55667a9adb08e..a3d641b01d095 100644 --- a/python/pyspark/sql/tests/connect/shell/test_progress.py +++ 
b/python/pyspark/sql/tests/connect/shell/test_progress.py @@ -31,7 +31,7 @@ class ProgressBarTest(unittest.TestCase, PySparkErrorTestUtils): def test_simple_progress(self): buffer = StringIO() p = Progress(output=buffer, enabled=True) - p.update_ticks(100, 50, 999) + p.update_ticks(100, 50, 999, 10) val = buffer.getvalue() self.assertIn("50.00%", val, "Current progress is 50%") self.assertIn("****", val, "Should use the default char to print.") @@ -44,22 +44,42 @@ def test_simple_progress(self): def test_configure_char(self): buffer = StringIO() p = Progress(char="+", output=buffer, enabled=True) - p.update_ticks(100, 50, 999) + p.update_ticks(100, 50, 999, 10) val = buffer.getvalue() self.assertIn("++++++", val, "Updating the char works.") def test_disabled_does_not_print(self): buffer = StringIO() p = Progress(char="+", output=buffer, enabled=False) - p.update_ticks(100, 50, 999) - p.update_ticks(100, 51, 999) + p.update_ticks(100, 50, 999, 10) + p.update_ticks(100, 51, 999, 10) val = buffer.getvalue() self.assertEqual(0, len(val), "If the printing is disabled, don't print.") def test_finish_progress(self): buffer = StringIO() p = Progress(char="+", output=buffer, enabled=True) - p.update_ticks(100, 50, 999) + p.update_ticks(100, 50, 999, 10) + p.finish() + self.assertTrue(buffer.getvalue().endswith("\r"), "Last line should be empty") + + def test_progress_handler(self): + handler_called = 0 + + def handler(**kwargs): + nonlocal handler_called + handler_called = 1 + self.assertEqual(100, kwargs["total_tasks"]) + self.assertEqual(50, kwargs["tasks_completed"]) + self.assertEqual(999, kwargs["bytes_read"]) + self.assertEqual(10, kwargs["inflight_tasks"]) + + buffer = StringIO() + p = Progress(char="+", output=buffer, enabled=True, handlers=[handler]) + p.update_ticks(100, 0, 0, 1) + p.update_ticks(100, 50, 999, 10) + self.assertIn("++++++", buffer.getvalue(), "Updating the char works.") + self.assertEqual(1, handler_called, "Handler should be called.") p.finish() 
self.assertTrue(buffer.getvalue().endswith("\r"), "Last line should be empty") From 1b1a61ad22a7ca06f1f4b812c5fd83478f11ba13 Mon Sep 17 00:00:00 2001 From: Martin Grund Date: Fri, 22 Mar 2024 20:43:35 +0100 Subject: [PATCH 10/27] fix --- python/pyspark/sql/connect/shell/progress.py | 33 +++++++++++++------ .../sql/tests/connect/shell/test_progress.py | 6 +++- 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/python/pyspark/sql/connect/shell/progress.py b/python/pyspark/sql/connect/shell/progress.py index 48d4948fe6221..3f37d0e573c07 100644 --- a/python/pyspark/sql/connect/shell/progress.py +++ b/python/pyspark/sql/connect/shell/progress.py @@ -35,7 +35,12 @@ def get_terminal_size(): class ProgressHandler(abc.ABC): @abc.abstractmethod def __call__( - self, total_tasks: int, tasks_completed: int, bytes_read: int, inflight_tasks: int + self, + total_tasks: int, + tasks_completed: int, + bytes_read: int, + inflight_tasks: int, + done: bool, ) -> None: pass @@ -80,6 +85,16 @@ def __init__( self._running = 0 self._handlers = handlers + def _notify(self, done: bool = False): + for handler in self._handlers: + handler( + total_tasks=self._ticks, + tasks_completed=self._tick, + bytes_read=self._bytes_read, + inflight_tasks=self._running, + done=done, + ) + def update_ticks(self, ticks: int, current: int, bytes_read: int, inflight_tasks: int) -> None: """This method is called from the execution to update the progress bar with a new total tick counter and the current position. 
This is necessary in case new stages get added with @@ -91,16 +106,11 @@ def update_ticks(self, ticks: int, current: int, bytes_read: int, inflight_tasks if self._tick > 0: self.output() self._running = inflight_tasks - for handler in self._handlers: - handler( - total_tasks=ticks, - tasks_completed=current, - bytes_read=bytes_read, - inflight_tasks=inflight_tasks, - ) + self._notify(False) def finish(self): """Clear the last line""" + self._notify(True) if self._enabled: print("\r" + " " * self._max_printed, end="", flush=True, file=self._out) print("\r", end="", flush=True, file=self._out) @@ -114,13 +124,16 @@ def output(self): elapsed = int(time.time() - self._started) scanned = self._bytes_to_string(self._bytes_read) running = self._running - buffer = f"\r[{bar}] {percent_complete:.2f}% Complete ({running} Tasks running, {elapsed}s, Scanned {scanned})" + buffer = ( + f"\r[{bar}] {percent_complete:.2f}% Complete " + f"({running} Tasks running, {elapsed}s, Scanned {scanned})" + ) self._max_printed = max(len(buffer), self._max_printed) print(buffer, end="", flush=True, file=self._out) @staticmethod def _bytes_to_string(size: int) -> str: - """Helper method to convert a numeric bytes value into a human readable representation""" + """Helper method to convert a numeric bytes value into a human-readable representation""" i = 0 while i < len(Progress.SI_BYTE_SIZES) - 1 and size < 2 * Progress.SI_BYTE_SIZES[i]: i += 1 diff --git a/python/pyspark/sql/tests/connect/shell/test_progress.py b/python/pyspark/sql/tests/connect/shell/test_progress.py index a3d641b01d095..8caadb70d79de 100644 --- a/python/pyspark/sql/tests/connect/shell/test_progress.py +++ b/python/pyspark/sql/tests/connect/shell/test_progress.py @@ -65,14 +65,16 @@ def test_finish_progress(self): def test_progress_handler(self): handler_called = 0 + done = False def handler(**kwargs): - nonlocal handler_called + nonlocal handler_called, done handler_called = 1 self.assertEqual(100, kwargs["total_tasks"]) 
self.assertEqual(50, kwargs["tasks_completed"]) self.assertEqual(999, kwargs["bytes_read"]) self.assertEqual(10, kwargs["inflight_tasks"]) + done = kwargs["done"] buffer = StringIO() p = Progress(char="+", output=buffer, enabled=True, handlers=[handler]) @@ -80,8 +82,10 @@ def handler(**kwargs): p.update_ticks(100, 50, 999, 10) self.assertIn("++++++", buffer.getvalue(), "Updating the char works.") self.assertEqual(1, handler_called, "Handler should be called.") + self.assertFalse(done, "Before finish, done should be False") p.finish() self.assertTrue(buffer.getvalue().endswith("\r"), "Last line should be empty") + self.assertTrue(done, "After finish, done should be True") if __name__ == "__main__": From aa924c0be5235557dad44bbb4359bf86e4de0b2c Mon Sep 17 00:00:00 2001 From: Martin Grund Date: Fri, 22 Mar 2024 21:01:01 +0100 Subject: [PATCH 11/27] fix --- python/pyspark/sql/connect/client/core.py | 14 +++++---- python/pyspark/sql/connect/session.py | 29 ++++++++++++++++++- .../sql/tests/connect/test_connect_session.py | 21 ++++++++++++++ 3 files changed, 58 insertions(+), 6 deletions(-) diff --git a/python/pyspark/sql/connect/client/core.py b/python/pyspark/sql/connect/client/core.py index ed6d581645801..22664a09f17bb 100644 --- a/python/pyspark/sql/connect/client/core.py +++ b/python/pyspark/sql/connect/client/core.py @@ -699,20 +699,24 @@ def register_progress_handler(self, handler: Callable) -> None: Register a progress handler to be called when a progress message is received. Parameters ---------- - handler - - Returns - ------- + handler - The callable that will be called with the progress information. """ if handler in self._progress_handlers: return self._progress_handlers.append(handler) - def clear_handlers(self) -> None: + def clear_progress_handlers(self) -> None: self._progress_handlers.clear() def remove_progress_handler(self, handler: Callable) -> None: + """ + Remove a progress handler from the list of registered handlers. 
+ Parameters + ---------- + handler - The callable to remove from the list of progress handlers. + + """ self._progress_handlers.remove(handler) def _retrying(self) -> "Retrying": diff --git a/python/pyspark/sql/connect/session.py b/python/pyspark/sql/connect/session.py index a61e7ac1cd07e..547702f802c6f 100644 --- a/python/pyspark/sql/connect/session.py +++ b/python/pyspark/sql/connect/session.py @@ -319,12 +319,39 @@ def readStream(self) -> "DataStreamReader": readStream.__doc__ = PySparkSession.readStream.__doc__ - def register_progress_handler(self, handler: Callable) -> None: + def registerProgressHandler(self, handler: Callable) -> None: """ Register a progress handler to be called when a progress update is received from the server. + .. versionadded:: 4.0 + + Examples + -------- + >>> handler = lambda **kwargs: print(kwargs) + >>> spark.register_progress_handler(handler) """ self._client.register_progress_handler(handler) + def removeProgressHandler(self, handler: Callable) -> None: + """ + Remove a progress handler that was previously registered. + .. versionadded:: 4.0 + + Examples + -------- + >>> handler = lambda **kwargs: print(kwargs) + >>> spark.register_progress_handler(handler) + >>> spark.remove_progress_handler(handler) + """ + self._client.remove_progress_handler(handler) + + def clearProgressHandlers(self) -> None: + """ + Clear all registered progress handlers. + .. 
versionadded:: 4.0 + + """ + self._client.clear_progress_handlers() + def _inferSchemaFromList( self, data: Iterable[Any], names: Optional[List[str]] = None ) -> StructType: diff --git a/python/pyspark/sql/tests/connect/test_connect_session.py b/python/pyspark/sql/tests/connect/test_connect_session.py index bebe2cfc29233..b73a563409843 100644 --- a/python/pyspark/sql/tests/connect/test_connect_session.py +++ b/python/pyspark/sql/tests/connect/test_connect_session.py @@ -58,6 +58,27 @@ def setUp(self) -> None: def tearDown(self): self.spark.stop() + def test_progress_handler(self): + handler_called = [] + + def handler(**kwargs): + nonlocal handler_called + handler_called.append(kwargs) + + self.spark.registerProgressHandler(handler) + self.spark.sql("select 1").collect() + self.assertGreaterEqual(len(handler_called), 1) + + handler_called = [] + self.spark.removeProgressHandler(handler) + self.spark.sql("select 1").collect() + self.assertEqual(len(handler_called), 0) + + self.spark.registerProgressHandler(handler) + self.spark.clearProgressHandlers() + self.spark.sql("select 1").collect() + self.assertGreaterEqual(len(handler_called), 0) + def _check_no_active_session_error(self, e: PySparkException): self.check_error(exception=e, error_class="NO_ACTIVE_SESSION", message_parameters=dict()) From 7cedd98cc4acc4bc1e93d19d7404df1b3eb759a8 Mon Sep 17 00:00:00 2001 From: Martin Grund Date: Fri, 22 Mar 2024 21:03:48 +0100 Subject: [PATCH 12/27] doc --- python/pyspark/sql/connect/shell/progress.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/python/pyspark/sql/connect/shell/progress.py b/python/pyspark/sql/connect/shell/progress.py index 3f37d0e573c07..7fc17abb28992 100644 --- a/python/pyspark/sql/connect/shell/progress.py +++ b/python/pyspark/sql/connect/shell/progress.py @@ -70,6 +70,8 @@ def __init__( char str the Default character to be used for printing the bar. 
min_width numeric The minimum width of the progress bar output file The output device to write the progress bar to. + enabled bool Whether the progress bar printing should be enabled or not. + handlers list A list of handlers that will be called when the progress bar is updated. """ self._ticks = 0 self._tick = 0 @@ -98,7 +100,16 @@ def _notify(self, done: bool = False): def update_ticks(self, ticks: int, current: int, bytes_read: int, inflight_tasks: int) -> None: """This method is called from the execution to update the progress bar with a new total tick counter and the current position. This is necessary in case new stages get added with - new tasks and so the total task number will be updated as well.""" + new tasks and so the total task number will be updated as well. + + Parameters + ========== + ticks int The total number of ticks to be processed + current int The current tick position + bytes_read int The number of bytes read + inflight_tasks int The number of tasks that are currently running + + """ if ticks > 0 and current != self._tick: self._ticks = ticks self._tick = current @@ -109,7 +120,7 @@ def update_ticks(self, ticks: int, current: int, bytes_read: int, inflight_tasks self._notify(False) def finish(self): - """Clear the last line""" + """Clear the last line. 
Called when the processing is done.""" self._notify(True) if self._enabled: print("\r" + " " * self._max_printed, end="", flush=True, file=self._out) From e2063f285f9d927fbef106cfe2992d244280a4e4 Mon Sep 17 00:00:00 2001 From: Martin Grund Date: Sat, 23 Mar 2024 17:06:25 +0100 Subject: [PATCH 13/27] fixing tests --- python/pyspark/sql/connect/session.py | 6 +++--- python/pyspark/sql/connect/shell/progress.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/pyspark/sql/connect/session.py b/python/pyspark/sql/connect/session.py index 547702f802c6f..5d22209f07e8f 100644 --- a/python/pyspark/sql/connect/session.py +++ b/python/pyspark/sql/connect/session.py @@ -327,7 +327,7 @@ def registerProgressHandler(self, handler: Callable) -> None: Examples -------- >>> handler = lambda **kwargs: print(kwargs) - >>> spark.register_progress_handler(handler) + >>> spark.registerProgressHandler(handler) """ self._client.register_progress_handler(handler) @@ -339,8 +339,8 @@ def removeProgressHandler(self, handler: Callable) -> None: Examples -------- >>> handler = lambda **kwargs: print(kwargs) - >>> spark.register_progress_handler(handler) - >>> spark.remove_progress_handler(handler) + >>> spark.registerProgressHandler(handler) + >>> spark.removeProgressHandler(handler) """ self._client.remove_progress_handler(handler) diff --git a/python/pyspark/sql/connect/shell/progress.py b/python/pyspark/sql/connect/shell/progress.py index 7fc17abb28992..fb09d2662141c 100644 --- a/python/pyspark/sql/connect/shell/progress.py +++ b/python/pyspark/sql/connect/shell/progress.py @@ -19,7 +19,7 @@ import abc import time import sys -from typing import Iterable, Callable +from typing import Iterable try: from IPython.utils.terminal import get_terminal_size From 84425c3f442e5e4e4dfb1524002e07f7df5ee753 Mon Sep 17 00:00:00 2001 From: Martin Grund Date: Mon, 25 Mar 2024 09:44:27 +0100 Subject: [PATCH 14/27] fixing lint --- python/pyspark/shell.py | 2 +- 
python/pyspark/sql/connect/client/core.py | 2 +- python/pyspark/sql/connect/shell/__init__.py | 2 +- python/pyspark/sql/connect/shell/progress.py | 21 ++++++++++---------- 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/python/pyspark/shell.py b/python/pyspark/shell.py index a1ea99181c3ed..90fbc8433268b 100644 --- a/python/pyspark/shell.py +++ b/python/pyspark/shell.py @@ -56,7 +56,7 @@ if PROGRESS_BAR_ENABLED not in os.environ: os.environ[PROGRESS_BAR_ENABLED] = "1" else: - val = os.getenv(PROGRESS_BAR_ENABLED) + val = os.getenv(PROGRESS_BAR_ENABLED, "false") if val.lower().strip() == "false": os.environ[PROGRESS_BAR_ENABLED] = "0" elif val.lower().strip() == "true": diff --git a/python/pyspark/sql/connect/client/core.py b/python/pyspark/sql/connect/client/core.py index 22664a09f17bb..b6819df6d3654 100644 --- a/python/pyspark/sql/connect/client/core.py +++ b/python/pyspark/sql/connect/client/core.py @@ -692,7 +692,7 @@ class ClientThreadLocals(threading.local): self._profiler_collector = ConnectProfilerCollector() - self._progress_handlers: Iterable[ProgressHandler] = [] + self._progress_handlers: List[ProgressHandler] = [] def register_progress_handler(self, handler: Callable) -> None: """ diff --git a/python/pyspark/sql/connect/shell/__init__.py b/python/pyspark/sql/connect/shell/__init__.py index f757f8af4175c..b99733bffa0a0 100644 --- a/python/pyspark/sql/connect/shell/__init__.py +++ b/python/pyspark/sql/connect/shell/__init__.py @@ -22,5 +22,5 @@ PROGRESS_BAR_ENABLED = "SPARK_CONNECT_PROGRESS_BAR_ENABLED" -def progress_bar_enabled(): +def progress_bar_enabled() -> bool: return os.getenv(PROGRESS_BAR_ENABLED, "0") == "1" diff --git a/python/pyspark/sql/connect/shell/progress.py b/python/pyspark/sql/connect/shell/progress.py index fb09d2662141c..14ef76825b088 100644 --- a/python/pyspark/sql/connect/shell/progress.py +++ b/python/pyspark/sql/connect/shell/progress.py @@ -19,13 +19,14 @@ import abc import time import sys -from typing import 
Iterable +import typing +from typing import Iterable, Any try: from IPython.utils.terminal import get_terminal_size except ImportError: - def get_terminal_size(): + def get_terminal_size(x: Any = None, y: Any = None) -> tuple[int, int]: return (80, 25) @@ -55,12 +56,12 @@ class Progress: def __init__( self, - char="*", - min_width=80, - output=sys.stdout, - enabled=False, + char: str = "*", + min_width: int = 80, + output: typing.IO = sys.stdout, + enabled: bool = False, handlers: Iterable[ProgressHandler] = [], - ): + ) -> None: """ Constructs a new Progress bar. The progress bar is typically used in the blocking query execution path to process the execution progress @@ -87,7 +88,7 @@ def __init__( self._running = 0 self._handlers = handlers - def _notify(self, done: bool = False): + def _notify(self, done: bool = False) -> None: for handler in self._handlers: handler( total_tasks=self._ticks, @@ -119,14 +120,14 @@ def update_ticks(self, ticks: int, current: int, bytes_read: int, inflight_tasks self._running = inflight_tasks self._notify(False) - def finish(self): + def finish(self) -> None: """Clear the last line. 
Called when the processing is done.""" self._notify(True) if self._enabled: print("\r" + " " * self._max_printed, end="", flush=True, file=self._out) print("\r", end="", flush=True, file=self._out) - def output(self): + def output(self) -> None: """Writes the progress bar out.""" if self._enabled: val = int((self._tick / float(self._ticks)) * self._width) From 50e4cbde394071b1cbcb7df394b5e26eafaf1bde Mon Sep 17 00:00:00 2001 From: Martin Grund Date: Wed, 27 Mar 2024 15:28:50 +0100 Subject: [PATCH 15/27] fixing lint --- python/pyspark/sql/connect/client/core.py | 5 ++--- python/pyspark/sql/connect/session.py | 6 ++++-- python/pyspark/sql/connect/shell/progress.py | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/python/pyspark/sql/connect/client/core.py b/python/pyspark/sql/connect/client/core.py index b6819df6d3654..46968ab1e383c 100644 --- a/python/pyspark/sql/connect/client/core.py +++ b/python/pyspark/sql/connect/client/core.py @@ -44,7 +44,6 @@ cast, TYPE_CHECKING, Sequence, - Callable, ) import pandas as pd @@ -694,7 +693,7 @@ class ClientThreadLocals(threading.local): self._progress_handlers: List[ProgressHandler] = [] - def register_progress_handler(self, handler: Callable) -> None: + def register_progress_handler(self, handler: ProgressHandler) -> None: """ Register a progress handler to be called when a progress message is received. Parameters @@ -709,7 +708,7 @@ def register_progress_handler(self, handler: Callable) -> None: def clear_progress_handlers(self) -> None: self._progress_handlers.clear() - def remove_progress_handler(self, handler: Callable) -> None: + def remove_progress_handler(self, handler: ProgressHandler) -> None: """ Remove a progress handler from the list of registered handlers. 
Parameters diff --git a/python/pyspark/sql/connect/session.py b/python/pyspark/sql/connect/session.py index 5d22209f07e8f..fa29b6afe4f1d 100644 --- a/python/pyspark/sql/connect/session.py +++ b/python/pyspark/sql/connect/session.py @@ -326,7 +326,8 @@ def registerProgressHandler(self, handler: Callable) -> None: Examples -------- - >>> handler = lambda **kwargs: print(kwargs) + >>> import os + >>> handler = lambda **kwargs: os.write(str(kwargs)) >>> spark.registerProgressHandler(handler) """ self._client.register_progress_handler(handler) @@ -338,7 +339,8 @@ def removeProgressHandler(self, handler: Callable) -> None: Examples -------- - >>> handler = lambda **kwargs: print(kwargs) + >>> import os + >>> handler = lambda **kwargs: os.write(str(kwargs)) >>> spark.registerProgressHandler(handler) >>> spark.removeProgressHandler(handler) """ diff --git a/python/pyspark/sql/connect/shell/progress.py b/python/pyspark/sql/connect/shell/progress.py index 14ef76825b088..9e1080ce4befe 100644 --- a/python/pyspark/sql/connect/shell/progress.py +++ b/python/pyspark/sql/connect/shell/progress.py @@ -26,7 +26,7 @@ from IPython.utils.terminal import get_terminal_size except ImportError: - def get_terminal_size(x: Any = None, y: Any = None) -> tuple[int, int]: + def get_terminal_size(defaultx: Any = None, defaulty: Any = None) -> Any: return (80, 25) From 677e70b992f89aae70c2da6c1df68b3b2798488f Mon Sep 17 00:00:00 2001 From: Martin Grund Date: Thu, 28 Mar 2024 08:17:32 +0100 Subject: [PATCH 16/27] refactoring to expose stage information --- .../main/protobuf/spark/connect/base.proto | 19 +- .../sql/connect/client/SparkResult.scala | 43 +++-- .../ConnectProgressExecutionListener.scala | 71 ++++--- .../execution/ExecuteGrpcResponseSender.scala | 33 ++-- ...onnectProgressExecutionListenerSuite.scala | 82 ++++---- python/pyspark/sql/connect/client/core.py | 10 +- python/pyspark/sql/connect/proto/base_pb2.py | 176 +++++++++--------- python/pyspark/sql/connect/proto/base_pb2.pyi | 79 
+++++--- python/pyspark/sql/connect/shell/progress.py | 43 ++++- .../sql/tests/connect/shell/test_progress.py | 24 ++- 10 files changed, 348 insertions(+), 232 deletions(-) diff --git a/connector/connect/common/src/main/protobuf/spark/connect/base.proto b/connector/connect/common/src/main/protobuf/spark/connect/base.proto index c2a56146d5c84..1fd99663f9464 100644 --- a/connector/connect/common/src/main/protobuf/spark/connect/base.proto +++ b/connector/connect/common/src/main/protobuf/spark/connect/base.proto @@ -441,12 +441,19 @@ message ExecutePlanResponse { // This message is used to communicate progress about the query progress during the execution. message ExecutionProgress { - int64 num_tasks = 1; - int64 num_completed_tasks = 2; - int64 num_stages = 3; - int64 num_completed_stages = 4; - int64 input_bytes_read = 5; - int64 num_inflight_tasks = 6; + // Captures the progress of each individual stage. + repeated StageInfo stages = 1; + + // Captures the currently in progress tasks. + int64 num_inflight_tasks = 2; + + message StageInfo { + int64 stage_id = 1; + int64 num_tasks = 2; + int64 num_completed_tasks = 3; + int64 input_bytes_read = 4; + bool done = 5; + } } } diff --git a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkResult.scala b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkResult.scala index 5f52c5c664fa4..93d1075aea025 100644 --- a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkResult.scala +++ b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkResult.scala @@ -20,6 +20,7 @@ import java.lang.ref.Cleaner import java.util.Objects import scala.collection.mutable +import scala.jdk.CollectionConverters._ import org.apache.arrow.memory.BufferAllocator import org.apache.arrow.vector.ipc.message.{ArrowMessage, ArrowRecordBatch} @@ -40,17 +41,38 @@ private[sql] class SparkResult[T]( timeZoneId: String) extends AutoCloseable 
{ self => + case class StageInfo( + stageId: Long, + numTasks: Long, + completedTasks: Long = 0, + inputBytesRead: Long = 0, + completed: Boolean = false) + + object StageInfo { + def apply(stageInfo: proto.ExecutePlanResponse.ExecutionProgress.StageInfo): StageInfo = { + StageInfo( + stageInfo.getStageId, + stageInfo.getNumTasks, + stageInfo.getNumCompletedTasks, + stageInfo.getInputBytesRead, + stageInfo.getDone) + } + } + + object Progress { + def apply(progress: proto.ExecutePlanResponse.ExecutionProgress): Progress = { + Progress( + progress.getStagesList.asScala.map(StageInfo(_)).toSeq, + progress.getNumInflightTasks) + } + } + /** * Progress of the query execution. This information can be accessed from the iterator. */ - case class Progress( - totalTasks: Long = 0, - completedTasks: Long = 0, - totalStages: Long = 0, - completedStages: Long = 0, - inputBytesRead: Long = 0) + case class Progress(stages: Seq[StageInfo], inflight: Long) - var progress: Progress = new Progress() + var progress: Progress = new Progress(Seq.empty, 0) private[this] var opId: String = _ private[this] var numRecords: Int = 0 private[this] var structType: StructType = _ @@ -111,12 +133,7 @@ private[sql] class SparkResult[T]( // Update the execution status. This information can now be accessed directly from // the iterator. 
if (response.hasExecutionProgress) { - progress = Progress( - response.getExecutionProgress.getNumTasks, - response.getExecutionProgress.getNumCompletedTasks, - response.getExecutionProgress.getNumStages, - response.getExecutionProgress.getNumCompletedStages, - response.getExecutionProgress.getInputBytesRead) + progress = Progress(response.getExecutionProgress) } if (response.hasSchema) { diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListener.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListener.scala index 913b60b127cfa..f48553a3f3d01 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListener.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListener.scala @@ -17,8 +17,9 @@ package org.apache.spark.sql.connect.execution -import java.util.concurrent.atomic.{AtomicBoolean, AtomicInteger, AtomicLong} +import java.util.concurrent.atomic.{AtomicBoolean, AtomicInteger} +import org.apache.spark.connect.proto.ExecutePlanResponse import org.apache.spark.internal.Logging import org.apache.spark.scheduler.{SparkListener, SparkListenerJobEnd, SparkListenerJobStart, SparkListenerStageCompleted, SparkListenerTaskEnd, SparkListenerTaskStart} @@ -36,16 +37,39 @@ private[connect] class ConnectProgressExecutionListener extends SparkListener wi * executed through the connect API. 
*/ class ExecutionTracker(var tag: String) { + + class StageInfo( + val stageId: Int, + var numTasks: Int, + var completedTasks: Int = 0, + var inputBytesRead: Long = 0, + var completed: Boolean = false) { + + val lock = new Object + def update(i: StageInfo => Unit): Unit = { + lock.synchronized { + i(this) + } + } + + def toProto(): ExecutePlanResponse.ExecutionProgress.StageInfo = { + ExecutePlanResponse.ExecutionProgress.StageInfo + .newBuilder() + .setStageId(stageId) + .setNumTasks(numTasks) + .setNumCompletedTasks(completedTasks) + .setInputBytesRead(inputBytesRead) + .setDone(completed) + .build() + } + } + // The set of jobs that are being tracked by this tracker. We always only add to this list // but never remove. This is to avoid concurrency issues. private[ConnectProgressExecutionListener] var jobs: Set[Int] = Set() // The set of stages that are being tracked by this tracker. We always only add to this list // but never remove. This is to avoid concurrency issues. - private[ConnectProgressExecutionListener] var stages: Set[Int] = Set() - private[ConnectProgressExecutionListener] val totalTasks = new AtomicInteger(0) - private[ConnectProgressExecutionListener] val completedTasks = new AtomicInteger(0) - private[ConnectProgressExecutionListener] val completedStages = new AtomicInteger(0) - private[ConnectProgressExecutionListener] val inputBytesRead = new AtomicLong(0) + private[ConnectProgressExecutionListener] var stages: Map[Int, StageInfo] = Map.empty // The tracker is marked as dirty if it has new progress to report. private[ConnectProgressExecutionListener] val dirty = new AtomicBoolean(false) // Tracks all currently running tasks for a particular tracker. @@ -58,15 +82,9 @@ private[connect] class ConnectProgressExecutionListener extends SparkListener wi * * If the tracker was marked as dirty, the state is reset after. 
*/ - def yieldWhenDirty(thunk: (Int, Int, Int, Int, Int, Long) => Unit): Unit = { + def yieldWhenDirty(thunk: (Seq[StageInfo], Long) => Unit): Unit = { if (dirty.get()) { - thunk( - totalTasks.get(), - completedTasks.get(), - stages.size, - completedStages.get(), - inFlightTasks.get(), - inputBytesRead.get()) + thunk(stages.values.toSeq, inFlightTasks.get()) dirty.set(false) } } @@ -76,8 +94,9 @@ private[connect] class ConnectProgressExecutionListener extends SparkListener wi */ def addJob(job: SparkListenerJobStart): Unit = synchronized { jobs = jobs + job.jobId - stages = stages ++ job.stageIds - totalTasks.updateAndGet(_ + job.stageInfos.map(_.numTasks).sum) + job.stageInfos.foreach { stage => + stages = stages + (stage.stageId -> new StageInfo(stage.stageId, stage.numTasks)) + } dirty.set(true) } @@ -106,7 +125,7 @@ private[connect] class ConnectProgressExecutionListener extends SparkListener wi override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = { // Check if the task belongs to a job that we are tracking. - trackedTags.foreach({ case (tag, tracker) => + trackedTags.foreach({ case (_, tracker) => if (tracker.stages.contains(taskStart.stageId)) { tracker.inFlightTasks.incrementAndGet() tracker.dirty.set(true) @@ -116,10 +135,14 @@ private[connect] class ConnectProgressExecutionListener extends SparkListener wi override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = { // Check if the task belongs to a job that we are tracking. - trackedTags.foreach({ case (tag, tracker) => + trackedTags.foreach({ case (_, tracker) => if (tracker.stages.contains(taskEnd.stageId)) { - tracker.completedTasks.incrementAndGet() - tracker.inputBytesRead.updateAndGet(_ + taskEnd.taskMetrics.inputMetrics.bytesRead) + tracker.stages.get(taskEnd.stageId).foreach { stage => + stage.update { i => + i.completedTasks += 1 + i.inputBytesRead += taskEnd.taskMetrics.inputMetrics.bytesRead + } + } // This should never become negative, simply reset to zero if it does. 
tracker.inFlightTasks.decrementAndGet() if (tracker.inFlightTasks.get() < 0) { @@ -131,16 +154,18 @@ private[connect] class ConnectProgressExecutionListener extends SparkListener wi } override def onStageCompleted(stageCompleted: SparkListenerStageCompleted): Unit = { - trackedTags.foreach({ case (tag, tracker) => + trackedTags.foreach({ case (_, tracker) => if (tracker.stages.contains(stageCompleted.stageInfo.stageId)) { - tracker.completedStages.incrementAndGet() + tracker.stages(stageCompleted.stageInfo.stageId).update { stage => + stage.completed = true + } tracker.dirty.set(true) } }) } override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = { - trackedTags.foreach({ case (tag, tracker) => + trackedTags.foreach({ case (_, tracker) => if (tracker.jobs.contains(jobEnd.jobId)) { tracker.dirty.set(true) } diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala index 553162f858474..d53e854219950 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.connect.execution +import scala.jdk.CollectionConverters._ + import com.google.protobuf.Message import io.grpc.stub.{ServerCallStreamObserver, StreamObserver} @@ -144,24 +146,19 @@ private[connect] class ExecuteGrpcResponseSender[T <: Message]( // having to synchronize on the listener. listener.tryGetTracker(executeHolder.jobTag).foreach { tracker => // Only send progress message if there is something new to report. 
- tracker.yieldWhenDirty { - (tasks, tasksCompleted, stages, stagesCompleted, inflightTasks, inputBytesRead) => - val response = ExecutePlanResponse - .newBuilder() - .setExecutionProgress( - ExecutePlanResponse.ExecutionProgress - .newBuilder() - .setInputBytesRead(inputBytesRead) - .setNumTasks(tasks) - .setNumCompletedTasks(tasksCompleted) - .setNumCompletedStages(stagesCompleted) - .setNumStages(stages) - .setNumInflightTasks(inflightTasks)) - .build() - // There is a special case when the response observer has alreaady determined - // that the final message is send (and the stream will be closed) but we might want - // to send the progress message. In this case we ignore the result of the `onNext` call. - executeHolder.responseObserver.tryOnNext(response) + tracker.yieldWhenDirty { (stages, inflightTasks) => + val response = ExecutePlanResponse + .newBuilder() + .setExecutionProgress( + ExecutePlanResponse.ExecutionProgress + .newBuilder() + .addAllStages(stages.map(_.toProto()).asJava) + .setNumInflightTasks(inflightTasks)) + .build() + // There is a special case when the response observer has alreaady determined + // that the final message is send (and the stream will be closed) but we might want + // to send the progress message. In this case we ignore the result of the `onNext` call. 
+ executeHolder.responseObserver.tryOnNext(response) } } } diff --git a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListenerSuite.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListenerSuite.scala index ee6b9664286d4..43e978a18f1f9 100644 --- a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListenerSuite.scala +++ b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListenerSuite.scala @@ -70,15 +70,13 @@ class ConnectProgressExecutionListenerSuite extends SparkFunSuite with MockitoSu listener.onJobStart(testJobStart) val t = listener.trackedTags(testTag) - t.yieldWhenDirty( - (totalTasks, completedTasks, totalStages, completedStages, inflight, bytesRead) => { - assert(totalTasks == 2) - assert(completedTasks == 0) - assert(totalStages == 2) - assert(completedStages == 0) - assert(bytesRead == 0) - assert(inflight == 0) - }) + t.yieldWhenDirty((stages, inflight) => { + assert(stages.map(_.numTasks).sum == 2) + assert(stages.map(_.completedTasks).sum == 0) + assert(stages.size == 2) + assert(stages.map(_.inputBytesRead).sum == 0) + assert(inflight == 0) + }) } test("taskDone") { @@ -98,45 +96,59 @@ class ConnectProgressExecutionListenerSuite extends SparkFunSuite with MockitoSu val t = listener.trackedTags(testTag) var yielded = false - t.yieldWhenDirty { - (totalTasks, completedTasks, totalStages, completedStages, inflight, bytesRead) => - assert(totalTasks == 2) - assert(completedTasks == 0) - assert(totalStages == 2) - assert(completedStages == 0) - yielded = true + t.yieldWhenDirty { (stages, inflight) => + assert(stages.map(_.numTasks).sum == 2) + assert(stages.map(_.completedTasks).sum == 0) + assert(stages.size == 2) + assert( + stages + .map(_.completed match { + case true => 1 + case false => 0 + }) + .sum == 0) + yielded = true } 
assert(yielded, "Must updated with results") yielded = false listener.onTaskEnd(taskEnd) - t.yieldWhenDirty { - (totalTasks, completedTasks, totalStages, completedStages, inflight, bytesRead) => - assert(totalTasks == 2) - assert(completedTasks == 1) - assert(totalStages == 2) - assert(completedStages == 0) - assert(bytesRead == 500) - yielded = true + t.yieldWhenDirty { (stages, inflight) => + assert(stages.map(_.numTasks).sum == 2) + assert(stages.map(_.completedTasks).sum == 1) + assert(stages.size == 2) + assert(stages.map(_.inputBytesRead).sum == 500) + assert( + stages + .map(_.completed match { + case true => 1 + case false => 0 + }) + .sum == 0) + yielded = true } assert(yielded, "Must updated with results") yielded = false - t.yieldWhenDirty { - (totalTasks, completedTasks, totalStages, completedStages, inflight, bytesRead) => - yielded = true + t.yieldWhenDirty { (stages, inflight) => + yielded = true } assert(!yielded, "Must not update if not dirty") val stageEnd = SparkListenerStageCompleted(testStage1) listener.onStageCompleted(stageEnd) - t.yieldWhenDirty { - (totalTasks, completedTasks, totalStages, completedStages, inflight, bytesRead) => - assert(totalTasks == 2) - assert(completedTasks == 1) - assert(totalStages == 2) - assert(completedStages == 1) - assert(bytesRead == 500) - yielded = true + t.yieldWhenDirty { (stages, inflight) => + assert(stages.map(_.numTasks).sum == 2) + assert(stages.map(_.completedTasks).sum == 1) + assert(stages.size == 2) + assert(stages.map(_.inputBytesRead).sum == 500) + assert( + stages + .map(_.completed match { + case true => 1 + case false => 0 + }) + .sum == 1) + yielded = true } assert(yielded, "Must updated with results") } diff --git a/python/pyspark/sql/connect/client/core.py b/python/pyspark/sql/connect/client/core.py index 46968ab1e383c..f0b4766e5b9c0 100644 --- a/python/pyspark/sql/connect/client/core.py +++ b/python/pyspark/sql/connect/client/core.py @@ -90,7 +90,7 @@ from pyspark.rdd import PythonEvalType 
from pyspark.storagelevel import StorageLevel from pyspark.errors import PySparkValueError, PySparkAssertionError, PySparkNotImplementedError -from pyspark.sql.connect.shell.progress import Progress, ProgressHandler +from pyspark.sql.connect.shell.progress import Progress, ProgressHandler, from_proto if TYPE_CHECKING: from google.rpc.error_details_pb2 import ErrorInfo @@ -1304,12 +1304,8 @@ def handle_response( yield b.extension if b.HasField("execution_progress"): if progress: - progress.update_ticks( - b.execution_progress.num_tasks, - b.execution_progress.num_completed_tasks, - b.execution_progress.input_bytes_read, - b.execution_progress.num_inflight_tasks, - ) + p = from_proto(b.execution_progress) + progress.update_ticks(*p) if b.HasField("arrow_batch"): logger.debug( f"Received arrow batch rows={b.arrow_batch.row_count} " diff --git a/python/pyspark/sql/connect/proto/base_pb2.py b/python/pyspark/sql/connect/proto/base_pb2.py index 50463ab25e0af..39f38c7c9b9af 100644 --- a/python/pyspark/sql/connect/proto/base_pb2.py +++ b/python/pyspark/sql/connect/proto/base_pb2.py @@ -37,7 +37,7 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x18spark/connect/base.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1cspark/connect/commands.proto\x1a\x1aspark/connect/common.proto\x1a\x1fspark/connect/expressions.proto\x1a\x1dspark/connect/relations.proto\x1a\x19spark/connect/types.proto"t\n\x04Plan\x12-\n\x04root\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationH\x00R\x04root\x12\x32\n\x07\x63ommand\x18\x02 \x01(\x0b\x32\x16.spark.connect.CommandH\x00R\x07\x63ommandB\t\n\x07op_type"z\n\x0bUserContext\x12\x17\n\x07user_id\x18\x01 \x01(\tR\x06userId\x12\x1b\n\tuser_name\x18\x02 \x01(\tR\x08userName\x12\x35\n\nextensions\x18\xe7\x07 \x03(\x0b\x32\x14.google.protobuf.AnyR\nextensions"\xf8\x13\n\x12\x41nalyzePlanRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x11 
\x01(\tH\x01R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x02R\nclientType\x88\x01\x01\x12\x42\n\x06schema\x18\x04 \x01(\x0b\x32(.spark.connect.AnalyzePlanRequest.SchemaH\x00R\x06schema\x12\x45\n\x07\x65xplain\x18\x05 \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.ExplainH\x00R\x07\x65xplain\x12O\n\x0btree_string\x18\x06 \x01(\x0b\x32,.spark.connect.AnalyzePlanRequest.TreeStringH\x00R\ntreeString\x12\x46\n\x08is_local\x18\x07 \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.IsLocalH\x00R\x07isLocal\x12R\n\x0cis_streaming\x18\x08 \x01(\x0b\x32-.spark.connect.AnalyzePlanRequest.IsStreamingH\x00R\x0bisStreaming\x12O\n\x0binput_files\x18\t \x01(\x0b\x32,.spark.connect.AnalyzePlanRequest.InputFilesH\x00R\ninputFiles\x12U\n\rspark_version\x18\n \x01(\x0b\x32..spark.connect.AnalyzePlanRequest.SparkVersionH\x00R\x0csparkVersion\x12I\n\tddl_parse\x18\x0b \x01(\x0b\x32*.spark.connect.AnalyzePlanRequest.DDLParseH\x00R\x08\x64\x64lParse\x12X\n\x0esame_semantics\x18\x0c \x01(\x0b\x32/.spark.connect.AnalyzePlanRequest.SameSemanticsH\x00R\rsameSemantics\x12U\n\rsemantic_hash\x18\r \x01(\x0b\x32..spark.connect.AnalyzePlanRequest.SemanticHashH\x00R\x0csemanticHash\x12\x45\n\x07persist\x18\x0e \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.PersistH\x00R\x07persist\x12K\n\tunpersist\x18\x0f \x01(\x0b\x32+.spark.connect.AnalyzePlanRequest.UnpersistH\x00R\tunpersist\x12_\n\x11get_storage_level\x18\x10 \x01(\x0b\x32\x31.spark.connect.AnalyzePlanRequest.GetStorageLevelH\x00R\x0fgetStorageLevel\x1a\x31\n\x06Schema\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\xbb\x02\n\x07\x45xplain\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12X\n\x0c\x65xplain_mode\x18\x02 
\x01(\x0e\x32\x35.spark.connect.AnalyzePlanRequest.Explain.ExplainModeR\x0b\x65xplainMode"\xac\x01\n\x0b\x45xplainMode\x12\x1c\n\x18\x45XPLAIN_MODE_UNSPECIFIED\x10\x00\x12\x17\n\x13\x45XPLAIN_MODE_SIMPLE\x10\x01\x12\x19\n\x15\x45XPLAIN_MODE_EXTENDED\x10\x02\x12\x18\n\x14\x45XPLAIN_MODE_CODEGEN\x10\x03\x12\x15\n\x11\x45XPLAIN_MODE_COST\x10\x04\x12\x1a\n\x16\x45XPLAIN_MODE_FORMATTED\x10\x05\x1aZ\n\nTreeString\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12\x19\n\x05level\x18\x02 \x01(\x05H\x00R\x05level\x88\x01\x01\x42\x08\n\x06_level\x1a\x32\n\x07IsLocal\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x36\n\x0bIsStreaming\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x35\n\nInputFiles\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x0e\n\x0cSparkVersion\x1a)\n\x08\x44\x44LParse\x12\x1d\n\nddl_string\x18\x01 \x01(\tR\tddlString\x1ay\n\rSameSemantics\x12\x34\n\x0btarget_plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\ntargetPlan\x12\x32\n\nother_plan\x18\x02 \x01(\x0b\x32\x13.spark.connect.PlanR\totherPlan\x1a\x37\n\x0cSemanticHash\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x97\x01\n\x07Persist\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x12\x45\n\rstorage_level\x18\x02 \x01(\x0b\x32\x1b.spark.connect.StorageLevelH\x00R\x0cstorageLevel\x88\x01\x01\x42\x10\n\x0e_storage_level\x1an\n\tUnpersist\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x12\x1f\n\x08\x62locking\x18\x02 \x01(\x08H\x00R\x08\x62locking\x88\x01\x01\x42\x0b\n\t_blocking\x1a\x46\n\x0fGetStorageLevel\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relationB\t\n\x07\x61nalyzeB)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\xce\r\n\x13\x41nalyzePlanResponse\x12\x1d\n\nsession_id\x18\x01 
\x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x0f \x01(\tR\x13serverSideSessionId\x12\x43\n\x06schema\x18\x02 \x01(\x0b\x32).spark.connect.AnalyzePlanResponse.SchemaH\x00R\x06schema\x12\x46\n\x07\x65xplain\x18\x03 \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.ExplainH\x00R\x07\x65xplain\x12P\n\x0btree_string\x18\x04 \x01(\x0b\x32-.spark.connect.AnalyzePlanResponse.TreeStringH\x00R\ntreeString\x12G\n\x08is_local\x18\x05 \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.IsLocalH\x00R\x07isLocal\x12S\n\x0cis_streaming\x18\x06 \x01(\x0b\x32..spark.connect.AnalyzePlanResponse.IsStreamingH\x00R\x0bisStreaming\x12P\n\x0binput_files\x18\x07 \x01(\x0b\x32-.spark.connect.AnalyzePlanResponse.InputFilesH\x00R\ninputFiles\x12V\n\rspark_version\x18\x08 \x01(\x0b\x32/.spark.connect.AnalyzePlanResponse.SparkVersionH\x00R\x0csparkVersion\x12J\n\tddl_parse\x18\t \x01(\x0b\x32+.spark.connect.AnalyzePlanResponse.DDLParseH\x00R\x08\x64\x64lParse\x12Y\n\x0esame_semantics\x18\n \x01(\x0b\x32\x30.spark.connect.AnalyzePlanResponse.SameSemanticsH\x00R\rsameSemantics\x12V\n\rsemantic_hash\x18\x0b \x01(\x0b\x32/.spark.connect.AnalyzePlanResponse.SemanticHashH\x00R\x0csemanticHash\x12\x46\n\x07persist\x18\x0c \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.PersistH\x00R\x07persist\x12L\n\tunpersist\x18\r \x01(\x0b\x32,.spark.connect.AnalyzePlanResponse.UnpersistH\x00R\tunpersist\x12`\n\x11get_storage_level\x18\x0e \x01(\x0b\x32\x32.spark.connect.AnalyzePlanResponse.GetStorageLevelH\x00R\x0fgetStorageLevel\x1a\x39\n\x06Schema\x12/\n\x06schema\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema\x1a\x30\n\x07\x45xplain\x12%\n\x0e\x65xplain_string\x18\x01 \x01(\tR\rexplainString\x1a-\n\nTreeString\x12\x1f\n\x0btree_string\x18\x01 \x01(\tR\ntreeString\x1a$\n\x07IsLocal\x12\x19\n\x08is_local\x18\x01 \x01(\x08R\x07isLocal\x1a\x30\n\x0bIsStreaming\x12!\n\x0cis_streaming\x18\x01 \x01(\x08R\x0bisStreaming\x1a"\n\nInputFiles\x12\x14\n\x05\x66iles\x18\x01 
\x03(\tR\x05\x66iles\x1a(\n\x0cSparkVersion\x12\x18\n\x07version\x18\x01 \x01(\tR\x07version\x1a;\n\x08\x44\x44LParse\x12/\n\x06parsed\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06parsed\x1a\'\n\rSameSemantics\x12\x16\n\x06result\x18\x01 \x01(\x08R\x06result\x1a&\n\x0cSemanticHash\x12\x16\n\x06result\x18\x01 \x01(\x05R\x06result\x1a\t\n\x07Persist\x1a\x0b\n\tUnpersist\x1aS\n\x0fGetStorageLevel\x12@\n\rstorage_level\x18\x01 \x01(\x0b\x32\x1b.spark.connect.StorageLevelR\x0cstorageLevelB\x08\n\x06result"\xa3\x05\n\x12\x45xecutePlanRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x08 \x01(\tH\x00R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12&\n\x0coperation_id\x18\x06 \x01(\tH\x01R\x0boperationId\x88\x01\x01\x12\'\n\x04plan\x18\x03 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x02R\nclientType\x88\x01\x01\x12X\n\x0frequest_options\x18\x05 \x03(\x0b\x32/.spark.connect.ExecutePlanRequest.RequestOptionR\x0erequestOptions\x12\x12\n\x04tags\x18\x07 \x03(\tR\x04tags\x1a\xa5\x01\n\rRequestOption\x12K\n\x10reattach_options\x18\x01 \x01(\x0b\x32\x1e.spark.connect.ReattachOptionsH\x00R\x0freattachOptions\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textensionB\x10\n\x0erequest_optionB)\n\'_client_observed_server_side_session_idB\x0f\n\r_operation_idB\x0e\n\x0c_client_type"\x98\x14\n\x13\x45xecutePlanResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x0f \x01(\tR\x13serverSideSessionId\x12!\n\x0coperation_id\x18\x0c \x01(\tR\x0boperationId\x12\x1f\n\x0bresponse_id\x18\r \x01(\tR\nresponseId\x12P\n\x0b\x61rrow_batch\x18\x02 \x01(\x0b\x32-.spark.connect.ExecutePlanResponse.ArrowBatchH\x00R\narrowBatch\x12\x63\n\x12sql_command_result\x18\x05 
\x01(\x0b\x32\x33.spark.connect.ExecutePlanResponse.SqlCommandResultH\x00R\x10sqlCommandResult\x12~\n#write_stream_operation_start_result\x18\x08 \x01(\x0b\x32..spark.connect.WriteStreamOperationStartResultH\x00R\x1fwriteStreamOperationStartResult\x12q\n\x1estreaming_query_command_result\x18\t \x01(\x0b\x32*.spark.connect.StreamingQueryCommandResultH\x00R\x1bstreamingQueryCommandResult\x12k\n\x1cget_resources_command_result\x18\n \x01(\x0b\x32(.spark.connect.GetResourcesCommandResultH\x00R\x19getResourcesCommandResult\x12\x87\x01\n&streaming_query_manager_command_result\x18\x0b \x01(\x0b\x32\x31.spark.connect.StreamingQueryManagerCommandResultH\x00R"streamingQueryManagerCommandResult\x12\x87\x01\n&streaming_query_listener_events_result\x18\x10 \x01(\x0b\x32\x31.spark.connect.StreamingQueryListenerEventsResultH\x00R"streamingQueryListenerEventsResult\x12\\\n\x0fresult_complete\x18\x0e \x01(\x0b\x32\x31.spark.connect.ExecutePlanResponse.ResultCompleteH\x00R\x0eresultComplete\x12\x65\n\x12\x65xecution_progress\x18\x11 \x01(\x0b\x32\x34.spark.connect.ExecutePlanResponse.ExecutionProgressH\x00R\x11\x65xecutionProgress\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x12\x44\n\x07metrics\x18\x04 \x01(\x0b\x32*.spark.connect.ExecutePlanResponse.MetricsR\x07metrics\x12]\n\x10observed_metrics\x18\x06 \x03(\x0b\x32\x32.spark.connect.ExecutePlanResponse.ObservedMetricsR\x0fobservedMetrics\x12/\n\x06schema\x18\x07 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema\x1aG\n\x10SqlCommandResult\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x1av\n\nArrowBatch\x12\x1b\n\trow_count\x18\x01 \x01(\x03R\x08rowCount\x12\x12\n\x04\x64\x61ta\x18\x02 \x01(\x0cR\x04\x64\x61ta\x12&\n\x0cstart_offset\x18\x03 \x01(\x03H\x00R\x0bstartOffset\x88\x01\x01\x42\x0f\n\r_start_offset\x1a\x85\x04\n\x07Metrics\x12Q\n\x07metrics\x18\x01 
\x03(\x0b\x32\x37.spark.connect.ExecutePlanResponse.Metrics.MetricObjectR\x07metrics\x1a\xcc\x02\n\x0cMetricObject\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x17\n\x07plan_id\x18\x02 \x01(\x03R\x06planId\x12\x16\n\x06parent\x18\x03 \x01(\x03R\x06parent\x12z\n\x11\x65xecution_metrics\x18\x04 \x03(\x0b\x32M.spark.connect.ExecutePlanResponse.Metrics.MetricObject.ExecutionMetricsEntryR\x10\x65xecutionMetrics\x1a{\n\x15\x45xecutionMetricsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12L\n\x05value\x18\x02 \x01(\x0b\x32\x36.spark.connect.ExecutePlanResponse.Metrics.MetricValueR\x05value:\x02\x38\x01\x1aX\n\x0bMetricValue\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x14\n\x05value\x18\x02 \x01(\x03R\x05value\x12\x1f\n\x0bmetric_type\x18\x03 \x01(\tR\nmetricType\x1at\n\x0fObservedMetrics\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x39\n\x06values\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values\x12\x12\n\x04keys\x18\x03 \x03(\tR\x04keys\x1a\x10\n\x0eResultComplete\x1a\x89\x02\n\x11\x45xecutionProgress\x12\x1b\n\tnum_tasks\x18\x01 \x01(\x03R\x08numTasks\x12.\n\x13num_completed_tasks\x18\x02 \x01(\x03R\x11numCompletedTasks\x12\x1d\n\nnum_stages\x18\x03 \x01(\x03R\tnumStages\x12\x30\n\x14num_completed_stages\x18\x04 \x01(\x03R\x12numCompletedStages\x12(\n\x10input_bytes_read\x18\x05 \x01(\x03R\x0einputBytesRead\x12,\n\x12num_inflight_tasks\x18\x06 \x01(\x03R\x10numInflightTasksB\x0f\n\rresponse_type"A\n\x08KeyValue\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x19\n\x05value\x18\x02 \x01(\tH\x00R\x05value\x88\x01\x01\x42\x08\n\x06_value"\x87\t\n\rConfigRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x08 \x01(\tH\x00R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12\x44\n\toperation\x18\x03 \x01(\x0b\x32&.spark.connect.ConfigRequest.OperationR\toperation\x12$\n\x0b\x63lient_type\x18\x04 
\x01(\tH\x01R\nclientType\x88\x01\x01\x1a\xf2\x03\n\tOperation\x12\x34\n\x03set\x18\x01 \x01(\x0b\x32 .spark.connect.ConfigRequest.SetH\x00R\x03set\x12\x34\n\x03get\x18\x02 \x01(\x0b\x32 .spark.connect.ConfigRequest.GetH\x00R\x03get\x12W\n\x10get_with_default\x18\x03 \x01(\x0b\x32+.spark.connect.ConfigRequest.GetWithDefaultH\x00R\x0egetWithDefault\x12G\n\nget_option\x18\x04 \x01(\x0b\x32&.spark.connect.ConfigRequest.GetOptionH\x00R\tgetOption\x12>\n\x07get_all\x18\x05 \x01(\x0b\x32#.spark.connect.ConfigRequest.GetAllH\x00R\x06getAll\x12:\n\x05unset\x18\x06 \x01(\x0b\x32".spark.connect.ConfigRequest.UnsetH\x00R\x05unset\x12P\n\ris_modifiable\x18\x07 \x01(\x0b\x32).spark.connect.ConfigRequest.IsModifiableH\x00R\x0cisModifiableB\t\n\x07op_type\x1a\x34\n\x03Set\x12-\n\x05pairs\x18\x01 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x1a\x19\n\x03Get\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a?\n\x0eGetWithDefault\x12-\n\x05pairs\x18\x01 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x1a\x1f\n\tGetOption\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a\x30\n\x06GetAll\x12\x1b\n\x06prefix\x18\x01 \x01(\tH\x00R\x06prefix\x88\x01\x01\x42\t\n\x07_prefix\x1a\x1b\n\x05Unset\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a"\n\x0cIsModifiable\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keysB)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\xaf\x01\n\x0e\x43onfigResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x04 \x01(\tR\x13serverSideSessionId\x12-\n\x05pairs\x18\x02 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x12\x1a\n\x08warnings\x18\x03 \x03(\tR\x08warnings"\xea\x07\n\x13\x41\x64\x64\x41rtifactsRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12V\n&client_observed_server_side_session_id\x18\x07 \x01(\tH\x01R!clientObservedServerSideSessionId\x88\x01\x01\x12$\n\x0b\x63lient_type\x18\x06 
\x01(\tH\x02R\nclientType\x88\x01\x01\x12@\n\x05\x62\x61tch\x18\x03 \x01(\x0b\x32(.spark.connect.AddArtifactsRequest.BatchH\x00R\x05\x62\x61tch\x12Z\n\x0b\x62\x65gin_chunk\x18\x04 \x01(\x0b\x32\x37.spark.connect.AddArtifactsRequest.BeginChunkedArtifactH\x00R\nbeginChunk\x12H\n\x05\x63hunk\x18\x05 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkH\x00R\x05\x63hunk\x1a\x35\n\rArtifactChunk\x12\x12\n\x04\x64\x61ta\x18\x01 \x01(\x0cR\x04\x64\x61ta\x12\x10\n\x03\x63rc\x18\x02 \x01(\x03R\x03\x63rc\x1ao\n\x13SingleChunkArtifact\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x44\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkR\x04\x64\x61ta\x1a]\n\x05\x42\x61tch\x12T\n\tartifacts\x18\x01 \x03(\x0b\x32\x36.spark.connect.AddArtifactsRequest.SingleChunkArtifactR\tartifacts\x1a\xc1\x01\n\x14\x42\x65ginChunkedArtifact\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x1f\n\x0btotal_bytes\x18\x02 \x01(\x03R\ntotalBytes\x12\x1d\n\nnum_chunks\x18\x03 \x01(\x03R\tnumChunks\x12U\n\rinitial_chunk\x18\x04 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkR\x0cinitialChunkB\t\n\x07payloadB)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\x90\x02\n\x14\x41\x64\x64\x41rtifactsResponse\x12\x1d\n\nsession_id\x18\x02 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12Q\n\tartifacts\x18\x01 \x03(\x0b\x32\x33.spark.connect.AddArtifactsResponse.ArtifactSummaryR\tartifacts\x1aQ\n\x0f\x41rtifactSummary\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12*\n\x11is_crc_successful\x18\x02 \x01(\x08R\x0fisCrcSuccessful"\xc6\x02\n\x17\x41rtifactStatusesRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x05 \x01(\tH\x00R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 
\x01(\tH\x01R\nclientType\x88\x01\x01\x12\x14\n\x05names\x18\x04 \x03(\tR\x05namesB)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\xe0\x02\n\x18\x41rtifactStatusesResponse\x12\x1d\n\nsession_id\x18\x02 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12Q\n\x08statuses\x18\x01 \x03(\x0b\x32\x35.spark.connect.ArtifactStatusesResponse.StatusesEntryR\x08statuses\x1as\n\rStatusesEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12L\n\x05value\x18\x02 \x01(\x0b\x32\x36.spark.connect.ArtifactStatusesResponse.ArtifactStatusR\x05value:\x02\x38\x01\x1a(\n\x0e\x41rtifactStatus\x12\x16\n\x06\x65xists\x18\x01 \x01(\x08R\x06\x65xists"\xdb\x04\n\x10InterruptRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x07 \x01(\tH\x01R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x02R\nclientType\x88\x01\x01\x12T\n\x0einterrupt_type\x18\x04 \x01(\x0e\x32-.spark.connect.InterruptRequest.InterruptTypeR\rinterruptType\x12%\n\roperation_tag\x18\x05 \x01(\tH\x00R\x0coperationTag\x12#\n\x0coperation_id\x18\x06 \x01(\tH\x00R\x0boperationId"\x80\x01\n\rInterruptType\x12\x1e\n\x1aINTERRUPT_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12INTERRUPT_TYPE_ALL\x10\x01\x12\x16\n\x12INTERRUPT_TYPE_TAG\x10\x02\x12\x1f\n\x1bINTERRUPT_TYPE_OPERATION_ID\x10\x03\x42\x0b\n\tinterruptB)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\x90\x01\n\x11InterruptResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12\'\n\x0finterrupted_ids\x18\x02 \x03(\tR\x0einterruptedIds"5\n\x0fReattachOptions\x12"\n\x0creattachable\x18\x01 \x01(\x08R\x0creattachable"\x96\x03\n\x16ReattachExecuteRequest\x12\x1d\n\nsession_id\x18\x01 
\x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x06 \x01(\tH\x00R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12!\n\x0coperation_id\x18\x03 \x01(\tR\x0boperationId\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x01R\nclientType\x88\x01\x01\x12-\n\x10last_response_id\x18\x05 \x01(\tH\x02R\x0elastResponseId\x88\x01\x01\x42)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_typeB\x13\n\x11_last_response_id"\xc9\x04\n\x15ReleaseExecuteRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x07 \x01(\tH\x01R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12!\n\x0coperation_id\x18\x03 \x01(\tR\x0boperationId\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x02R\nclientType\x88\x01\x01\x12R\n\x0brelease_all\x18\x05 \x01(\x0b\x32/.spark.connect.ReleaseExecuteRequest.ReleaseAllH\x00R\nreleaseAll\x12X\n\rrelease_until\x18\x06 \x01(\x0b\x32\x31.spark.connect.ReleaseExecuteRequest.ReleaseUntilH\x00R\x0creleaseUntil\x1a\x0c\n\nReleaseAll\x1a/\n\x0cReleaseUntil\x12\x1f\n\x0bresponse_id\x18\x01 \x01(\tR\nresponseIdB\t\n\x07releaseB)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\xa5\x01\n\x16ReleaseExecuteResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12&\n\x0coperation_id\x18\x02 \x01(\tH\x00R\x0boperationId\x88\x01\x01\x42\x0f\n\r_operation_id"\xab\x01\n\x15ReleaseSessionRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x00R\nclientType\x88\x01\x01\x42\x0e\n\x0c_client_type"l\n\x16ReleaseSessionResponse\x12\x1d\n\nsession_id\x18\x01 
\x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x02 \x01(\tR\x13serverSideSessionId"\xcc\x02\n\x18\x46\x65tchErrorDetailsRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x05 \x01(\tH\x00R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12\x19\n\x08\x65rror_id\x18\x03 \x01(\tR\x07\x65rrorId\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x01R\nclientType\x88\x01\x01\x42)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\x93\x0c\n\x19\x46\x65tchErrorDetailsResponse\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12\x1d\n\nsession_id\x18\x04 \x01(\tR\tsessionId\x12)\n\x0eroot_error_idx\x18\x01 \x01(\x05H\x00R\x0crootErrorIdx\x88\x01\x01\x12\x46\n\x06\x65rrors\x18\x02 \x03(\x0b\x32..spark.connect.FetchErrorDetailsResponse.ErrorR\x06\x65rrors\x1a\xae\x01\n\x11StackTraceElement\x12\'\n\x0f\x64\x65\x63laring_class\x18\x01 \x01(\tR\x0e\x64\x65\x63laringClass\x12\x1f\n\x0bmethod_name\x18\x02 \x01(\tR\nmethodName\x12 \n\tfile_name\x18\x03 \x01(\tH\x00R\x08\x66ileName\x88\x01\x01\x12\x1f\n\x0bline_number\x18\x04 \x01(\x05R\nlineNumberB\x0c\n\n_file_name\x1a\xf0\x02\n\x0cQueryContext\x12\x64\n\x0c\x63ontext_type\x18\n \x01(\x0e\x32\x41.spark.connect.FetchErrorDetailsResponse.QueryContext.ContextTypeR\x0b\x63ontextType\x12\x1f\n\x0bobject_type\x18\x01 \x01(\tR\nobjectType\x12\x1f\n\x0bobject_name\x18\x02 \x01(\tR\nobjectName\x12\x1f\n\x0bstart_index\x18\x03 \x01(\x05R\nstartIndex\x12\x1d\n\nstop_index\x18\x04 \x01(\x05R\tstopIndex\x12\x1a\n\x08\x66ragment\x18\x05 \x01(\tR\x08\x66ragment\x12\x1b\n\tcall_site\x18\x06 \x01(\tR\x08\x63\x61llSite\x12\x18\n\x07summary\x18\x07 \x01(\tR\x07summary"%\n\x0b\x43ontextType\x12\x07\n\x03SQL\x10\x00\x12\r\n\tDATAFRAME\x10\x01\x1a\x99\x03\n\x0eSparkThrowable\x12$\n\x0b\x65rror_class\x18\x01 
\x01(\tH\x00R\nerrorClass\x88\x01\x01\x12}\n\x12message_parameters\x18\x02 \x03(\x0b\x32N.spark.connect.FetchErrorDetailsResponse.SparkThrowable.MessageParametersEntryR\x11messageParameters\x12\\\n\x0equery_contexts\x18\x03 \x03(\x0b\x32\x35.spark.connect.FetchErrorDetailsResponse.QueryContextR\rqueryContexts\x12 \n\tsql_state\x18\x04 \x01(\tH\x01R\x08sqlState\x88\x01\x01\x1a\x44\n\x16MessageParametersEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\x0e\n\x0c_error_classB\x0c\n\n_sql_state\x1a\xdb\x02\n\x05\x45rror\x12\x30\n\x14\x65rror_type_hierarchy\x18\x01 \x03(\tR\x12\x65rrorTypeHierarchy\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12[\n\x0bstack_trace\x18\x03 \x03(\x0b\x32:.spark.connect.FetchErrorDetailsResponse.StackTraceElementR\nstackTrace\x12 \n\tcause_idx\x18\x04 \x01(\x05H\x00R\x08\x63\x61useIdx\x88\x01\x01\x12\x65\n\x0fspark_throwable\x18\x05 \x01(\x0b\x32\x37.spark.connect.FetchErrorDetailsResponse.SparkThrowableH\x01R\x0esparkThrowable\x88\x01\x01\x42\x0c\n\n_cause_idxB\x12\n\x10_spark_throwableB\x11\n\x0f_root_error_idx2\xb2\x07\n\x13SparkConnectService\x12X\n\x0b\x45xecutePlan\x12!.spark.connect.ExecutePlanRequest\x1a".spark.connect.ExecutePlanResponse"\x00\x30\x01\x12V\n\x0b\x41nalyzePlan\x12!.spark.connect.AnalyzePlanRequest\x1a".spark.connect.AnalyzePlanResponse"\x00\x12G\n\x06\x43onfig\x12\x1c.spark.connect.ConfigRequest\x1a\x1d.spark.connect.ConfigResponse"\x00\x12[\n\x0c\x41\x64\x64\x41rtifacts\x12".spark.connect.AddArtifactsRequest\x1a#.spark.connect.AddArtifactsResponse"\x00(\x01\x12\x63\n\x0e\x41rtifactStatus\x12&.spark.connect.ArtifactStatusesRequest\x1a\'.spark.connect.ArtifactStatusesResponse"\x00\x12P\n\tInterrupt\x12\x1f.spark.connect.InterruptRequest\x1a 
.spark.connect.InterruptResponse"\x00\x12`\n\x0fReattachExecute\x12%.spark.connect.ReattachExecuteRequest\x1a".spark.connect.ExecutePlanResponse"\x00\x30\x01\x12_\n\x0eReleaseExecute\x12$.spark.connect.ReleaseExecuteRequest\x1a%.spark.connect.ReleaseExecuteResponse"\x00\x12_\n\x0eReleaseSession\x12$.spark.connect.ReleaseSessionRequest\x1a%.spark.connect.ReleaseSessionResponse"\x00\x12h\n\x11\x46\x65tchErrorDetails\x12\'.spark.connect.FetchErrorDetailsRequest\x1a(.spark.connect.FetchErrorDetailsResponse"\x00\x42\x36\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' + b'\n\x18spark/connect/base.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1cspark/connect/commands.proto\x1a\x1aspark/connect/common.proto\x1a\x1fspark/connect/expressions.proto\x1a\x1dspark/connect/relations.proto\x1a\x19spark/connect/types.proto"t\n\x04Plan\x12-\n\x04root\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationH\x00R\x04root\x12\x32\n\x07\x63ommand\x18\x02 \x01(\x0b\x32\x16.spark.connect.CommandH\x00R\x07\x63ommandB\t\n\x07op_type"z\n\x0bUserContext\x12\x17\n\x07user_id\x18\x01 \x01(\tR\x06userId\x12\x1b\n\tuser_name\x18\x02 \x01(\tR\x08userName\x12\x35\n\nextensions\x18\xe7\x07 \x03(\x0b\x32\x14.google.protobuf.AnyR\nextensions"\xf8\x13\n\x12\x41nalyzePlanRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x11 \x01(\tH\x01R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x02R\nclientType\x88\x01\x01\x12\x42\n\x06schema\x18\x04 \x01(\x0b\x32(.spark.connect.AnalyzePlanRequest.SchemaH\x00R\x06schema\x12\x45\n\x07\x65xplain\x18\x05 \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.ExplainH\x00R\x07\x65xplain\x12O\n\x0btree_string\x18\x06 \x01(\x0b\x32,.spark.connect.AnalyzePlanRequest.TreeStringH\x00R\ntreeString\x12\x46\n\x08is_local\x18\x07 
\x01(\x0b\x32).spark.connect.AnalyzePlanRequest.IsLocalH\x00R\x07isLocal\x12R\n\x0cis_streaming\x18\x08 \x01(\x0b\x32-.spark.connect.AnalyzePlanRequest.IsStreamingH\x00R\x0bisStreaming\x12O\n\x0binput_files\x18\t \x01(\x0b\x32,.spark.connect.AnalyzePlanRequest.InputFilesH\x00R\ninputFiles\x12U\n\rspark_version\x18\n \x01(\x0b\x32..spark.connect.AnalyzePlanRequest.SparkVersionH\x00R\x0csparkVersion\x12I\n\tddl_parse\x18\x0b \x01(\x0b\x32*.spark.connect.AnalyzePlanRequest.DDLParseH\x00R\x08\x64\x64lParse\x12X\n\x0esame_semantics\x18\x0c \x01(\x0b\x32/.spark.connect.AnalyzePlanRequest.SameSemanticsH\x00R\rsameSemantics\x12U\n\rsemantic_hash\x18\r \x01(\x0b\x32..spark.connect.AnalyzePlanRequest.SemanticHashH\x00R\x0csemanticHash\x12\x45\n\x07persist\x18\x0e \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.PersistH\x00R\x07persist\x12K\n\tunpersist\x18\x0f \x01(\x0b\x32+.spark.connect.AnalyzePlanRequest.UnpersistH\x00R\tunpersist\x12_\n\x11get_storage_level\x18\x10 \x01(\x0b\x32\x31.spark.connect.AnalyzePlanRequest.GetStorageLevelH\x00R\x0fgetStorageLevel\x1a\x31\n\x06Schema\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\xbb\x02\n\x07\x45xplain\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12X\n\x0c\x65xplain_mode\x18\x02 \x01(\x0e\x32\x35.spark.connect.AnalyzePlanRequest.Explain.ExplainModeR\x0b\x65xplainMode"\xac\x01\n\x0b\x45xplainMode\x12\x1c\n\x18\x45XPLAIN_MODE_UNSPECIFIED\x10\x00\x12\x17\n\x13\x45XPLAIN_MODE_SIMPLE\x10\x01\x12\x19\n\x15\x45XPLAIN_MODE_EXTENDED\x10\x02\x12\x18\n\x14\x45XPLAIN_MODE_CODEGEN\x10\x03\x12\x15\n\x11\x45XPLAIN_MODE_COST\x10\x04\x12\x1a\n\x16\x45XPLAIN_MODE_FORMATTED\x10\x05\x1aZ\n\nTreeString\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12\x19\n\x05level\x18\x02 \x01(\x05H\x00R\x05level\x88\x01\x01\x42\x08\n\x06_level\x1a\x32\n\x07IsLocal\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x36\n\x0bIsStreaming\x12\'\n\x04plan\x18\x01 
\x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x35\n\nInputFiles\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x0e\n\x0cSparkVersion\x1a)\n\x08\x44\x44LParse\x12\x1d\n\nddl_string\x18\x01 \x01(\tR\tddlString\x1ay\n\rSameSemantics\x12\x34\n\x0btarget_plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\ntargetPlan\x12\x32\n\nother_plan\x18\x02 \x01(\x0b\x32\x13.spark.connect.PlanR\totherPlan\x1a\x37\n\x0cSemanticHash\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x97\x01\n\x07Persist\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x12\x45\n\rstorage_level\x18\x02 \x01(\x0b\x32\x1b.spark.connect.StorageLevelH\x00R\x0cstorageLevel\x88\x01\x01\x42\x10\n\x0e_storage_level\x1an\n\tUnpersist\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x12\x1f\n\x08\x62locking\x18\x02 \x01(\x08H\x00R\x08\x62locking\x88\x01\x01\x42\x0b\n\t_blocking\x1a\x46\n\x0fGetStorageLevel\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relationB\t\n\x07\x61nalyzeB)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\xce\r\n\x13\x41nalyzePlanResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x0f \x01(\tR\x13serverSideSessionId\x12\x43\n\x06schema\x18\x02 \x01(\x0b\x32).spark.connect.AnalyzePlanResponse.SchemaH\x00R\x06schema\x12\x46\n\x07\x65xplain\x18\x03 \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.ExplainH\x00R\x07\x65xplain\x12P\n\x0btree_string\x18\x04 \x01(\x0b\x32-.spark.connect.AnalyzePlanResponse.TreeStringH\x00R\ntreeString\x12G\n\x08is_local\x18\x05 \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.IsLocalH\x00R\x07isLocal\x12S\n\x0cis_streaming\x18\x06 \x01(\x0b\x32..spark.connect.AnalyzePlanResponse.IsStreamingH\x00R\x0bisStreaming\x12P\n\x0binput_files\x18\x07 \x01(\x0b\x32-.spark.connect.AnalyzePlanResponse.InputFilesH\x00R\ninputFiles\x12V\n\rspark_version\x18\x08 
\x01(\x0b\x32/.spark.connect.AnalyzePlanResponse.SparkVersionH\x00R\x0csparkVersion\x12J\n\tddl_parse\x18\t \x01(\x0b\x32+.spark.connect.AnalyzePlanResponse.DDLParseH\x00R\x08\x64\x64lParse\x12Y\n\x0esame_semantics\x18\n \x01(\x0b\x32\x30.spark.connect.AnalyzePlanResponse.SameSemanticsH\x00R\rsameSemantics\x12V\n\rsemantic_hash\x18\x0b \x01(\x0b\x32/.spark.connect.AnalyzePlanResponse.SemanticHashH\x00R\x0csemanticHash\x12\x46\n\x07persist\x18\x0c \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.PersistH\x00R\x07persist\x12L\n\tunpersist\x18\r \x01(\x0b\x32,.spark.connect.AnalyzePlanResponse.UnpersistH\x00R\tunpersist\x12`\n\x11get_storage_level\x18\x0e \x01(\x0b\x32\x32.spark.connect.AnalyzePlanResponse.GetStorageLevelH\x00R\x0fgetStorageLevel\x1a\x39\n\x06Schema\x12/\n\x06schema\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema\x1a\x30\n\x07\x45xplain\x12%\n\x0e\x65xplain_string\x18\x01 \x01(\tR\rexplainString\x1a-\n\nTreeString\x12\x1f\n\x0btree_string\x18\x01 \x01(\tR\ntreeString\x1a$\n\x07IsLocal\x12\x19\n\x08is_local\x18\x01 \x01(\x08R\x07isLocal\x1a\x30\n\x0bIsStreaming\x12!\n\x0cis_streaming\x18\x01 \x01(\x08R\x0bisStreaming\x1a"\n\nInputFiles\x12\x14\n\x05\x66iles\x18\x01 \x03(\tR\x05\x66iles\x1a(\n\x0cSparkVersion\x12\x18\n\x07version\x18\x01 \x01(\tR\x07version\x1a;\n\x08\x44\x44LParse\x12/\n\x06parsed\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06parsed\x1a\'\n\rSameSemantics\x12\x16\n\x06result\x18\x01 \x01(\x08R\x06result\x1a&\n\x0cSemanticHash\x12\x16\n\x06result\x18\x01 \x01(\x05R\x06result\x1a\t\n\x07Persist\x1a\x0b\n\tUnpersist\x1aS\n\x0fGetStorageLevel\x12@\n\rstorage_level\x18\x01 \x01(\x0b\x32\x1b.spark.connect.StorageLevelR\x0cstorageLevelB\x08\n\x06result"\xa3\x05\n\x12\x45xecutePlanRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x08 \x01(\tH\x00R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 
\x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12&\n\x0coperation_id\x18\x06 \x01(\tH\x01R\x0boperationId\x88\x01\x01\x12\'\n\x04plan\x18\x03 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x02R\nclientType\x88\x01\x01\x12X\n\x0frequest_options\x18\x05 \x03(\x0b\x32/.spark.connect.ExecutePlanRequest.RequestOptionR\x0erequestOptions\x12\x12\n\x04tags\x18\x07 \x03(\tR\x04tags\x1a\xa5\x01\n\rRequestOption\x12K\n\x10reattach_options\x18\x01 \x01(\x0b\x32\x1e.spark.connect.ReattachOptionsH\x00R\x0freattachOptions\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textensionB\x10\n\x0erequest_optionB)\n\'_client_observed_server_side_session_idB\x0f\n\r_operation_idB\x0e\n\x0c_client_type"\xdc\x14\n\x13\x45xecutePlanResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x0f \x01(\tR\x13serverSideSessionId\x12!\n\x0coperation_id\x18\x0c \x01(\tR\x0boperationId\x12\x1f\n\x0bresponse_id\x18\r \x01(\tR\nresponseId\x12P\n\x0b\x61rrow_batch\x18\x02 \x01(\x0b\x32-.spark.connect.ExecutePlanResponse.ArrowBatchH\x00R\narrowBatch\x12\x63\n\x12sql_command_result\x18\x05 \x01(\x0b\x32\x33.spark.connect.ExecutePlanResponse.SqlCommandResultH\x00R\x10sqlCommandResult\x12~\n#write_stream_operation_start_result\x18\x08 \x01(\x0b\x32..spark.connect.WriteStreamOperationStartResultH\x00R\x1fwriteStreamOperationStartResult\x12q\n\x1estreaming_query_command_result\x18\t \x01(\x0b\x32*.spark.connect.StreamingQueryCommandResultH\x00R\x1bstreamingQueryCommandResult\x12k\n\x1cget_resources_command_result\x18\n \x01(\x0b\x32(.spark.connect.GetResourcesCommandResultH\x00R\x19getResourcesCommandResult\x12\x87\x01\n&streaming_query_manager_command_result\x18\x0b \x01(\x0b\x32\x31.spark.connect.StreamingQueryManagerCommandResultH\x00R"streamingQueryManagerCommandResult\x12\x87\x01\n&streaming_query_listener_events_result\x18\x10 
\x01(\x0b\x32\x31.spark.connect.StreamingQueryListenerEventsResultH\x00R"streamingQueryListenerEventsResult\x12\\\n\x0fresult_complete\x18\x0e \x01(\x0b\x32\x31.spark.connect.ExecutePlanResponse.ResultCompleteH\x00R\x0eresultComplete\x12\x65\n\x12\x65xecution_progress\x18\x11 \x01(\x0b\x32\x34.spark.connect.ExecutePlanResponse.ExecutionProgressH\x00R\x11\x65xecutionProgress\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x12\x44\n\x07metrics\x18\x04 \x01(\x0b\x32*.spark.connect.ExecutePlanResponse.MetricsR\x07metrics\x12]\n\x10observed_metrics\x18\x06 \x03(\x0b\x32\x32.spark.connect.ExecutePlanResponse.ObservedMetricsR\x0fobservedMetrics\x12/\n\x06schema\x18\x07 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema\x1aG\n\x10SqlCommandResult\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x1av\n\nArrowBatch\x12\x1b\n\trow_count\x18\x01 \x01(\x03R\x08rowCount\x12\x12\n\x04\x64\x61ta\x18\x02 \x01(\x0cR\x04\x64\x61ta\x12&\n\x0cstart_offset\x18\x03 \x01(\x03H\x00R\x0bstartOffset\x88\x01\x01\x42\x0f\n\r_start_offset\x1a\x85\x04\n\x07Metrics\x12Q\n\x07metrics\x18\x01 \x03(\x0b\x32\x37.spark.connect.ExecutePlanResponse.Metrics.MetricObjectR\x07metrics\x1a\xcc\x02\n\x0cMetricObject\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x17\n\x07plan_id\x18\x02 \x01(\x03R\x06planId\x12\x16\n\x06parent\x18\x03 \x01(\x03R\x06parent\x12z\n\x11\x65xecution_metrics\x18\x04 \x03(\x0b\x32M.spark.connect.ExecutePlanResponse.Metrics.MetricObject.ExecutionMetricsEntryR\x10\x65xecutionMetrics\x1a{\n\x15\x45xecutionMetricsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12L\n\x05value\x18\x02 \x01(\x0b\x32\x36.spark.connect.ExecutePlanResponse.Metrics.MetricValueR\x05value:\x02\x38\x01\x1aX\n\x0bMetricValue\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x14\n\x05value\x18\x02 \x01(\x03R\x05value\x12\x1f\n\x0bmetric_type\x18\x03 \x01(\tR\nmetricType\x1at\n\x0fObservedMetrics\x12\x12\n\x04name\x18\x01 
\x01(\tR\x04name\x12\x39\n\x06values\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values\x12\x12\n\x04keys\x18\x03 \x03(\tR\x04keys\x1a\x10\n\x0eResultComplete\x1a\xcd\x02\n\x11\x45xecutionProgress\x12V\n\x06stages\x18\x01 \x03(\x0b\x32>.spark.connect.ExecutePlanResponse.ExecutionProgress.StageInfoR\x06stages\x12,\n\x12num_inflight_tasks\x18\x02 \x01(\x03R\x10numInflightTasks\x1a\xb1\x01\n\tStageInfo\x12\x19\n\x08stage_id\x18\x01 \x01(\x03R\x07stageId\x12\x1b\n\tnum_tasks\x18\x02 \x01(\x03R\x08numTasks\x12.\n\x13num_completed_tasks\x18\x03 \x01(\x03R\x11numCompletedTasks\x12(\n\x10input_bytes_read\x18\x04 \x01(\x03R\x0einputBytesRead\x12\x12\n\x04\x64one\x18\x05 \x01(\x08R\x04\x64oneB\x0f\n\rresponse_type"A\n\x08KeyValue\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x19\n\x05value\x18\x02 \x01(\tH\x00R\x05value\x88\x01\x01\x42\x08\n\x06_value"\x87\t\n\rConfigRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x08 \x01(\tH\x00R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12\x44\n\toperation\x18\x03 \x01(\x0b\x32&.spark.connect.ConfigRequest.OperationR\toperation\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x01R\nclientType\x88\x01\x01\x1a\xf2\x03\n\tOperation\x12\x34\n\x03set\x18\x01 \x01(\x0b\x32 .spark.connect.ConfigRequest.SetH\x00R\x03set\x12\x34\n\x03get\x18\x02 \x01(\x0b\x32 .spark.connect.ConfigRequest.GetH\x00R\x03get\x12W\n\x10get_with_default\x18\x03 \x01(\x0b\x32+.spark.connect.ConfigRequest.GetWithDefaultH\x00R\x0egetWithDefault\x12G\n\nget_option\x18\x04 \x01(\x0b\x32&.spark.connect.ConfigRequest.GetOptionH\x00R\tgetOption\x12>\n\x07get_all\x18\x05 \x01(\x0b\x32#.spark.connect.ConfigRequest.GetAllH\x00R\x06getAll\x12:\n\x05unset\x18\x06 \x01(\x0b\x32".spark.connect.ConfigRequest.UnsetH\x00R\x05unset\x12P\n\ris_modifiable\x18\x07 
\x01(\x0b\x32).spark.connect.ConfigRequest.IsModifiableH\x00R\x0cisModifiableB\t\n\x07op_type\x1a\x34\n\x03Set\x12-\n\x05pairs\x18\x01 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x1a\x19\n\x03Get\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a?\n\x0eGetWithDefault\x12-\n\x05pairs\x18\x01 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x1a\x1f\n\tGetOption\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a\x30\n\x06GetAll\x12\x1b\n\x06prefix\x18\x01 \x01(\tH\x00R\x06prefix\x88\x01\x01\x42\t\n\x07_prefix\x1a\x1b\n\x05Unset\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a"\n\x0cIsModifiable\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keysB)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\xaf\x01\n\x0e\x43onfigResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x04 \x01(\tR\x13serverSideSessionId\x12-\n\x05pairs\x18\x02 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x12\x1a\n\x08warnings\x18\x03 \x03(\tR\x08warnings"\xea\x07\n\x13\x41\x64\x64\x41rtifactsRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12V\n&client_observed_server_side_session_id\x18\x07 \x01(\tH\x01R!clientObservedServerSideSessionId\x88\x01\x01\x12$\n\x0b\x63lient_type\x18\x06 \x01(\tH\x02R\nclientType\x88\x01\x01\x12@\n\x05\x62\x61tch\x18\x03 \x01(\x0b\x32(.spark.connect.AddArtifactsRequest.BatchH\x00R\x05\x62\x61tch\x12Z\n\x0b\x62\x65gin_chunk\x18\x04 \x01(\x0b\x32\x37.spark.connect.AddArtifactsRequest.BeginChunkedArtifactH\x00R\nbeginChunk\x12H\n\x05\x63hunk\x18\x05 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkH\x00R\x05\x63hunk\x1a\x35\n\rArtifactChunk\x12\x12\n\x04\x64\x61ta\x18\x01 \x01(\x0cR\x04\x64\x61ta\x12\x10\n\x03\x63rc\x18\x02 \x01(\x03R\x03\x63rc\x1ao\n\x13SingleChunkArtifact\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x44\n\x04\x64\x61ta\x18\x02 
\x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkR\x04\x64\x61ta\x1a]\n\x05\x42\x61tch\x12T\n\tartifacts\x18\x01 \x03(\x0b\x32\x36.spark.connect.AddArtifactsRequest.SingleChunkArtifactR\tartifacts\x1a\xc1\x01\n\x14\x42\x65ginChunkedArtifact\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x1f\n\x0btotal_bytes\x18\x02 \x01(\x03R\ntotalBytes\x12\x1d\n\nnum_chunks\x18\x03 \x01(\x03R\tnumChunks\x12U\n\rinitial_chunk\x18\x04 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkR\x0cinitialChunkB\t\n\x07payloadB)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\x90\x02\n\x14\x41\x64\x64\x41rtifactsResponse\x12\x1d\n\nsession_id\x18\x02 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12Q\n\tartifacts\x18\x01 \x03(\x0b\x32\x33.spark.connect.AddArtifactsResponse.ArtifactSummaryR\tartifacts\x1aQ\n\x0f\x41rtifactSummary\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12*\n\x11is_crc_successful\x18\x02 \x01(\x08R\x0fisCrcSuccessful"\xc6\x02\n\x17\x41rtifactStatusesRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x05 \x01(\tH\x00R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x01R\nclientType\x88\x01\x01\x12\x14\n\x05names\x18\x04 \x03(\tR\x05namesB)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\xe0\x02\n\x18\x41rtifactStatusesResponse\x12\x1d\n\nsession_id\x18\x02 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12Q\n\x08statuses\x18\x01 \x03(\x0b\x32\x35.spark.connect.ArtifactStatusesResponse.StatusesEntryR\x08statuses\x1as\n\rStatusesEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12L\n\x05value\x18\x02 
\x01(\x0b\x32\x36.spark.connect.ArtifactStatusesResponse.ArtifactStatusR\x05value:\x02\x38\x01\x1a(\n\x0e\x41rtifactStatus\x12\x16\n\x06\x65xists\x18\x01 \x01(\x08R\x06\x65xists"\xdb\x04\n\x10InterruptRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x07 \x01(\tH\x01R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x02R\nclientType\x88\x01\x01\x12T\n\x0einterrupt_type\x18\x04 \x01(\x0e\x32-.spark.connect.InterruptRequest.InterruptTypeR\rinterruptType\x12%\n\roperation_tag\x18\x05 \x01(\tH\x00R\x0coperationTag\x12#\n\x0coperation_id\x18\x06 \x01(\tH\x00R\x0boperationId"\x80\x01\n\rInterruptType\x12\x1e\n\x1aINTERRUPT_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12INTERRUPT_TYPE_ALL\x10\x01\x12\x16\n\x12INTERRUPT_TYPE_TAG\x10\x02\x12\x1f\n\x1bINTERRUPT_TYPE_OPERATION_ID\x10\x03\x42\x0b\n\tinterruptB)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\x90\x01\n\x11InterruptResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12\'\n\x0finterrupted_ids\x18\x02 \x03(\tR\x0einterruptedIds"5\n\x0fReattachOptions\x12"\n\x0creattachable\x18\x01 \x01(\x08R\x0creattachable"\x96\x03\n\x16ReattachExecuteRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x06 \x01(\tH\x00R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12!\n\x0coperation_id\x18\x03 \x01(\tR\x0boperationId\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x01R\nclientType\x88\x01\x01\x12-\n\x10last_response_id\x18\x05 
\x01(\tH\x02R\x0elastResponseId\x88\x01\x01\x42)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_typeB\x13\n\x11_last_response_id"\xc9\x04\n\x15ReleaseExecuteRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x07 \x01(\tH\x01R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12!\n\x0coperation_id\x18\x03 \x01(\tR\x0boperationId\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x02R\nclientType\x88\x01\x01\x12R\n\x0brelease_all\x18\x05 \x01(\x0b\x32/.spark.connect.ReleaseExecuteRequest.ReleaseAllH\x00R\nreleaseAll\x12X\n\rrelease_until\x18\x06 \x01(\x0b\x32\x31.spark.connect.ReleaseExecuteRequest.ReleaseUntilH\x00R\x0creleaseUntil\x1a\x0c\n\nReleaseAll\x1a/\n\x0cReleaseUntil\x12\x1f\n\x0bresponse_id\x18\x01 \x01(\tR\nresponseIdB\t\n\x07releaseB)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\xa5\x01\n\x16ReleaseExecuteResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12&\n\x0coperation_id\x18\x02 \x01(\tH\x00R\x0boperationId\x88\x01\x01\x42\x0f\n\r_operation_id"\xab\x01\n\x15ReleaseSessionRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x00R\nclientType\x88\x01\x01\x42\x0e\n\x0c_client_type"l\n\x16ReleaseSessionResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x02 \x01(\tR\x13serverSideSessionId"\xcc\x02\n\x18\x46\x65tchErrorDetailsRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x05 \x01(\tH\x00R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12\x19\n\x08\x65rror_id\x18\x03 
\x01(\tR\x07\x65rrorId\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x01R\nclientType\x88\x01\x01\x42)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\x93\x0c\n\x19\x46\x65tchErrorDetailsResponse\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12\x1d\n\nsession_id\x18\x04 \x01(\tR\tsessionId\x12)\n\x0eroot_error_idx\x18\x01 \x01(\x05H\x00R\x0crootErrorIdx\x88\x01\x01\x12\x46\n\x06\x65rrors\x18\x02 \x03(\x0b\x32..spark.connect.FetchErrorDetailsResponse.ErrorR\x06\x65rrors\x1a\xae\x01\n\x11StackTraceElement\x12\'\n\x0f\x64\x65\x63laring_class\x18\x01 \x01(\tR\x0e\x64\x65\x63laringClass\x12\x1f\n\x0bmethod_name\x18\x02 \x01(\tR\nmethodName\x12 \n\tfile_name\x18\x03 \x01(\tH\x00R\x08\x66ileName\x88\x01\x01\x12\x1f\n\x0bline_number\x18\x04 \x01(\x05R\nlineNumberB\x0c\n\n_file_name\x1a\xf0\x02\n\x0cQueryContext\x12\x64\n\x0c\x63ontext_type\x18\n \x01(\x0e\x32\x41.spark.connect.FetchErrorDetailsResponse.QueryContext.ContextTypeR\x0b\x63ontextType\x12\x1f\n\x0bobject_type\x18\x01 \x01(\tR\nobjectType\x12\x1f\n\x0bobject_name\x18\x02 \x01(\tR\nobjectName\x12\x1f\n\x0bstart_index\x18\x03 \x01(\x05R\nstartIndex\x12\x1d\n\nstop_index\x18\x04 \x01(\x05R\tstopIndex\x12\x1a\n\x08\x66ragment\x18\x05 \x01(\tR\x08\x66ragment\x12\x1b\n\tcall_site\x18\x06 \x01(\tR\x08\x63\x61llSite\x12\x18\n\x07summary\x18\x07 \x01(\tR\x07summary"%\n\x0b\x43ontextType\x12\x07\n\x03SQL\x10\x00\x12\r\n\tDATAFRAME\x10\x01\x1a\x99\x03\n\x0eSparkThrowable\x12$\n\x0b\x65rror_class\x18\x01 \x01(\tH\x00R\nerrorClass\x88\x01\x01\x12}\n\x12message_parameters\x18\x02 \x03(\x0b\x32N.spark.connect.FetchErrorDetailsResponse.SparkThrowable.MessageParametersEntryR\x11messageParameters\x12\\\n\x0equery_contexts\x18\x03 \x03(\x0b\x32\x35.spark.connect.FetchErrorDetailsResponse.QueryContextR\rqueryContexts\x12 \n\tsql_state\x18\x04 \x01(\tH\x01R\x08sqlState\x88\x01\x01\x1a\x44\n\x16MessageParametersEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 
\x01(\tR\x05value:\x02\x38\x01\x42\x0e\n\x0c_error_classB\x0c\n\n_sql_state\x1a\xdb\x02\n\x05\x45rror\x12\x30\n\x14\x65rror_type_hierarchy\x18\x01 \x03(\tR\x12\x65rrorTypeHierarchy\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12[\n\x0bstack_trace\x18\x03 \x03(\x0b\x32:.spark.connect.FetchErrorDetailsResponse.StackTraceElementR\nstackTrace\x12 \n\tcause_idx\x18\x04 \x01(\x05H\x00R\x08\x63\x61useIdx\x88\x01\x01\x12\x65\n\x0fspark_throwable\x18\x05 \x01(\x0b\x32\x37.spark.connect.FetchErrorDetailsResponse.SparkThrowableH\x01R\x0esparkThrowable\x88\x01\x01\x42\x0c\n\n_cause_idxB\x12\n\x10_spark_throwableB\x11\n\x0f_root_error_idx2\xb2\x07\n\x13SparkConnectService\x12X\n\x0b\x45xecutePlan\x12!.spark.connect.ExecutePlanRequest\x1a".spark.connect.ExecutePlanResponse"\x00\x30\x01\x12V\n\x0b\x41nalyzePlan\x12!.spark.connect.AnalyzePlanRequest\x1a".spark.connect.AnalyzePlanResponse"\x00\x12G\n\x06\x43onfig\x12\x1c.spark.connect.ConfigRequest\x1a\x1d.spark.connect.ConfigResponse"\x00\x12[\n\x0c\x41\x64\x64\x41rtifacts\x12".spark.connect.AddArtifactsRequest\x1a#.spark.connect.AddArtifactsResponse"\x00(\x01\x12\x63\n\x0e\x41rtifactStatus\x12&.spark.connect.ArtifactStatusesRequest\x1a\'.spark.connect.ArtifactStatusesResponse"\x00\x12P\n\tInterrupt\x12\x1f.spark.connect.InterruptRequest\x1a .spark.connect.InterruptResponse"\x00\x12`\n\x0fReattachExecute\x12%.spark.connect.ReattachExecuteRequest\x1a".spark.connect.ExecutePlanResponse"\x00\x30\x01\x12_\n\x0eReleaseExecute\x12$.spark.connect.ReleaseExecuteRequest\x1a%.spark.connect.ReleaseExecuteResponse"\x00\x12_\n\x0eReleaseSession\x12$.spark.connect.ReleaseSessionRequest\x1a%.spark.connect.ReleaseSessionResponse"\x00\x12h\n\x11\x46\x65tchErrorDetails\x12\'.spark.connect.FetchErrorDetailsRequest\x1a(.spark.connect.FetchErrorDetailsResponse"\x00\x42\x36\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' ) _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) @@ -120,7 +120,7 @@ 
_EXECUTEPLANREQUEST_REQUESTOPTION._serialized_start = 5196 _EXECUTEPLANREQUEST_REQUESTOPTION._serialized_end = 5361 _EXECUTEPLANRESPONSE._serialized_start = 5440 - _EXECUTEPLANRESPONSE._serialized_end = 8024 + _EXECUTEPLANRESPONSE._serialized_end = 8092 _EXECUTEPLANRESPONSE_SQLCOMMANDRESULT._serialized_start = 6892 _EXECUTEPLANRESPONSE_SQLCOMMANDRESULT._serialized_end = 6963 _EXECUTEPLANRESPONSE_ARROWBATCH._serialized_start = 6965 @@ -138,89 +138,91 @@ _EXECUTEPLANRESPONSE_RESULTCOMPLETE._serialized_start = 7723 _EXECUTEPLANRESPONSE_RESULTCOMPLETE._serialized_end = 7739 _EXECUTEPLANRESPONSE_EXECUTIONPROGRESS._serialized_start = 7742 - _EXECUTEPLANRESPONSE_EXECUTIONPROGRESS._serialized_end = 8007 - _KEYVALUE._serialized_start = 8026 - _KEYVALUE._serialized_end = 8091 - _CONFIGREQUEST._serialized_start = 8094 - _CONFIGREQUEST._serialized_end = 9253 - _CONFIGREQUEST_OPERATION._serialized_start = 8402 - _CONFIGREQUEST_OPERATION._serialized_end = 8900 - _CONFIGREQUEST_SET._serialized_start = 8902 - _CONFIGREQUEST_SET._serialized_end = 8954 - _CONFIGREQUEST_GET._serialized_start = 8956 - _CONFIGREQUEST_GET._serialized_end = 8981 - _CONFIGREQUEST_GETWITHDEFAULT._serialized_start = 8983 - _CONFIGREQUEST_GETWITHDEFAULT._serialized_end = 9046 - _CONFIGREQUEST_GETOPTION._serialized_start = 9048 - _CONFIGREQUEST_GETOPTION._serialized_end = 9079 - _CONFIGREQUEST_GETALL._serialized_start = 9081 - _CONFIGREQUEST_GETALL._serialized_end = 9129 - _CONFIGREQUEST_UNSET._serialized_start = 9131 - _CONFIGREQUEST_UNSET._serialized_end = 9158 - _CONFIGREQUEST_ISMODIFIABLE._serialized_start = 9160 - _CONFIGREQUEST_ISMODIFIABLE._serialized_end = 9194 - _CONFIGRESPONSE._serialized_start = 9256 - _CONFIGRESPONSE._serialized_end = 9431 - _ADDARTIFACTSREQUEST._serialized_start = 9434 - _ADDARTIFACTSREQUEST._serialized_end = 10436 - _ADDARTIFACTSREQUEST_ARTIFACTCHUNK._serialized_start = 9909 - _ADDARTIFACTSREQUEST_ARTIFACTCHUNK._serialized_end = 9962 - 
_ADDARTIFACTSREQUEST_SINGLECHUNKARTIFACT._serialized_start = 9964 - _ADDARTIFACTSREQUEST_SINGLECHUNKARTIFACT._serialized_end = 10075 - _ADDARTIFACTSREQUEST_BATCH._serialized_start = 10077 - _ADDARTIFACTSREQUEST_BATCH._serialized_end = 10170 - _ADDARTIFACTSREQUEST_BEGINCHUNKEDARTIFACT._serialized_start = 10173 - _ADDARTIFACTSREQUEST_BEGINCHUNKEDARTIFACT._serialized_end = 10366 - _ADDARTIFACTSRESPONSE._serialized_start = 10439 - _ADDARTIFACTSRESPONSE._serialized_end = 10711 - _ADDARTIFACTSRESPONSE_ARTIFACTSUMMARY._serialized_start = 10630 - _ADDARTIFACTSRESPONSE_ARTIFACTSUMMARY._serialized_end = 10711 - _ARTIFACTSTATUSESREQUEST._serialized_start = 10714 - _ARTIFACTSTATUSESREQUEST._serialized_end = 11040 - _ARTIFACTSTATUSESRESPONSE._serialized_start = 11043 - _ARTIFACTSTATUSESRESPONSE._serialized_end = 11395 - _ARTIFACTSTATUSESRESPONSE_STATUSESENTRY._serialized_start = 11238 - _ARTIFACTSTATUSESRESPONSE_STATUSESENTRY._serialized_end = 11353 - _ARTIFACTSTATUSESRESPONSE_ARTIFACTSTATUS._serialized_start = 11355 - _ARTIFACTSTATUSESRESPONSE_ARTIFACTSTATUS._serialized_end = 11395 - _INTERRUPTREQUEST._serialized_start = 11398 - _INTERRUPTREQUEST._serialized_end = 12001 - _INTERRUPTREQUEST_INTERRUPTTYPE._serialized_start = 11801 - _INTERRUPTREQUEST_INTERRUPTTYPE._serialized_end = 11929 - _INTERRUPTRESPONSE._serialized_start = 12004 - _INTERRUPTRESPONSE._serialized_end = 12148 - _REATTACHOPTIONS._serialized_start = 12150 - _REATTACHOPTIONS._serialized_end = 12203 - _REATTACHEXECUTEREQUEST._serialized_start = 12206 - _REATTACHEXECUTEREQUEST._serialized_end = 12612 - _RELEASEEXECUTEREQUEST._serialized_start = 12615 - _RELEASEEXECUTEREQUEST._serialized_end = 13200 - _RELEASEEXECUTEREQUEST_RELEASEALL._serialized_start = 13069 - _RELEASEEXECUTEREQUEST_RELEASEALL._serialized_end = 13081 - _RELEASEEXECUTEREQUEST_RELEASEUNTIL._serialized_start = 13083 - _RELEASEEXECUTEREQUEST_RELEASEUNTIL._serialized_end = 13130 - _RELEASEEXECUTERESPONSE._serialized_start = 13203 - 
_RELEASEEXECUTERESPONSE._serialized_end = 13368 - _RELEASESESSIONREQUEST._serialized_start = 13371 - _RELEASESESSIONREQUEST._serialized_end = 13542 - _RELEASESESSIONRESPONSE._serialized_start = 13544 - _RELEASESESSIONRESPONSE._serialized_end = 13652 - _FETCHERRORDETAILSREQUEST._serialized_start = 13655 - _FETCHERRORDETAILSREQUEST._serialized_end = 13987 - _FETCHERRORDETAILSRESPONSE._serialized_start = 13990 - _FETCHERRORDETAILSRESPONSE._serialized_end = 15545 - _FETCHERRORDETAILSRESPONSE_STACKTRACEELEMENT._serialized_start = 14219 - _FETCHERRORDETAILSRESPONSE_STACKTRACEELEMENT._serialized_end = 14393 - _FETCHERRORDETAILSRESPONSE_QUERYCONTEXT._serialized_start = 14396 - _FETCHERRORDETAILSRESPONSE_QUERYCONTEXT._serialized_end = 14764 - _FETCHERRORDETAILSRESPONSE_QUERYCONTEXT_CONTEXTTYPE._serialized_start = 14727 - _FETCHERRORDETAILSRESPONSE_QUERYCONTEXT_CONTEXTTYPE._serialized_end = 14764 - _FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE._serialized_start = 14767 - _FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE._serialized_end = 15176 - _FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE_MESSAGEPARAMETERSENTRY._serialized_start = 15078 - _FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE_MESSAGEPARAMETERSENTRY._serialized_end = 15146 - _FETCHERRORDETAILSRESPONSE_ERROR._serialized_start = 15179 - _FETCHERRORDETAILSRESPONSE_ERROR._serialized_end = 15526 - _SPARKCONNECTSERVICE._serialized_start = 15548 - _SPARKCONNECTSERVICE._serialized_end = 16494 + _EXECUTEPLANRESPONSE_EXECUTIONPROGRESS._serialized_end = 8075 + _EXECUTEPLANRESPONSE_EXECUTIONPROGRESS_STAGEINFO._serialized_start = 7898 + _EXECUTEPLANRESPONSE_EXECUTIONPROGRESS_STAGEINFO._serialized_end = 8075 + _KEYVALUE._serialized_start = 8094 + _KEYVALUE._serialized_end = 8159 + _CONFIGREQUEST._serialized_start = 8162 + _CONFIGREQUEST._serialized_end = 9321 + _CONFIGREQUEST_OPERATION._serialized_start = 8470 + _CONFIGREQUEST_OPERATION._serialized_end = 8968 + _CONFIGREQUEST_SET._serialized_start = 8970 + _CONFIGREQUEST_SET._serialized_end = 9022 + 
_CONFIGREQUEST_GET._serialized_start = 9024 + _CONFIGREQUEST_GET._serialized_end = 9049 + _CONFIGREQUEST_GETWITHDEFAULT._serialized_start = 9051 + _CONFIGREQUEST_GETWITHDEFAULT._serialized_end = 9114 + _CONFIGREQUEST_GETOPTION._serialized_start = 9116 + _CONFIGREQUEST_GETOPTION._serialized_end = 9147 + _CONFIGREQUEST_GETALL._serialized_start = 9149 + _CONFIGREQUEST_GETALL._serialized_end = 9197 + _CONFIGREQUEST_UNSET._serialized_start = 9199 + _CONFIGREQUEST_UNSET._serialized_end = 9226 + _CONFIGREQUEST_ISMODIFIABLE._serialized_start = 9228 + _CONFIGREQUEST_ISMODIFIABLE._serialized_end = 9262 + _CONFIGRESPONSE._serialized_start = 9324 + _CONFIGRESPONSE._serialized_end = 9499 + _ADDARTIFACTSREQUEST._serialized_start = 9502 + _ADDARTIFACTSREQUEST._serialized_end = 10504 + _ADDARTIFACTSREQUEST_ARTIFACTCHUNK._serialized_start = 9977 + _ADDARTIFACTSREQUEST_ARTIFACTCHUNK._serialized_end = 10030 + _ADDARTIFACTSREQUEST_SINGLECHUNKARTIFACT._serialized_start = 10032 + _ADDARTIFACTSREQUEST_SINGLECHUNKARTIFACT._serialized_end = 10143 + _ADDARTIFACTSREQUEST_BATCH._serialized_start = 10145 + _ADDARTIFACTSREQUEST_BATCH._serialized_end = 10238 + _ADDARTIFACTSREQUEST_BEGINCHUNKEDARTIFACT._serialized_start = 10241 + _ADDARTIFACTSREQUEST_BEGINCHUNKEDARTIFACT._serialized_end = 10434 + _ADDARTIFACTSRESPONSE._serialized_start = 10507 + _ADDARTIFACTSRESPONSE._serialized_end = 10779 + _ADDARTIFACTSRESPONSE_ARTIFACTSUMMARY._serialized_start = 10698 + _ADDARTIFACTSRESPONSE_ARTIFACTSUMMARY._serialized_end = 10779 + _ARTIFACTSTATUSESREQUEST._serialized_start = 10782 + _ARTIFACTSTATUSESREQUEST._serialized_end = 11108 + _ARTIFACTSTATUSESRESPONSE._serialized_start = 11111 + _ARTIFACTSTATUSESRESPONSE._serialized_end = 11463 + _ARTIFACTSTATUSESRESPONSE_STATUSESENTRY._serialized_start = 11306 + _ARTIFACTSTATUSESRESPONSE_STATUSESENTRY._serialized_end = 11421 + _ARTIFACTSTATUSESRESPONSE_ARTIFACTSTATUS._serialized_start = 11423 + _ARTIFACTSTATUSESRESPONSE_ARTIFACTSTATUS._serialized_end = 11463 + 
_INTERRUPTREQUEST._serialized_start = 11466 + _INTERRUPTREQUEST._serialized_end = 12069 + _INTERRUPTREQUEST_INTERRUPTTYPE._serialized_start = 11869 + _INTERRUPTREQUEST_INTERRUPTTYPE._serialized_end = 11997 + _INTERRUPTRESPONSE._serialized_start = 12072 + _INTERRUPTRESPONSE._serialized_end = 12216 + _REATTACHOPTIONS._serialized_start = 12218 + _REATTACHOPTIONS._serialized_end = 12271 + _REATTACHEXECUTEREQUEST._serialized_start = 12274 + _REATTACHEXECUTEREQUEST._serialized_end = 12680 + _RELEASEEXECUTEREQUEST._serialized_start = 12683 + _RELEASEEXECUTEREQUEST._serialized_end = 13268 + _RELEASEEXECUTEREQUEST_RELEASEALL._serialized_start = 13137 + _RELEASEEXECUTEREQUEST_RELEASEALL._serialized_end = 13149 + _RELEASEEXECUTEREQUEST_RELEASEUNTIL._serialized_start = 13151 + _RELEASEEXECUTEREQUEST_RELEASEUNTIL._serialized_end = 13198 + _RELEASEEXECUTERESPONSE._serialized_start = 13271 + _RELEASEEXECUTERESPONSE._serialized_end = 13436 + _RELEASESESSIONREQUEST._serialized_start = 13439 + _RELEASESESSIONREQUEST._serialized_end = 13610 + _RELEASESESSIONRESPONSE._serialized_start = 13612 + _RELEASESESSIONRESPONSE._serialized_end = 13720 + _FETCHERRORDETAILSREQUEST._serialized_start = 13723 + _FETCHERRORDETAILSREQUEST._serialized_end = 14055 + _FETCHERRORDETAILSRESPONSE._serialized_start = 14058 + _FETCHERRORDETAILSRESPONSE._serialized_end = 15613 + _FETCHERRORDETAILSRESPONSE_STACKTRACEELEMENT._serialized_start = 14287 + _FETCHERRORDETAILSRESPONSE_STACKTRACEELEMENT._serialized_end = 14461 + _FETCHERRORDETAILSRESPONSE_QUERYCONTEXT._serialized_start = 14464 + _FETCHERRORDETAILSRESPONSE_QUERYCONTEXT._serialized_end = 14832 + _FETCHERRORDETAILSRESPONSE_QUERYCONTEXT_CONTEXTTYPE._serialized_start = 14795 + _FETCHERRORDETAILSRESPONSE_QUERYCONTEXT_CONTEXTTYPE._serialized_end = 14832 + _FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE._serialized_start = 14835 + _FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE._serialized_end = 15244 + 
_FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE_MESSAGEPARAMETERSENTRY._serialized_start = 15146 + _FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE_MESSAGEPARAMETERSENTRY._serialized_end = 15214 + _FETCHERRORDETAILSRESPONSE_ERROR._serialized_start = 15247 + _FETCHERRORDETAILSRESPONSE_ERROR._serialized_end = 15594 + _SPARKCONNECTSERVICE._serialized_start = 15616 + _SPARKCONNECTSERVICE._serialized_end = 16562 # @@protoc_insertion_point(module_scope) diff --git a/python/pyspark/sql/connect/proto/base_pb2.pyi b/python/pyspark/sql/connect/proto/base_pb2.pyi index 0e6eca7865888..18f132eba06af 100644 --- a/python/pyspark/sql/connect/proto/base_pb2.pyi +++ b/python/pyspark/sql/connect/proto/base_pb2.pyi @@ -1451,43 +1451,68 @@ class ExecutePlanResponse(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor - NUM_TASKS_FIELD_NUMBER: builtins.int - NUM_COMPLETED_TASKS_FIELD_NUMBER: builtins.int - NUM_STAGES_FIELD_NUMBER: builtins.int - NUM_COMPLETED_STAGES_FIELD_NUMBER: builtins.int - INPUT_BYTES_READ_FIELD_NUMBER: builtins.int + class StageInfo(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + STAGE_ID_FIELD_NUMBER: builtins.int + NUM_TASKS_FIELD_NUMBER: builtins.int + NUM_COMPLETED_TASKS_FIELD_NUMBER: builtins.int + INPUT_BYTES_READ_FIELD_NUMBER: builtins.int + DONE_FIELD_NUMBER: builtins.int + stage_id: builtins.int + num_tasks: builtins.int + num_completed_tasks: builtins.int + input_bytes_read: builtins.int + done: builtins.bool + def __init__( + self, + *, + stage_id: builtins.int = ..., + num_tasks: builtins.int = ..., + num_completed_tasks: builtins.int = ..., + input_bytes_read: builtins.int = ..., + done: builtins.bool = ..., + ) -> None: ... + def ClearField( + self, + field_name: typing_extensions.Literal[ + "done", + b"done", + "input_bytes_read", + b"input_bytes_read", + "num_completed_tasks", + b"num_completed_tasks", + "num_tasks", + b"num_tasks", + "stage_id", + b"stage_id", + ], + ) -> None: ... 
+ + STAGES_FIELD_NUMBER: builtins.int NUM_INFLIGHT_TASKS_FIELD_NUMBER: builtins.int - num_tasks: builtins.int - num_completed_tasks: builtins.int - num_stages: builtins.int - num_completed_stages: builtins.int - input_bytes_read: builtins.int + @property + def stages( + self, + ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[ + global___ExecutePlanResponse.ExecutionProgress.StageInfo + ]: + """Captures the progress of each individual stage.""" num_inflight_tasks: builtins.int + """Captures the currently in progress tasks.""" def __init__( self, *, - num_tasks: builtins.int = ..., - num_completed_tasks: builtins.int = ..., - num_stages: builtins.int = ..., - num_completed_stages: builtins.int = ..., - input_bytes_read: builtins.int = ..., + stages: collections.abc.Iterable[ + global___ExecutePlanResponse.ExecutionProgress.StageInfo + ] + | None = ..., num_inflight_tasks: builtins.int = ..., ) -> None: ... def ClearField( self, field_name: typing_extensions.Literal[ - "input_bytes_read", - b"input_bytes_read", - "num_completed_stages", - b"num_completed_stages", - "num_completed_tasks", - b"num_completed_tasks", - "num_inflight_tasks", - b"num_inflight_tasks", - "num_stages", - b"num_stages", - "num_tasks", - b"num_tasks", + "num_inflight_tasks", b"num_inflight_tasks", "stages", b"stages" ], ) -> None: ... 
diff --git a/python/pyspark/sql/connect/shell/progress.py b/python/pyspark/sql/connect/shell/progress.py index 9e1080ce4befe..0c7c6cd9eb797 100644 --- a/python/pyspark/sql/connect/shell/progress.py +++ b/python/pyspark/sql/connect/shell/progress.py @@ -17,11 +17,14 @@ """Implementation of a progress bar that is displayed while a query is running.""" import abc +from dataclasses import dataclass import time import sys import typing from typing import Iterable, Any +from pyspark.sql.connect.proto import ExecutePlanResponse + try: from IPython.utils.terminal import get_terminal_size except ImportError: @@ -33,19 +36,41 @@ def get_terminal_size(defaultx: Any = None, defaulty: Any = None) -> Any: from pyspark.sql.connect.shell import progress_bar_enabled +@dataclass +class StageInfo: + stage_id: int + num_tasks: int + num_completed_tasks: int + num_bytes_read: int + done: bool + + class ProgressHandler(abc.ABC): @abc.abstractmethod def __call__( self, - total_tasks: int, - tasks_completed: int, - bytes_read: int, + stages: Iterable[StageInfo], inflight_tasks: int, done: bool, ) -> None: pass +def from_proto(proto: ExecutePlanResponse) -> typing.Tuple[Iterable[StageInfo], int]: + result = [] + for stage in proto.stages: + result.append( + StageInfo( + stage_id=stage.stage_id, + num_tasks=stage.num_tasks, + num_completed_tasks=stage.num_completed_tasks, + num_bytes_read=stage.input_bytes_read, + done=stage.done, + ) + ) + return (result, proto.num_inflight_tasks) + + class Progress: """This is a small helper class to visualize a textual progress bar. 
he interface is very simple and assumes that nothing else prints to the @@ -98,7 +123,7 @@ def _notify(self, done: bool = False) -> None: done=done, ) - def update_ticks(self, ticks: int, current: int, bytes_read: int, inflight_tasks: int) -> None: + def update_ticks(self, stages: Iterable[StageInfo], inflight_tasks: int) -> None: """This method is called from the execution to update the progress bar with a new total tick counter and the current position. This is necessary in case new stages get added with new tasks and so the total task number will be updated as well. @@ -111,10 +136,12 @@ def update_ticks(self, ticks: int, current: int, bytes_read: int, inflight_tasks inflight_tasks int The number of tasks that are currently running """ - if ticks > 0 and current != self._tick: - self._ticks = ticks - self._tick = current - self._bytes_read = bytes_read + total_tasks = sum(map(lambda x: x.num_tasks, stages)) + completed_tasks = sum(map(lambda x: x.num_completed_tasks, stages)) + if total_tasks > 0 and completed_tasks != self._tick: + self._ticks = total_tasks + self._tick = completed_tasks + self._bytes_read = sum(map(lambda x: x.num_bytes_read, stages)) if self._tick > 0: self.output() self._running = inflight_tasks diff --git a/python/pyspark/sql/tests/connect/shell/test_progress.py b/python/pyspark/sql/tests/connect/shell/test_progress.py index 8caadb70d79de..e96daf8d0cabe 100644 --- a/python/pyspark/sql/tests/connect/shell/test_progress.py +++ b/python/pyspark/sql/tests/connect/shell/test_progress.py @@ -23,15 +23,16 @@ connect_requirement_message, ) from pyspark.testing.utils import PySparkErrorTestUtils -from pyspark.sql.connect.shell.progress import Progress +from pyspark.sql.connect.shell.progress import Progress, StageInfo @unittest.skipIf(not should_test_connect, connect_requirement_message) class ProgressBarTest(unittest.TestCase, PySparkErrorTestUtils): def test_simple_progress(self): + stages = [StageInfo(0, 100, 50, 999, False)] buffer = StringIO() 
p = Progress(output=buffer, enabled=True) - p.update_ticks(100, 50, 999, 10) + p.update_ticks(stages, 10) val = buffer.getvalue() self.assertIn("50.00%", val, "Current progress is 50%") self.assertIn("****", val, "Should use the default char to print.") @@ -42,28 +43,34 @@ def test_simple_progress(self): self.assertTrue(val.endswith("\r"), "Line should be empty") def test_configure_char(self): + stages = [StageInfo(0, 100, 50, 999, False)] buffer = StringIO() p = Progress(char="+", output=buffer, enabled=True) - p.update_ticks(100, 50, 999, 10) + p.update_ticks(stages, 10) val = buffer.getvalue() self.assertIn("++++++", val, "Updating the char works.") def test_disabled_does_not_print(self): + stages = [StageInfo(0, 100, 50, 999, False)] buffer = StringIO() p = Progress(char="+", output=buffer, enabled=False) - p.update_ticks(100, 50, 999, 10) - p.update_ticks(100, 51, 999, 10) + p.update_ticks(stages, 10) + stages = [StageInfo(0, 100, 51, 999, False)] + p.update_ticks(stages, 10) val = buffer.getvalue() self.assertEqual(0, len(val), "If the printing is disabled, don't print.") def test_finish_progress(self): + stages = [StageInfo(0, 100, 50, 999, False)] buffer = StringIO() p = Progress(char="+", output=buffer, enabled=True) - p.update_ticks(100, 50, 999, 10) + p.update_ticks(stages, 10) p.finish() self.assertTrue(buffer.getvalue().endswith("\r"), "Last line should be empty") def test_progress_handler(self): + stages = [StageInfo(0, 0, 0, 0, False)] + handler_called = 0 done = False @@ -78,8 +85,9 @@ def handler(**kwargs): buffer = StringIO() p = Progress(char="+", output=buffer, enabled=True, handlers=[handler]) - p.update_ticks(100, 0, 0, 1) - p.update_ticks(100, 50, 999, 10) + p.update_ticks(stages, 1) + stages = [StageInfo(0, 100, 50, 999, False)] + p.update_ticks(stages, 10) self.assertIn("++++++", buffer.getvalue(), "Updating the char works.") self.assertEqual(1, handler_called, "Handler should be called.") self.assertFalse(done, "Before finish, done should 
be False") From 71033d0c68c1ea6ee80e12f4a5142906b6b14c77 Mon Sep 17 00:00:00 2001 From: Martin Grund Date: Thu, 28 Mar 2024 13:53:42 +0100 Subject: [PATCH 17/27] fix --- .../scala/org/apache/spark/sql/SparkSessionE2ESuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SparkSessionE2ESuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SparkSessionE2ESuite.scala index ec6e87ec0f2b8..b967245d90c26 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SparkSessionE2ESuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SparkSessionE2ESuite.scala @@ -235,8 +235,8 @@ class SparkSessionE2ESuite extends RemoteSparkSession { .repartition(1000) .collectResult() assert(result.length == 10000) - assert(result.progress.totalTasks > 100) - assert(result.progress.completedTasks > 100) + assert(result.progress.stages.map(_.numTasks).sum > 100) + assert(result.progress.stages.map(_.completedTasks).sum > 100) } test("interrupt operation") { From 5687f6ce8e718987657b36de8a45a2c004f76ec2 Mon Sep 17 00:00:00 2001 From: Martin Grund Date: Fri, 29 Mar 2024 07:58:19 +0100 Subject: [PATCH 18/27] fix --- python/pyspark/sql/connect/shell/progress.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/pyspark/sql/connect/shell/progress.py b/python/pyspark/sql/connect/shell/progress.py index 0c7c6cd9eb797..c57404fbe57ae 100644 --- a/python/pyspark/sql/connect/shell/progress.py +++ b/python/pyspark/sql/connect/shell/progress.py @@ -56,7 +56,9 @@ def __call__( pass -def from_proto(proto: ExecutePlanResponse) -> typing.Tuple[Iterable[StageInfo], int]: +def from_proto( + proto: ExecutePlanResponse.ExecutionProgress, +) -> typing.Tuple[Iterable[StageInfo], int]: result = [] for stage in proto.stages: result.append( From 2d75941a45090ebe708caf99a2af1912c82de847 Mon Sep 17 00:00:00 2001 From: Martin Grund 
Date: Sat, 30 Mar 2024 10:30:44 +0100 Subject: [PATCH 19/27] fix tests and lint --- python/pyspark/sql/connect/session.py | 19 ++++--------------- python/pyspark/sql/connect/shell/progress.py | 8 ++++---- 2 files changed, 8 insertions(+), 19 deletions(-) diff --git a/python/pyspark/sql/connect/session.py b/python/pyspark/sql/connect/session.py index fa29b6afe4f1d..428624b78169e 100644 --- a/python/pyspark/sql/connect/session.py +++ b/python/pyspark/sql/connect/session.py @@ -96,6 +96,7 @@ from pyspark.sql.connect.catalog import Catalog from pyspark.sql.connect.udf import UDFRegistration from pyspark.sql.connect.udtf import UDTFRegistration + from pyspark.sql.connect.shell.progress import ProgressHandler try: @@ -319,30 +320,17 @@ def readStream(self) -> "DataStreamReader": readStream.__doc__ = PySparkSession.readStream.__doc__ - def registerProgressHandler(self, handler: Callable) -> None: + def registerProgressHandler(self, handler: "ProgressHandler") -> None: """ Register a progress handler to be called when a progress update is received from the server. .. versionadded:: 4.0 - - Examples - -------- - >>> import os - >>> handler = lambda **kwargs: os.write(str(kwargs)) - >>> spark.registerProgressHandler(handler) """ self._client.register_progress_handler(handler) - def removeProgressHandler(self, handler: Callable) -> None: + def removeProgressHandler(self, handler: "ProgressHandler") -> None: """ Remove a progress handler that was previously registered. .. 
versionadded:: 4.0 - - Examples - -------- - >>> import os - >>> handler = lambda **kwargs: os.write(str(kwargs)) - >>> spark.registerProgressHandler(handler) - >>> spark.removeProgressHandler(handler) """ self._client.remove_progress_handler(handler) @@ -990,6 +978,7 @@ def profile(self) -> Profile: def _test() -> None: import sys + import os import doctest from pyspark.sql import SparkSession as PySparkSession import pyspark.sql.connect.session diff --git a/python/pyspark/sql/connect/shell/progress.py b/python/pyspark/sql/connect/shell/progress.py index c57404fbe57ae..6350c5a994173 100644 --- a/python/pyspark/sql/connect/shell/progress.py +++ b/python/pyspark/sql/connect/shell/progress.py @@ -49,7 +49,7 @@ class ProgressHandler(abc.ABC): @abc.abstractmethod def __call__( self, - stages: Iterable[StageInfo], + stages: typing.Optional[Iterable[StageInfo]], inflight_tasks: int, done: bool, ) -> None: @@ -114,13 +114,12 @@ def __init__( self._out = output self._running = 0 self._handlers = handlers + self._stages = [] def _notify(self, done: bool = False) -> None: for handler in self._handlers: handler( - total_tasks=self._ticks, - tasks_completed=self._tick, - bytes_read=self._bytes_read, + stages=self._stages, inflight_tasks=self._running, done=done, ) @@ -147,6 +146,7 @@ def update_ticks(self, stages: Iterable[StageInfo], inflight_tasks: int) -> None if self._tick > 0: self.output() self._running = inflight_tasks + self._stages = stages self._notify(False) def finish(self) -> None: From 453bda97953adc47c845fd490ab55b4f4cf15612 Mon Sep 17 00:00:00 2001 From: Martin Grund Date: Sat, 30 Mar 2024 17:41:40 +0100 Subject: [PATCH 20/27] lint --- python/pyspark/sql/connect/session.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/pyspark/sql/connect/session.py b/python/pyspark/sql/connect/session.py index 1e80334aa2a6e..9030ca3cd8676 100644 --- a/python/pyspark/sql/connect/session.py +++ b/python/pyspark/sql/connect/session.py @@ -37,7 +37,6 @@ 
Iterable, TYPE_CHECKING, ClassVar, - Callable, ) import numpy as np @@ -1031,7 +1030,7 @@ def profile(self) -> Profile: def _test() -> None: import sys - import os + import os # noqa: F401 import doctest from pyspark.sql import SparkSession as PySparkSession import pyspark.sql.connect.session From cc864c911a8046fb0662877a2c39cd005fa95c9c Mon Sep 17 00:00:00 2001 From: Martin Grund Date: Sat, 30 Mar 2024 22:36:42 +0100 Subject: [PATCH 21/27] lint --- python/pyspark/sql/connect/shell/progress.py | 2 +- .../sql/tests/connect/shell/test_progress.py | 21 ++++++++++--------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/python/pyspark/sql/connect/shell/progress.py b/python/pyspark/sql/connect/shell/progress.py index 6350c5a994173..809c6eec0053b 100644 --- a/python/pyspark/sql/connect/shell/progress.py +++ b/python/pyspark/sql/connect/shell/progress.py @@ -114,7 +114,7 @@ def __init__( self._out = output self._running = 0 self._handlers = handlers - self._stages = [] + self._stages: Iterable[StageInfo] = [] def _notify(self, done: bool = False) -> None: for handler in self._handlers: diff --git a/python/pyspark/sql/tests/connect/shell/test_progress.py b/python/pyspark/sql/tests/connect/shell/test_progress.py index e96daf8d0cabe..c8cfa72a8fbb3 100644 --- a/python/pyspark/sql/tests/connect/shell/test_progress.py +++ b/python/pyspark/sql/tests/connect/shell/test_progress.py @@ -17,6 +17,7 @@ from io import StringIO import unittest +from typing import Iterable from pyspark.testing.connectutils import ( should_test_connect, @@ -72,16 +73,16 @@ def test_progress_handler(self): stages = [StageInfo(0, 0, 0, 0, False)] handler_called = 0 - done = False + done_called = False - def handler(**kwargs): - nonlocal handler_called, done + def handler(stages: Iterable[StageInfo], inflight_tasks: int, done: bool): + nonlocal handler_called, done_called handler_called = 1 - self.assertEqual(100, kwargs["total_tasks"]) - self.assertEqual(50, kwargs["tasks_completed"]) - 
self.assertEqual(999, kwargs["bytes_read"]) - self.assertEqual(10, kwargs["inflight_tasks"]) - done = kwargs["done"] + self.assertEqual(100, sum(map(lambda x: x.num_tasks, stages))) + self.assertEqual(50, sum(map(lambda x: x.num_completed_tasks, stages))) + self.assertEqual(999, sum(map(lambda x: x.num_bytes_read, stages))) + self.assertEqual(10, inflight_tasks) + done_called = done buffer = StringIO() p = Progress(char="+", output=buffer, enabled=True, handlers=[handler]) @@ -90,10 +91,10 @@ def handler(**kwargs): p.update_ticks(stages, 10) self.assertIn("++++++", buffer.getvalue(), "Updating the char works.") self.assertEqual(1, handler_called, "Handler should be called.") - self.assertFalse(done, "Before finish, done should be False") + self.assertFalse(done_called, "Before finish, done should be False") p.finish() self.assertTrue(buffer.getvalue().endswith("\r"), "Last line should be empty") - self.assertTrue(done, "After finish, done should be True") + self.assertTrue(done_called, "After finish, done should be True") if __name__ == "__main__": From ad4791e00b076f9ab32a1874c329b48cf1a222df Mon Sep 17 00:00:00 2001 From: Martin Grund Date: Mon, 1 Apr 2024 20:47:08 +0200 Subject: [PATCH 22/27] review comments --- .../spark/sql/connect/config/Connect.scala | 5 +- .../ConnectProgressExecutionListener.scala | 2 +- .../execution/ExecuteGrpcResponseSender.scala | 49 +++++++++++-------- .../org/apache/spark/deploy/SparkSubmit.scala | 2 +- .../reference/pyspark.sql/spark_session.rst | 13 +++-- python/pyspark/shell.py | 1 + python/pyspark/sql/connect/client/core.py | 8 ++- python/pyspark/sql/connect/session.py | 27 +++++++++- python/pyspark/sql/connect/shell/progress.py | 27 +++++----- .../sql/tests/connect/shell/test_progress.py | 4 +- 10 files changed, 93 insertions(+), 45 deletions(-) diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/config/Connect.scala 
b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/config/Connect.scala index 74c8458019fce..6ba100af1bb9a 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/config/Connect.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/config/Connect.scala @@ -267,8 +267,9 @@ object Connect { .createWithDefault(1024) val CONNECT_PROGRESS_REPORT_INTERVAL = - buildStaticConf("spark.connect.progress.reportInterval") - .doc("The interval at which the progress of a query is reported to the client.") + buildConf("spark.connect.progress.reportInterval") + .doc("The interval at which the progress of a query is reported to the client." + + " If the value is set to a negative value the progress reports will be disabled.") .version("4.0.0") .timeConf(TimeUnit.MILLISECONDS) .createWithDefaultString("2s") diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListener.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListener.scala index f48553a3f3d01..954956363505c 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListener.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ConnectProgressExecutionListener.scala @@ -36,7 +36,7 @@ private[connect] class ConnectProgressExecutionListener extends SparkListener wi * A tracker for a given tag. This is used to track the progress of an operation is being * executed through the connect API. 
*/ - class ExecutionTracker(var tag: String) { + class ExecutionTracker(val tag: String) { class StageInfo( val stageId: Int, diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala index d53e854219950..a9444862b3aa6 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala @@ -140,25 +140,27 @@ private[connect] class ExecuteGrpcResponseSender[T <: Message]( * client, but rather enqueued to in the response observer. */ private def enqueueProgressMessage(): Unit = { - SparkConnectService.executionListener.foreach { listener => - // It is possible, that the tracker is no longer available and in this - // case we simply ignore it and do not send any progress message. This avoids - // having to synchronize on the listener. - listener.tryGetTracker(executeHolder.jobTag).foreach { tracker => - // Only send progress message if there is something new to report. - tracker.yieldWhenDirty { (stages, inflightTasks) => - val response = ExecutePlanResponse - .newBuilder() - .setExecutionProgress( - ExecutePlanResponse.ExecutionProgress - .newBuilder() - .addAllStages(stages.map(_.toProto()).asJava) - .setNumInflightTasks(inflightTasks)) - .build() - // There is a special case when the response observer has alreaady determined - // that the final message is send (and the stream will be closed) but we might want - // to send the progress message. In this case we ignore the result of the `onNext` call. 
- executeHolder.responseObserver.tryOnNext(response) + if (executeHolder.sessionHolder.session.conf.get(CONNECT_PROGRESS_REPORT_INTERVAL) > 0) { + SparkConnectService.executionListener.foreach { listener => + // It is possible, that the tracker is no longer available and in this + // case we simply ignore it and do not send any progress message. This avoids + // having to synchronize on the listener. + listener.tryGetTracker(executeHolder.jobTag).foreach { tracker => + // Only send progress message if there is something new to report. + tracker.yieldWhenDirty { (stages, inflightTasks) => + val response = ExecutePlanResponse + .newBuilder() + .setExecutionProgress( + ExecutePlanResponse.ExecutionProgress + .newBuilder() + .addAllStages(stages.map(_.toProto()).asJava) + .setNumInflightTasks(inflightTasks)) + .build() + // There is a special case when the response observer has alreaady determined + // that the final message is send (and the stream will be closed) but we might want + // to send the progress message. In this case we ignore the result of the `onNext` call. + executeHolder.responseObserver.tryOnNext(response) + } } } } @@ -236,7 +238,14 @@ private[connect] class ExecuteGrpcResponseSender[T <: Message]( // monitor, and will notify upon state change. if (response.isEmpty) { // Wake up more frequently to send the progress updates. - val timeout = SparkEnv.get.conf.get(CONNECT_PROGRESS_REPORT_INTERVAL) + val progressTimeout = + executeHolder.sessionHolder.session.conf.get(CONNECT_PROGRESS_REPORT_INTERVAL) + // If the progress feature is disabled, wait for the deadline. 
+ val timeout = if (progressTimeout > 0) { + progressTimeout + } else { + Math.max(1, deadlineTimeMillis - System.currentTimeMillis()) + } logTrace(s"Wait for response to become available with timeout=$timeout ms.") executionObserver.responseLock.wait(timeout) enqueueProgressMessage() diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 78ee9a19b766f..9d51b9a09d50f 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -747,7 +747,7 @@ private[spark] class SparkSubmit extends Logging { // when Spark Connect is in local mode, because Spark Connect support its own progress // reporting. if (isShell(args.primaryResource) && !sparkConf.contains(UI_SHOW_CONSOLE_PROGRESS) && - !sparkConf.contains("spark.local.connect")) { + !sparkConf.contains("spark.local.connect")) { sparkConf.set(UI_SHOW_CONSOLE_PROGRESS, true) } diff --git a/python/docs/source/reference/pyspark.sql/spark_session.rst b/python/docs/source/reference/pyspark.sql/spark_session.rst index ea71249e292e9..4e679da59c163 100644 --- a/python/docs/source/reference/pyspark.sql/spark_session.rst +++ b/python/docs/source/reference/pyspark.sql/spark_session.rst @@ -78,12 +78,15 @@ Spark Connect Only SparkSession.addArtifact SparkSession.addArtifacts - SparkSession.copyFromLocalToFs + SparkSession.addTag + SparkSession.clearProgressHandlers + SparkSession.clearTags SparkSession.client + SparkSession.copyFromLocalToFs + SparkSession.getTags SparkSession.interruptAll - SparkSession.interruptTag SparkSession.interruptOperation - SparkSession.addTag + SparkSession.interruptTag + SparkSession.registerProgressHandler + SparkSession.removeProgressHandler SparkSession.removeTag - SparkSession.getTags - SparkSession.clearTags diff --git a/python/pyspark/shell.py b/python/pyspark/shell.py index 90fbc8433268b..5bc5ac3b57fac 100644 --- 
a/python/pyspark/shell.py +++ b/python/pyspark/shell.py @@ -140,3 +140,4 @@ with open(_pythonstartup) as f: code = compile(f.read(), _pythonstartup, "exec") exec(code) + diff --git a/python/pyspark/sql/connect/client/core.py b/python/pyspark/sql/connect/client/core.py index 53970ce492479..725a03a533465 100644 --- a/python/pyspark/sql/connect/client/core.py +++ b/python/pyspark/sql/connect/client/core.py @@ -700,9 +700,11 @@ class ClientThreadLocals(threading.local): def register_progress_handler(self, handler: ProgressHandler) -> None: """ Register a progress handler to be called when a progress message is received. + Parameters ---------- - handler - The callable that will be called with the progress information. + handler : ProgressHandler + The callable that will be called with the progress information. """ if handler in self._progress_handlers: @@ -715,9 +717,11 @@ def clear_progress_handlers(self) -> None: def remove_progress_handler(self, handler: ProgressHandler) -> None: """ Remove a progress handler from the list of registered handlers. + Parameters ---------- - handler - The callable to remove from the list of progress handlers. + handler : ProgressHandler + The callable to remove from the list of progress handlers. """ self._progress_handlers.remove(handler) diff --git a/python/pyspark/sql/connect/session.py b/python/pyspark/sql/connect/session.py index 9030ca3cd8676..2939603dc5e53 100644 --- a/python/pyspark/sql/connect/session.py +++ b/python/pyspark/sql/connect/session.py @@ -330,22 +330,46 @@ def readStream(self) -> "DataStreamReader": def registerProgressHandler(self, handler: "ProgressHandler") -> None: """ Register a progress handler to be called when a progress update is received from the server. + .. versionadded:: 4.0 + + Parameters + ---------- + handler : ProgressHandler + A callable that follows the ProgressHandler interface. This handler will be called + on every progress update. 
+ + Examples + -------- + + >>> def progress_handler(stages, inflight_tasks, done): + ... print(f"{len(stages)} Stages known, Done: {done}") + >>> spark.registerProgressHandler(progress_handler) + >>> res = spark.range(10).repartition(1).collect() + 3 Stages known, Done: False + 3 Stages known, Done: True + >>> spark.clearProgressHandlers() """ self._client.register_progress_handler(handler) def removeProgressHandler(self, handler: "ProgressHandler") -> None: """ Remove a progress handler that was previously registered. + .. versionadded:: 4.0 + + Parameters + ---------- + handler : ProgressHandler + The handler to remove if present in the list of progress handlers. """ self._client.remove_progress_handler(handler) def clearProgressHandlers(self) -> None: """ Clear all registered progress handlers. - .. versionadded:: 4.0 + .. versionadded:: 4.0 """ self._client.clear_progress_handlers() @@ -1030,7 +1054,6 @@ def profile(self) -> Profile: def _test() -> None: import sys - import os # noqa: F401 import doctest from pyspark.sql import SparkSession as PySparkSession import pyspark.sql.connect.session diff --git a/python/pyspark/sql/connect/shell/progress.py b/python/pyspark/sql/connect/shell/progress.py index 809c6eec0053b..8a8064c29cdc6 100644 --- a/python/pyspark/sql/connect/shell/progress.py +++ b/python/pyspark/sql/connect/shell/progress.py @@ -93,13 +93,19 @@ def __init__( Constructs a new Progress bar. The progress bar is typically used in the blocking query execution path to process the execution progress methods from the server. + Parameters ---------- - char str the Default character to be used for printing the bar. - min_width numeric The minimum width of the progress bar - output file The output device to write the progress bar to. - enabled bool Whether the progress bar printing should be enabled or not. - handlers list A list of handlers that will be called when the progress bar is updated. 
+ char : str + The Default character to be used for printing the bar. + min_width : numeric + The minimum width of the progress bar + output : file + The output device to write the progress bar to. + enabled : bool + Whether the progress bar printing should be enabled or not. + handlers : list of ProgressHandler + A list of handlers that will be called when the progress bar is updated. """ self._ticks = 0 self._tick = 0 @@ -130,12 +136,11 @@ def update_ticks(self, stages: Iterable[StageInfo], inflight_tasks: int) -> None new tasks and so the total task number will be updated as well. Parameters - ========== - ticks int The total number of ticks to be processed - current int The current tick position - bytes_read int The number of bytes read - inflight_tasks int The number of tasks that are currently running - + ---------- + stages : list + A list of StageInfo objects reporting progress in each stage. + inflight_tasks : int + The number of tasks that are currently running. """ total_tasks = sum(map(lambda x: x.num_tasks, stages)) completed_tasks = sum(map(lambda x: x.num_completed_tasks, stages)) diff --git a/python/pyspark/sql/tests/connect/shell/test_progress.py b/python/pyspark/sql/tests/connect/shell/test_progress.py index c8cfa72a8fbb3..7d99a699eefad 100644 --- a/python/pyspark/sql/tests/connect/shell/test_progress.py +++ b/python/pyspark/sql/tests/connect/shell/test_progress.py @@ -24,7 +24,9 @@ connect_requirement_message, ) from pyspark.testing.utils import PySparkErrorTestUtils -from pyspark.sql.connect.shell.progress import Progress, StageInfo + +if should_test_connect: + from pyspark.sql.connect.shell.progress import Progress, StageInfo @unittest.skipIf(not should_test_connect, connect_requirement_message) From b662410c295e15c54d573364ebfc63999103be7a Mon Sep 17 00:00:00 2001 From: Martin Grund Date: Mon, 1 Apr 2024 22:19:36 +0200 Subject: [PATCH 23/27] lint --- python/pyspark/shell.py | 1 - 1 file changed, 1 deletion(-) diff --git 
a/python/pyspark/shell.py b/python/pyspark/shell.py index 5bc5ac3b57fac..90fbc8433268b 100644 --- a/python/pyspark/shell.py +++ b/python/pyspark/shell.py @@ -140,4 +140,3 @@ with open(_pythonstartup) as f: code = compile(f.read(), _pythonstartup, "exec") exec(code) - From ac919827b67ffec73a9fff62be2ad4e159f2fd98 Mon Sep 17 00:00:00 2001 From: Martin Grund Date: Tue, 2 Apr 2024 11:21:20 +0200 Subject: [PATCH 24/27] doc update --- python/pyspark/sql/connect/session.py | 43 +++---------------- python/pyspark/sql/session.py | 59 +++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 37 deletions(-) diff --git a/python/pyspark/sql/connect/session.py b/python/pyspark/sql/connect/session.py index 2939603dc5e53..39f6fa092a35f 100644 --- a/python/pyspark/sql/connect/session.py +++ b/python/pyspark/sql/connect/session.py @@ -328,51 +328,20 @@ def readStream(self) -> "DataStreamReader": readStream.__doc__ = PySparkSession.readStream.__doc__ def registerProgressHandler(self, handler: "ProgressHandler") -> None: - """ - Register a progress handler to be called when a progress update is received from the server. - - .. versionadded:: 4.0 - - Parameters - ---------- - handler : ProgressHandler - A callable that follows the ProgressHandler interface. This handler will be called - on every progress update. - - Examples - -------- - - >>> def progress_handler(stages, inflight_tasks, done): - ... print(f"{len(stages)} Stages known, Done: {done}") - >>> spark.registerProgressHandler(progress_handler) - >>> res = spark.range(10).repartition(1).collect() - 3 Stages known, Done: False - 3 Stages known, Done: True - >>> spark.clearProgressHandlers() - """ self._client.register_progress_handler(handler) - def removeProgressHandler(self, handler: "ProgressHandler") -> None: - """ - Remove a progress handler that was previously registered. - - .. 
versionadded:: 4.0 + registerProgressHandler.__doc__ = PySparkSession.registerProgressHandler.__doc__ - Parameters - ---------- - handler : ProgressHandler - The handler to remove if present in the list of progress handlers. - """ + def removeProgressHandler(self, handler: "ProgressHandler") -> None: self._client.remove_progress_handler(handler) - def clearProgressHandlers(self) -> None: - """ - Clear all registered progress handlers. + removeProgressHandler.__doc__ = PySparkSession.removeProgressHandler.__doc__ - .. versionadded:: 4.0 - """ + def clearProgressHandlers(self) -> None: self._client.clear_progress_handlers() + clearProgressHandlers.__doc__ = PySparkSession.clearProgressHandlers.__doc__ + def _inferSchemaFromList( self, data: Iterable[Any], names: Optional[List[str]] = None ) -> StructType: diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index 4a8a653fd4669..de979789a8b2e 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -80,6 +80,7 @@ # Running MyPy type checks will always require pandas and # other dependencies so importing here is fine. from pyspark.sql.connect.client import SparkConnectClient + from pyspark.sql.connect.shell.progress import ProgressHandler try: import memory_profiler # noqa: F401 @@ -1967,6 +1968,8 @@ def client(self) -> "SparkConnectClient": message_parameters={"feature": "SparkSession.client"}, ) + def + def addArtifacts( self, *path: str, pyfile: bool = False, archive: bool = False, file: bool = False ) -> None: @@ -2002,6 +2005,62 @@ def addArtifacts( addArtifact = addArtifacts + def registerProgressHandler(self, handler: "ProgressHandler") -> None: + """ + Register a progress handler to be called when a progress update is received from the server. + + .. versionadded:: 4.0 + + Parameters + ---------- + handler : ProgressHandler + A callable that follows the ProgressHandler interface. This handler will be called + on every progress update. 
+ + Examples + -------- + + >>> def progress_handler(stages, inflight_tasks, done): + ... print(f"{len(stages)} Stages known, Done: {done}") + >>> spark.registerProgressHandler(progress_handler) + >>> res = spark.range(10).repartition(1).collect() + 3 Stages known, Done: False + 3 Stages known, Done: True + >>> spark.clearProgressHandlers() + """ + raise PySparkRuntimeError( + error_class="ONLY_SUPPORTED_WITH_SPARK_CONNECT", + message_parameters={"feature": "SparkSession.registerProgressHandler"}, + ) + + def removeProgressHandler(self, handler: "ProgressHandler") -> None: + """ + Remove a progress handler that was previously registered. + + .. versionadded:: 4.0 + + Parameters + ---------- + handler : ProgressHandler + The handler to remove if present in the list of progress handlers. + """ + raise PySparkRuntimeError( + error_class="ONLY_SUPPORTED_WITH_SPARK_CONNECT", + message_parameters={"feature": "SparkSession.removeProgressHandler"}, + ) + + def clearProgressHandlers(self) -> None: + """ + Clear all registered progress handlers. + + .. versionadded:: 4.0 + """ + raise PySparkRuntimeError( + error_class="ONLY_SUPPORTED_WITH_SPARK_CONNECT", + message_parameters={"feature": "SparkSession.clearProgressHandlers"}, + ) + + def copyFromLocalToFs(self, local_path: str, dest_path: str) -> None: """ Copy file from local to cloud storage file system. 
From deffbbcc0733b8d8d57ae6166e201b78bd7906b4 Mon Sep 17 00:00:00 2001 From: Martin Grund Date: Tue, 2 Apr 2024 11:58:22 +0200 Subject: [PATCH 25/27] doc update --- python/pyspark/sql/session.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index de979789a8b2e..718c5c14815d6 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -1968,8 +1968,6 @@ def client(self) -> "SparkConnectClient": message_parameters={"feature": "SparkSession.client"}, ) - def - def addArtifacts( self, *path: str, pyfile: bool = False, archive: bool = False, file: bool = False ) -> None: @@ -2060,7 +2058,6 @@ def clearProgressHandlers(self) -> None: message_parameters={"feature": "SparkSession.clearProgressHandlers"}, ) - def copyFromLocalToFs(self, local_path: str, dest_path: str) -> None: """ Copy file from local to cloud storage file system. From 415bdd8a3bfe3ca8dc4dea68093e1600115c9fea Mon Sep 17 00:00:00 2001 From: Martin Grund Date: Tue, 2 Apr 2024 13:59:32 +0200 Subject: [PATCH 26/27] doc update --- python/pyspark/sql/session.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index 718c5c14815d6..704dcf8f7347e 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -2223,6 +2223,12 @@ def _test() -> None: os.chdir(os.environ["SPARK_HOME"]) + # Disable Doc Tests for Spark Connect only functions: + pyspark.sql.session.SparkSession.registerProgressHandler.__doc__ = None + pyspark.sql.session.SparkSession.removeProgressHandler.__doc__ = None + pyspark.sql.session.SparkSession.clearProgressHandlers.__doc__ = None + + globs = pyspark.sql.session.__dict__.copy() globs["spark"] = ( SparkSession.builder.master("local[4]").appName("sql.session tests").getOrCreate() From 6fcc36fb9a150e26a767af95a2540f92fdff0db0 Mon Sep 17 00:00:00 2001 From: Martin Grund Date: Tue, 2 Apr 2024 21:48:51 +0200 Subject: [PATCH 
27/27] fix lint --- python/pyspark/sql/session.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index 704dcf8f7347e..a85b7b8bc19b0 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -2228,7 +2228,6 @@ def _test() -> None: pyspark.sql.session.SparkSession.removeProgressHandler.__doc__ = None pyspark.sql.session.SparkSession.clearProgressHandlers.__doc__ = None - globs = pyspark.sql.session.__dict__.copy() globs["spark"] = ( SparkSession.builder.master("local[4]").appName("sql.session tests").getOrCreate()