From f0185aa55aebf9e769f629c0dd90b2b786feceb8 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Thu, 19 May 2022 10:12:22 +0800 Subject: [PATCH 1/4] fix divide by 0; map key not exist --- core/src/main/resources/error/error-classes.json | 4 ++-- .../main/scala/org/apache/spark/ErrorInfo.scala | 9 ++++++--- .../scala/org/apache/spark/SparkException.scala | 14 ++++++++++---- .../spark/sql/errors/QueryExecutionErrors.scala | 6 +++--- 4 files changed, 21 insertions(+), 12 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index f4eadd4a3680f..7e3eb2a857a4a 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -38,7 +38,7 @@ "sqlState" : "22008" }, "DIVIDE_BY_ZERO" : { - "message" : [ "Division by zero. To return NULL instead, use `try_divide`. If necessary set to \"false\" (except for ANSI interval type) to bypass this error.
" ], + "message" : [ "Division by zero. To return NULL instead, use `try_divide`. If necessary set to \"false\" (except for ANSI interval type) to bypass this error." ], "sqlState" : "22012" }, "DUPLICATE_KEY" : { @@ -138,7 +138,7 @@ "sqlState" : "42000" }, "MAP_KEY_DOES_NOT_EXIST" : { - "message" : [ "Key does not exist. To return NULL instead, use `try_element_at`. If necessary set to \"false\" to bypass this error.
" ] + "message" : [ "Key does not exist. To return NULL instead, use `try_element_at`. If necessary set to \"false\" to bypass this error." ] }, "MISSING_COLUMN" : { "message" : [ "Column '' does not exist. Did you mean one of the following? []" ], diff --git a/core/src/main/scala/org/apache/spark/ErrorInfo.scala b/core/src/main/scala/org/apache/spark/ErrorInfo.scala index 0447572bb1c29..e11e6485851c2 100644 --- a/core/src/main/scala/org/apache/spark/ErrorInfo.scala +++ b/core/src/main/scala/org/apache/spark/ErrorInfo.scala @@ -71,7 +71,10 @@ private[spark] object SparkThrowableHelper { mapper.readValue(errorClassesUrl, new TypeReference[SortedMap[String, ErrorInfo]]() {}) } - def getMessage(errorClass: String, messageParameters: Array[String]): String = { + def getMessage( + errorClass: String, + messageParameters: Array[String], + queryContext: String = ""): String = { val errorInfo = errorClassToInfoMap.getOrElse(errorClass, throw new IllegalArgumentException(s"Cannot find error class '$errorClass'")) if (errorInfo.subClass.isDefined) { @@ -82,11 +85,11 @@ private[spark] object SparkThrowableHelper { val subMessageParameters = messageParameters.tail "[" + errorClass + "." + subErrorClass + "] " + String.format((errorInfo.messageFormat + errorSubInfo.messageFormat).replaceAll("<[a-zA-Z0-9_-]+>", "%s"), - subMessageParameters: _*) + subMessageParameters: _*) + queryContext } else { "[" + errorClass + "] " + String.format( errorInfo.messageFormat.replaceAll("<[a-zA-Z0-9_-]+>", "%s"), - messageParameters: _*) + messageParameters: _*) + queryContext } } diff --git a/core/src/main/scala/org/apache/spark/SparkException.scala b/core/src/main/scala/org/apache/spark/SparkException.scala index 4feea6151b908..500c88bc0f8c8 100644 --- a/core/src/main/scala/org/apache/spark/SparkException.scala +++ b/core/src/main/scala/org/apache/spark/SparkException.scala @@ -84,8 +84,12 @@ private[spark] class SparkUpgradeException( /** * Arithmetic exception thrown from Spark with an error class. */ -private[spark] class SparkArithmeticException(errorClass: String, messageParameters: Array[String]) - extends ArithmeticException(SparkThrowableHelper.getMessage(errorClass, messageParameters)) +private[spark] class SparkArithmeticException( + errorClass: String, + messageParameters: Array[String], + queryContext: String = "") + extends ArithmeticException( + SparkThrowableHelper.getMessage(errorClass, messageParameters, queryContext)) with SparkThrowable { override def getErrorClass: String = errorClass @@ -274,9 +278,11 @@ private[spark] class SparkSQLException( */ private[spark] class SparkNoSuchElementException( errorClass: String, - messageParameters: Array[String]) + messageParameters: Array[String], + queryContext: String) extends NoSuchElementException( - SparkThrowableHelper.getMessage(errorClass, messageParameters)) with SparkThrowable { + SparkThrowableHelper.getMessage(errorClass, messageParameters, queryContext)) + with SparkThrowable { override def getErrorClass: String = errorClass } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 6c750ab49cf04..f10cd4be368ef 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -180,7 +180,7 @@ object QueryExecutionErrors extends QueryErrorsBase { def divideByZeroError(context: String): ArithmeticException = { new SparkArithmeticException( errorClass = "DIVIDE_BY_ZERO", - messageParameters = Array(toSQLConf(SQLConf.ANSI_ENABLED.key), context)) + messageParameters = Array(toSQLConf(SQLConf.ANSI_ENABLED.key))) } def invalidArrayIndexError(index: Int, numElements: Int): ArrayIndexOutOfBoundsException = { @@ -218,8 +218,8 @@ object QueryExecutionErrors extends QueryErrorsBase { errorClass = "MAP_KEY_DOES_NOT_EXIST", messageParameters = Array( toSQLValue(key, dataType), - toSQLConf(SQLConf.ANSI_ENABLED.key), - context)) + toSQLConf(SQLConf.ANSI_ENABLED.key)), + context) } def invalidFractionOfSecondError(): DateTimeException = { From aef707186f576e3dbe7203a2be815264beeb23b2 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Thu, 19 May 2022 10:23:31 +0800 Subject: [PATCH 2/4] fix more --- .../spark/memory/SparkOutOfMemoryError.java | 2 +- .../main/resources/error/error-classes.json | 6 ++--- .../org/apache/spark/SparkException.scala | 20 ++++++++++++----- .../sql/errors/QueryExecutionErrors.scala | 22 ++++++++++--------- 4 files changed, 30 insertions(+), 20 deletions(-) diff --git a/core/src/main/java/org/apache/spark/memory/SparkOutOfMemoryError.java b/core/src/main/java/org/apache/spark/memory/SparkOutOfMemoryError.java index 22dfe4d4dbe1e..c5f19a0c2012b 100644 --- a/core/src/main/java/org/apache/spark/memory/SparkOutOfMemoryError.java +++ b/core/src/main/java/org/apache/spark/memory/SparkOutOfMemoryError.java @@ -39,7 +39,7 @@ public SparkOutOfMemoryError(OutOfMemoryError e) { } public SparkOutOfMemoryError(String errorClass, String[] messageParameters) { - super(SparkThrowableHelper.getMessage(errorClass, messageParameters)); + super(SparkThrowableHelper.getMessage(errorClass, messageParameters, "")); this.errorClass = errorClass; this.messageParameters = messageParameters; } diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 7e3eb2a857a4a..21fde82adbb3f 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -4,7 +4,7 @@ "sqlState" : "42000" }, "ARITHMETIC_OVERFLOW" : { - "message" : [ ". If necessary set to \"false\" (except for ANSI interval type) to bypass this error." ], + "message" : [ ". If necessary set to \"false\" (except for ANSI interval type) to bypass this error." ], "sqlState" : "22003" }, "CANNOT_CAST_DATATYPE" : { @@ -12,7 +12,7 @@ "sqlState" : "22005" }, "CANNOT_CHANGE_DECIMAL_PRECISION" : { - "message" : [ " cannot be represented as Decimal(, ). If necessary set to \"false\" to bypass this error.
" ], + "message" : [ " cannot be represented as Decimal(, ). If necessary set to \"false\" to bypass this error." ], "sqlState" : "22005" }, "CANNOT_PARSE_DECIMAL" : { @@ -23,7 +23,7 @@ "message" : [ "Cannot up cast from to .\n
" ] }, "CAST_INVALID_INPUT" : { - "message" : [ "The value of the type cannot be cast to because it is malformed. To return NULL instead, use `try_cast`. If necessary set to \"false\" to bypass this error.
" ], + "message" : [ "The value of the type cannot be cast to because it is malformed. To return NULL instead, use `try_cast`. If necessary set to \"false\" to bypass this error." ], "sqlState" : "42000" }, "CAST_OVERFLOW" : { diff --git a/core/src/main/scala/org/apache/spark/SparkException.scala b/core/src/main/scala/org/apache/spark/SparkException.scala index 500c88bc0f8c8..c28624cc7a01a 100644 --- a/core/src/main/scala/org/apache/spark/SparkException.scala +++ b/core/src/main/scala/org/apache/spark/SparkException.scala @@ -136,9 +136,13 @@ private[spark] class SparkConcurrentModificationException( /** * Datetime exception thrown from Spark with an error class. */ -private[spark] class SparkDateTimeException(errorClass: String, messageParameters: Array[String]) +private[spark] class SparkDateTimeException( + errorClass: String, + messageParameters: Array[String], + queryContext: String = "") extends DateTimeException( - SparkThrowableHelper.getMessage(errorClass, messageParameters)) with SparkThrowable { + SparkThrowableHelper.getMessage(errorClass, messageParameters, queryContext)) + with SparkThrowable { override def getErrorClass: String = errorClass } @@ -172,9 +176,11 @@ private[spark] class SparkFileNotFoundException( */ private[spark] class SparkNumberFormatException( errorClass: String, - messageParameters: Array[String]) + messageParameters: Array[String], + queryContext: String) extends NumberFormatException( - SparkThrowableHelper.getMessage(errorClass, messageParameters)) with SparkThrowable { + SparkThrowableHelper.getMessage(errorClass, messageParameters, queryContext)) + with SparkThrowable { override def getErrorClass: String = errorClass } @@ -230,9 +236,11 @@ private[spark] class SparkIOException( private[spark] class SparkRuntimeException( errorClass: String, messageParameters: Array[String], - cause: Throwable = null) + cause: Throwable = null, + queryContext: String = "") extends RuntimeException( - SparkThrowableHelper.getMessage(errorClass, messageParameters), cause) with SparkThrowable { + SparkThrowableHelper.getMessage(errorClass, messageParameters, queryContext), cause) + with SparkThrowable { override def getErrorClass: String = errorClass } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index f10cd4be368ef..68cf643985031 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -104,8 +104,8 @@ object QueryExecutionErrors extends QueryErrorsBase { value.toDebugString, decimalPrecision.toString, decimalScale.toString, - toSQLConf(SQLConf.ANSI_ENABLED.key), - context)) + toSQLConf(SQLConf.ANSI_ENABLED.key)), + context) } def invalidInputInCastToDatetimeError( @@ -119,8 +119,8 @@ object QueryExecutionErrors extends QueryErrorsBase { toSQLValue(value, from), toSQLType(from), toSQLType(to), - toSQLConf(SQLConf.ANSI_ENABLED.key), - errorContext)) + toSQLConf(SQLConf.ANSI_ENABLED.key)), + errorContext) } def invalidInputSyntaxForBooleanError( @@ -132,8 +132,8 @@ object QueryExecutionErrors extends QueryErrorsBase { toSQLValue(s, StringType), toSQLType(StringType), toSQLType(BooleanType), - toSQLConf(SQLConf.ANSI_ENABLED.key), - errorContext)) + toSQLConf(SQLConf.ANSI_ENABLED.key)), + queryContext = errorContext) } def invalidInputInCastToNumberError( @@ -146,8 +146,8 @@ object QueryExecutionErrors extends QueryErrorsBase { toSQLValue(s, StringType), toSQLType(StringType), toSQLType(to), - toSQLConf(SQLConf.ANSI_ENABLED.key), - errorContext)) + toSQLConf(SQLConf.ANSI_ENABLED.key)), + errorContext) } def cannotCastFromNullTypeError(to: DataType): Throwable = { @@ -180,7 +180,8 @@ object QueryExecutionErrors extends QueryErrorsBase { def divideByZeroError(context: String): ArithmeticException = { new SparkArithmeticException( errorClass = "DIVIDE_BY_ZERO", - messageParameters = Array(toSQLConf(SQLConf.ANSI_ENABLED.key))) + messageParameters = Array(toSQLConf(SQLConf.ANSI_ENABLED.key)), + queryContext = context) } def invalidArrayIndexError(index: Int, numElements: Int): ArrayIndexOutOfBoundsException = { @@ -478,7 +479,8 @@ object QueryExecutionErrors extends QueryErrorsBase { errorContext: String = ""): ArithmeticException = { val alternative = if (hint.nonEmpty) s" To return NULL instead, use '$hint'." else "" new SparkArithmeticException("ARITHMETIC_OVERFLOW", - Array(message, alternative, SQLConf.ANSI_ENABLED.key, errorContext)) + Array(message, alternative, SQLConf.ANSI_ENABLED.key), + errorContext) } def unaryMinusCauseOverflowError(originValue: Int): ArithmeticException = { From 9cfb13a219bec4a84380d52038fa133d40bdaa91 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Thu, 19 May 2022 10:48:31 +0800 Subject: [PATCH 3/4] use named parameters --- .../spark/sql/errors/QueryExecutionErrors.scala | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 68cf643985031..a155b0694b5f2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -105,7 +105,7 @@ object QueryExecutionErrors extends QueryErrorsBase { decimalPrecision.toString, decimalScale.toString, toSQLConf(SQLConf.ANSI_ENABLED.key)), - context) + queryContext = context) } def invalidInputInCastToDatetimeError( @@ -120,7 +120,7 @@ object QueryExecutionErrors extends QueryErrorsBase { toSQLType(from), toSQLType(to), toSQLConf(SQLConf.ANSI_ENABLED.key)), - errorContext) + queryContext = errorContext) } def invalidInputSyntaxForBooleanError( @@ -147,7 +147,7 @@ object QueryExecutionErrors extends QueryErrorsBase { toSQLType(StringType), toSQLType(to), toSQLConf(SQLConf.ANSI_ENABLED.key)), - errorContext) + queryContext = errorContext) } def cannotCastFromNullTypeError(to: DataType): Throwable = { @@ -220,7 +220,7 @@ object QueryExecutionErrors extends QueryErrorsBase { messageParameters = Array( toSQLValue(key, dataType), toSQLConf(SQLConf.ANSI_ENABLED.key)), - context) + queryContext = context) } def invalidFractionOfSecondError(): DateTimeException = { @@ -478,9 +478,10 @@ object QueryExecutionErrors extends QueryErrorsBase { hint: String = "", errorContext: String = ""): ArithmeticException = { val alternative = if (hint.nonEmpty) s" To return NULL instead, use '$hint'." else "" - new SparkArithmeticException("ARITHMETIC_OVERFLOW", - Array(message, alternative, SQLConf.ANSI_ENABLED.key), - errorContext) + new SparkArithmeticException( + errorClass = "ARITHMETIC_OVERFLOW", + messageParameters = Array(message, alternative, SQLConf.ANSI_ENABLED.key), + queryContext = errorContext) } def unaryMinusCauseOverflowError(originValue: Int): ArithmeticException = { From ea55ee398374995eec3b8a0cedf1d975de76f0af Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Thu, 19 May 2022 13:30:56 +0800 Subject: [PATCH 4/4] fix --- core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala b/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala index 6321d8a78ed16..d6348e1aff1e7 100644 --- a/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala @@ -127,7 +127,7 @@ class SparkThrowableSuite extends SparkFunSuite { assert(getMessage("DIVIDE_BY_ZERO", Array("foo", "bar", "baz")) == "[DIVIDE_BY_ZERO] Division by zero. " + "To return NULL instead, use `try_divide`. If necessary set foo to \"false\" " + - "(except for ANSI interval type) to bypass this error.bar") + "(except for ANSI interval type) to bypass this error.") } test("Error message is formatted") {