From c5bfde319a2ec267f7bc5613c8299d2e4ceb6508 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Thu, 20 Jun 2024 16:17:53 -0700 Subject: [PATCH 1/5] Skip regex in ConfigReader.substitute for inputs that cannot possibly require substitutions --- .../scala/org/apache/spark/internal/config/ConfigReader.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/internal/config/ConfigReader.scala b/core/src/main/scala/org/apache/spark/internal/config/ConfigReader.scala index c1ab22150d024..639d2a16b17f2 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/ConfigReader.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/ConfigReader.scala @@ -84,7 +84,9 @@ private[spark] class ConfigReader(conf: ConfigProvider) { def substitute(input: String): String = substitute(input, Set()) private def substitute(input: String, usedRefs: Set[String]): String = { - if (input != null) { + // Performance optimization: skip the costly regex processing + // if the string cannot possibly contain a variable reference: + if (input != null && input.contains("${")) { ConfigReader.REF_RE.replaceAllIn(input, { m => val prefix = m.group(1) val name = m.group(2) From 5a03cffcb0d1240b9cba9c66cd83a27afad0598d Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Thu, 20 Jun 2024 16:50:39 -0700 Subject: [PATCH 2/5] ConfigEntry collections operation optimizations --- .../spark/internal/config/ConfigEntry.scala | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/internal/config/ConfigEntry.scala b/core/src/main/scala/org/apache/spark/internal/config/ConfigEntry.scala index a295ef06a6376..75a9332493547 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/ConfigEntry.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/ConfigEntry.scala @@ -89,14 +89,14 @@ private[spark] abstract class ConfigEntry[T] ( def defaultValueString: String protected def readString(reader: ConfigReader): Option[String] = { - val values = Seq( - prependedKey.flatMap(reader.get(_)), - alternatives.foldLeft(reader.get(key))((res, nextKey) => res.orElse(reader.get(nextKey))) - ).flatten - if (values.nonEmpty) { - Some(values.mkString(prependSeparator)) - } else { - None + val maybePrependedValue: Option[String] = prependedKey.flatMap(reader.get) + val value: Option[String] = alternatives + .foldLeft(reader.get(key))((res, nextKey) => res.orElse(reader.get(nextKey))) + (maybePrependedValue, value) match { + case (Some(prependedValue), Some(value)) => Some(s"$prependedValue$prependSeparator$value") + case (Some(prepend), None) => Some(prepend) + case (None, Some(value)) => Some(value) + case (None, None) => None } } From b4af01da41cf8d826268ebbd7ac5124d86d7f08d Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Thu, 20 Jun 2024 16:50:50 -0700 Subject: [PATCH 3/5] Add code comments. --- .../scala/org/apache/spark/internal/config/ConfigEntry.scala | 2 ++ .../scala/org/apache/spark/internal/config/ConfigReader.scala | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/internal/config/ConfigEntry.scala b/core/src/main/scala/org/apache/spark/internal/config/ConfigEntry.scala index 75a9332493547..e15c942735d5d 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/ConfigEntry.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/ConfigEntry.scala @@ -89,6 +89,8 @@ private[spark] abstract class ConfigEntry[T] ( def defaultValueString: String protected def readString(reader: ConfigReader): Option[String] = { + // SPARK-48678: performance optimization: this code could be expressed more succinctly + // using flatten and mkString, but doings so adds lots of Scala collections perf. overhead. val maybePrependedValue: Option[String] = prependedKey.flatMap(reader.get) val value: Option[String] = alternatives .foldLeft(reader.get(key))((res, nextKey) => res.orElse(reader.get(nextKey))) diff --git a/core/src/main/scala/org/apache/spark/internal/config/ConfigReader.scala b/core/src/main/scala/org/apache/spark/internal/config/ConfigReader.scala index 639d2a16b17f2..8824d196489a8 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/ConfigReader.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/ConfigReader.scala @@ -84,7 +84,7 @@ private[spark] class ConfigReader(conf: ConfigProvider) { def substitute(input: String): String = substitute(input, Set()) private def substitute(input: String, usedRefs: Set[String]): String = { - // Performance optimization: skip the costly regex processing + // SPARK-48678: performance optimization: skip the costly regex processing // if the string cannot possibly contain a variable reference: if (input != null && input.contains("${")) { ConfigReader.REF_RE.replaceAllIn(input, { m => From 9ee3ebbd047d410b3dccf92458675609f0e7e1fa Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Thu, 20 Jun 2024 19:49:32 -0700 Subject: [PATCH 4/5] Update core/src/main/scala/org/apache/spark/internal/config/ConfigEntry.scala Co-authored-by: Hyukjin Kwon --- .../scala/org/apache/spark/internal/config/ConfigEntry.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/internal/config/ConfigEntry.scala b/core/src/main/scala/org/apache/spark/internal/config/ConfigEntry.scala index e15c942735d5d..808394e499421 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/ConfigEntry.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/ConfigEntry.scala @@ -90,7 +90,7 @@ private[spark] abstract class ConfigEntry[T] ( protected def readString(reader: ConfigReader): Option[String] = { // SPARK-48678: performance optimization: this code could be expressed more succinctly - // using flatten and mkString, but doings so adds lots of Scala collections perf. overhead. + // using flatten and mkString, but doing so adds lots of Scala collections perf. overhead. val maybePrependedValue: Option[String] = prependedKey.flatMap(reader.get) val value: Option[String] = alternatives .foldLeft(reader.get(key))((res, nextKey) => res.orElse(reader.get(nextKey))) From a18177ff323d11db8cac2bb994e767951adcc3b1 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Fri, 21 Jun 2024 10:33:55 -0700 Subject: [PATCH 5/5] Address review feedback: remove shadowing and some branches --- .../org/apache/spark/internal/config/ConfigEntry.scala | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/internal/config/ConfigEntry.scala b/core/src/main/scala/org/apache/spark/internal/config/ConfigEntry.scala index 808394e499421..17d3329e6b494 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/ConfigEntry.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/ConfigEntry.scala @@ -92,13 +92,11 @@ private[spark] abstract class ConfigEntry[T] ( // SPARK-48678: performance optimization: this code could be expressed more succinctly // using flatten and mkString, but doing so adds lots of Scala collections perf. overhead. val maybePrependedValue: Option[String] = prependedKey.flatMap(reader.get) - val value: Option[String] = alternatives + val maybeValue: Option[String] = alternatives .foldLeft(reader.get(key))((res, nextKey) => res.orElse(reader.get(nextKey))) - (maybePrependedValue, value) match { + (maybePrependedValue, maybeValue) match { case (Some(prependedValue), Some(value)) => Some(s"$prependedValue$prependSeparator$value") - case (Some(prepend), None) => Some(prepend) - case (None, Some(value)) => Some(value) - case (None, None) => None + case _ => maybeValue.orElse(maybePrependedValue) } }