From 3a255792e5b67ebbedca2ded35a5396361dc567a Mon Sep 17 00:00:00 2001 From: panbingkun Date: Thu, 4 Apr 2024 10:17:07 +0800 Subject: [PATCH 1/5] [SPARK-47723][CORE][TESTS] Introduce a tool that can sort alphabetically enumeration field in `LogEntry` automatically --- .../org/apache/spark/internal/LogKey.scala | 2 +- .../org/apache/spark/util/LogKeySuite.scala | 66 ++++++++++++++++++- 2 files changed, 64 insertions(+), 4 deletions(-) diff --git a/common/utils/src/main/scala/org/apache/spark/internal/LogKey.scala b/common/utils/src/main/scala/org/apache/spark/internal/LogKey.scala index b8a43a03d8b62..86ea648d47c12 100644 --- a/common/utils/src/main/scala/org/apache/spark/internal/LogKey.scala +++ b/common/utils/src/main/scala/org/apache/spark/internal/LogKey.scala @@ -30,8 +30,8 @@ object LogKey extends Enumeration { val MAX_EXECUTOR_FAILURES = Value val MAX_SIZE = Value val MIN_SIZE = Value - val REMOTE_ADDRESS = Value val POD_ID = Value + val REMOTE_ADDRESS = Value type LogKey = Value } diff --git a/common/utils/src/test/scala/org/apache/spark/util/LogKeySuite.scala b/common/utils/src/test/scala/org/apache/spark/util/LogKeySuite.scala index 39229f4b910bf..7a964c3345142 100644 --- a/common/utils/src/test/scala/org/apache/spark/util/LogKeySuite.scala +++ b/common/utils/src/test/scala/org/apache/spark/util/LogKeySuite.scala @@ -17,6 +17,13 @@ package org.apache.spark.util +import java.nio.charset.StandardCharsets +import java.nio.file.{Files, Path} +import java.util.{ArrayList => JList} + +import scala.jdk.CollectionConverters._ + +import org.apache.commons.io.FileUtils import org.scalatest.funsuite.AnyFunSuite // scalastyle:ignore funsuite import org.apache.spark.internal.{Logging, LogKey} @@ -25,8 +32,61 @@ class LogKeySuite extends AnyFunSuite // scalastyle:ignore funsuite with Logging { - test("LogKey enumeration fields must be sorted alphabetically") { - val keys = LogKey.values.toSeq - assert(keys === keys.sorted, "LogKey enumeration fields must be sorted alphabetically") + // scalastyle:off line.size.limit + /* Used to regenerate the LogKey class file. Run: + {{{ + SPARK_GENERATE_GOLDEN_FILES=1 build/sbt \ + "common-utils/testOnly *LogKeySuite -- -t \"LogKey enumeration fields are correctly sorted\"" + }}} + */ + // scalastyle:on line.size.limit + + /** + * Get a Path relative to the root project. It is assumed that a spark home is set. + */ + protected final def getWorkspaceFilePath(first: String, more: String*): Path = { + if (!(sys.props.contains("spark.test.home") || sys.env.contains("SPARK_HOME"))) { + fail("spark.test.home or SPARK_HOME is not set.") + } + val sparkHome = sys.props.getOrElse("spark.test.home", sys.env("SPARK_HOME")) + java.nio.file.Paths.get(sparkHome, first +: more: _*) + } + + private val regenerateGoldenFiles: Boolean = System.getenv("SPARK_GENERATE_GOLDEN_FILES") == "1" + + private val logKeyFilePath = getWorkspaceFilePath("common", "utils", "src", "main", "scala", + "org", "apache", "spark", "internal", "LogKey.scala") + + test("LogKey enumeration fields are correctly sorted") { + val originalKeys = LogKey.values.toSeq + val sortedKeys = originalKeys.sortBy(_.toString) + if (regenerateGoldenFiles) { + if (originalKeys != sortedKeys) { + val logKeyFile = logKeyFilePath.toFile + logInfo(s"Regenerating LogKey file $logKeyFile") + val originalContents = FileUtils.readLines(logKeyFile, StandardCharsets.UTF_8) + val sortedContents = new JList[String]() + var firstMatch = false + originalContents.asScala.foreach { line => + if (line.trim.startsWith("val ") && line.trim.endsWith(" = Value")) { + if (!firstMatch) { + sortedKeys.foreach { logKey => + sortedContents.add(s" val ${logKey.toString} = Value") + } + firstMatch = true + } else { + // ignore it + } + } else { + sortedContents.add(line) + } + } + Files.delete(logKeyFile.toPath) + FileUtils.writeLines(logKeyFile, StandardCharsets.UTF_8.name(), sortedContents) + } + } else { + assert(originalKeys === sortedKeys, + "LogKey enumeration fields must be sorted alphabetically") + } } } From e5411bddde21bbf4ec82546a84fec2fd85ef15d7 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Thu, 4 Apr 2024 13:55:30 +0800 Subject: [PATCH 2/5] fix --- .../org/apache/spark/util/LogKeySuite.scala | 51 +++++++++++-------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/common/utils/src/test/scala/org/apache/spark/util/LogKeySuite.scala b/common/utils/src/test/scala/org/apache/spark/util/LogKeySuite.scala index 7965f584e88ab..03b521f44195c 100644 --- a/common/utils/src/test/scala/org/apache/spark/util/LogKeySuite.scala +++ b/common/utils/src/test/scala/org/apache/spark/util/LogKeySuite.scala @@ -27,6 +27,7 @@ import org.apache.commons.io.FileUtils import org.scalatest.funsuite.AnyFunSuite // scalastyle:ignore funsuite import org.apache.spark.internal.{Logging, LogKey} +import org.apache.spark.internal.LogKey.LogKey class LogKeySuite extends AnyFunSuite // scalastyle:ignore funsuite @@ -36,7 +37,7 @@ class LogKeySuite /* Used to regenerate the LogKey class file. Run: {{{ SPARK_GENERATE_GOLDEN_FILES=1 build/sbt \ - "common-utils/testOnly *LogKeySuite -- -t \"LogKey enumeration fields are correctly sorted\"" + "common-utils/testOnly *LogKeySuite -- \"LogKey enumeration fields are correctly sorted\"" }}} */ // scalastyle:on line.size.limit @@ -57,31 +58,37 @@ class LogKeySuite private val logKeyFilePath = getWorkspaceFilePath("common", "utils", "src", "main", "scala", "org", "apache", "spark", "internal", "LogKey.scala") - test("LogKey enumeration fields are correctly sorted") { - val originalKeys = LogKey.values.toSeq - val sortedKeys = originalKeys.sortBy(_.toString) - if (regenerateGoldenFiles) { - if (originalKeys != sortedKeys) { - val logKeyFile = logKeyFilePath.toFile - logInfo(s"Regenerating LogKey file $logKeyFile") - val originalContents = FileUtils.readLines(logKeyFile, StandardCharsets.UTF_8) - val sortedContents = new JList[String]() - var firstMatch = false - originalContents.asScala.foreach { line => - if (line.trim.startsWith("val ") && line.trim.endsWith(" = Value")) { - if (!firstMatch) { - sortedKeys.foreach { logKey => - sortedContents.add(s" val ${logKey.toString} = Value") - } - firstMatch = true + // regenerate the file `LogKey.scala` with its enumeration fields sorted alphabetically + private def regenerateLogKeyFile( + originalKeys: Seq[LogKey], sortedKeys: Seq[LogKey]): Unit = { + if (originalKeys != sortedKeys) { + val logKeyFile = logKeyFilePath.toFile + logInfo(s"Regenerating LogKey file $logKeyFile") + val originalContents = FileUtils.readLines(logKeyFile, StandardCharsets.UTF_8) + val sortedContents = new JList[String]() + var firstMatch = false + originalContents.asScala.foreach { line => + if (line.trim.startsWith("val ") && line.trim.endsWith(" = Value")) { + if (!firstMatch) { + sortedKeys.foreach { logKey => + sortedContents.add(s" val ${logKey.toString} = Value") } - } else { - sortedContents.add(line) + firstMatch = true } + } else { + sortedContents.add(line) } - Files.delete(logKeyFile.toPath) - FileUtils.writeLines(logKeyFile, StandardCharsets.UTF_8.name(), sortedContents) } + Files.delete(logKeyFile.toPath) + FileUtils.writeLines(logKeyFile, StandardCharsets.UTF_8.name(), sortedContents) + } + } + + test("LogKey enumeration fields are correctly sorted") { + val originalKeys = LogKey.values.toSeq + val sortedKeys = originalKeys.sortBy(_.toString) + if (regenerateGoldenFiles) { + regenerateLogKeyFile(originalKeys, sortedKeys) } else { assert(originalKeys === sortedKeys, "LogKey enumeration fields must be sorted alphabetically") From 81b1e652f558494dbe8bc4484fe081c6f5e8058b Mon Sep 17 00:00:00 2001 From: panbingkun Date: Thu, 4 Apr 2024 13:58:26 +0800 Subject: [PATCH 3/5] remove -- -t --- .../src/test/scala/org/apache/spark/util/LogKeySuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/utils/src/test/scala/org/apache/spark/util/LogKeySuite.scala b/common/utils/src/test/scala/org/apache/spark/util/LogKeySuite.scala index 03b521f44195c..225877e3d2b20 100644 --- a/common/utils/src/test/scala/org/apache/spark/util/LogKeySuite.scala +++ b/common/utils/src/test/scala/org/apache/spark/util/LogKeySuite.scala @@ -37,7 +37,7 @@ class LogKeySuite /* Used to regenerate the LogKey class file. Run: {{{ SPARK_GENERATE_GOLDEN_FILES=1 build/sbt \ - "common-utils/testOnly *LogKeySuite -- \"LogKey enumeration fields are correctly sorted\"" + "common-utils/testOnly *LogKeySuite \"LogKey enumeration fields are correctly sorted\"" }}} */ // scalastyle:on line.size.limit From aeba333da7c2df1e0ee6cd7f72c96628ee29b822 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Thu, 4 Apr 2024 15:32:13 +0800 Subject: [PATCH 4/5] update LogKeySuite.scala Co-authored-by: Gengliang Wang --- .../src/test/scala/org/apache/spark/util/LogKeySuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/utils/src/test/scala/org/apache/spark/util/LogKeySuite.scala b/common/utils/src/test/scala/org/apache/spark/util/LogKeySuite.scala index 225877e3d2b20..327acaea60972 100644 --- a/common/utils/src/test/scala/org/apache/spark/util/LogKeySuite.scala +++ b/common/utils/src/test/scala/org/apache/spark/util/LogKeySuite.scala @@ -37,7 +37,7 @@ class LogKeySuite /* Used to regenerate the LogKey class file. Run: {{{ SPARK_GENERATE_GOLDEN_FILES=1 build/sbt \ - "common-utils/testOnly *LogKeySuite \"LogKey enumeration fields are correctly sorted\"" + "common-utils/testOnly *LogKeySuite" }}} */ // scalastyle:on line.size.limit From 63bcd5845e03b7b318039a028a41b5d9cd2ba06b Mon Sep 17 00:00:00 2001 From: panbingkun Date: Fri, 5 Apr 2024 06:13:20 +0800 Subject: [PATCH 5/5] adjust position about comment --- .../org/apache/spark/util/LogKeySuite.scala | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/common/utils/src/test/scala/org/apache/spark/util/LogKeySuite.scala b/common/utils/src/test/scala/org/apache/spark/util/LogKeySuite.scala index 327acaea60972..24a24538ad72b 100644 --- a/common/utils/src/test/scala/org/apache/spark/util/LogKeySuite.scala +++ b/common/utils/src/test/scala/org/apache/spark/util/LogKeySuite.scala @@ -29,19 +29,18 @@ import org.scalatest.funsuite.AnyFunSuite // scalastyle:ignore funsuite import org.apache.spark.internal.{Logging, LogKey} import org.apache.spark.internal.LogKey.LogKey +// scalastyle:off line.size.limit +/** + * To re-generate the LogKey class file, run: + * {{{ + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "common-utils/testOnly org.apache.spark.util.LogKeySuite" + * }}} + */ +// scalastyle:on line.size.limit class LogKeySuite extends AnyFunSuite // scalastyle:ignore funsuite with Logging { - // scalastyle:off line.size.limit - /* Used to regenerate the LogKey class file. Run: - {{{ - SPARK_GENERATE_GOLDEN_FILES=1 build/sbt \ - "common-utils/testOnly *LogKeySuite" - }}} - */ - // scalastyle:on line.size.limit - /** * Get a Path relative to the root project. It is assumed that a spark home is set. */