From fd36d2f54939a27406f4077a04a6f7dad328f840 Mon Sep 17 00:00:00 2001 From: osatici Date: Thu, 9 Nov 2017 14:06:05 +0000 Subject: [PATCH 1/3] do not log properties on SaveIntoDataSourceCommand.simpleString --- .../spark/internal/config/package.scala | 2 +- .../SaveIntoDataSourceCommand.scala | 7 +++ .../SaveIntoDataSourceCommandSuite.scala | 48 +++++++++++++++++++ 3 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 1588dfec2074a..da3cc04275154 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -247,7 +247,7 @@ package object config { "a property key or value, the value is redacted from the environment UI and various logs " + "like YARN and event logs.") .regexConf - .createWithDefault("(?i)secret|password".r) + .createWithDefault("(?i)secret|password|url|user|username".r) private[spark] val STRING_REDACTION_PATTERN = ConfigBuilder("spark.redaction.string.regex") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommand.scala index 6f19ea195c0cd..2556bd1178c31 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommand.scala @@ -17,10 +17,12 @@ package org.apache.spark.sql.execution.datasources +import org.apache.spark.SparkEnv import org.apache.spark.sql.{Dataset, Row, SaveMode, SparkSession} import org.apache.spark.sql.catalyst.plans.QueryPlan import
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.command.RunnableCommand +import org.apache.spark.util.Utils /** * Saves the results of `query` in to a data source. @@ -49,4 +51,9 @@ case class SaveIntoDataSourceCommand( Seq.empty[Row] } + + override def simpleString: String = { + val redacted = Utils.redact(SparkEnv.get.conf, options.toSeq).toMap + s"SaveIntoDataSourceCommand ${dataSource}, ${redacted}, ${mode}" + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala new file mode 100644 index 0000000000000..4b3ca8e60cab6 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.datasources + +import org.apache.spark.SparkConf +import org.apache.spark.sql.SaveMode +import org.apache.spark.sql.test.SharedSQLContext + +class SaveIntoDataSourceCommandSuite extends SharedSQLContext { + + override protected def sparkConf: SparkConf = super.sparkConf + .set("spark.redaction.regex", "(?i)password|url") + + test("simpleString is redacted") { + val URL = "connection.url" + val PASS = "123" + val DRIVER = "mydriver" + + val dataSource = DataSource( + sparkSession = spark, + className = "jdbc", + partitionColumns = Nil, + options = Map("password" -> PASS, "url" -> URL, "driver" -> DRIVER)) + + val logicalPlanString = dataSource + .planForWriting(SaveMode.ErrorIfExists, spark.range(1).logicalPlan) + .treeString(true) + + assert(!logicalPlanString.contains(URL)) + assert(!logicalPlanString.contains(PASS)) + assert(logicalPlanString.contains(DRIVER)) + } +} From ba4d590b192cfe220bfed0e1bd690af746cf2ad1 Mon Sep 17 00:00:00 2001 From: osatici Date: Thu, 16 Nov 2017 00:41:57 +0000 Subject: [PATCH 2/3] fix fields to match 2.2 --- .../sql/execution/datasources/SaveIntoDataSourceCommand.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommand.scala index 2556bd1178c31..53868d41b7c44 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommand.scala @@ -54,6 +54,6 @@ case class SaveIntoDataSourceCommand( override def simpleString: String = { val redacted = Utils.redact(SparkEnv.get.conf, options.toSeq).toMap - s"SaveIntoDataSourceCommand ${dataSource}, ${redacted}, ${mode}" + s"SaveIntoDataSourceCommand ${provider}, ${partitionColumns}, ${redacted}, ${mode}" } } 
From adbf94eef67780b2ef547eff250ea3e11e4deda2 Mon Sep 17 00:00:00 2001 From: osatici Date: Thu, 16 Nov 2017 01:03:30 +0000 Subject: [PATCH 3/3] create the command object in tests directly --- .../SaveIntoDataSourceCommandSuite.scala | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala index 4b3ca8e60cab6..6b9ddb1b481c7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala @@ -31,18 +31,15 @@ class SaveIntoDataSourceCommandSuite extends SharedSQLContext { val PASS = "123" val DRIVER = "mydriver" - val dataSource = DataSource( - sparkSession = spark, - className = "jdbc", - partitionColumns = Nil, - options = Map("password" -> PASS, "url" -> URL, "driver" -> DRIVER)) - - val logicalPlanString = dataSource - .planForWriting(SaveMode.ErrorIfExists, spark.range(1).logicalPlan) - .treeString(true) - - assert(!logicalPlanString.contains(URL)) - assert(!logicalPlanString.contains(PASS)) - assert(logicalPlanString.contains(DRIVER)) + val simpleString = SaveIntoDataSourceCommand( + spark.range(1).logicalPlan, + "jdbc", + Nil, + Map("password" -> PASS, "url" -> URL, "driver" -> DRIVER), + SaveMode.ErrorIfExists).treeString(true) + + assert(!simpleString.contains(URL)) + assert(!simpleString.contains(PASS)) + assert(simpleString.contains(DRIVER)) } }