Skip to content

Commit dbba3a3

Browse files
jzhugedongjoon-hyun
authored andcommitted
[SPARK-27947][SQL] Enhance redactOptions to accept any Map type
## What changes were proposed in this pull request? Handle the case when ParsedStatement subclass has a Map field but not of type Map[String, String]. In ParsedStatement.productIterator, `case mapArg: Map[_, _]` can match any Map type due to type erasure, thus causing `asInstanceOf[Map[String, String]]` to throw ClassCastException. The following test reproduces the issue: ``` case class TestStatement(p: Map[String, Int]) extends ParsedStatement { override def output: Seq[Attribute] = Nil override def children: Seq[LogicalPlan] = Nil } TestStatement(Map("abc" -> 1)).toString ``` Changing the code to `case mapArg: Map[String, String]` will not help due to type erasure. As a matter of fact, compiler gives this warning: ``` Warning:(41, 18) non-variable type argument String in type pattern scala.collection.immutable.Map[String,String] (the underlying of Map[String,String]) is unchecked since it is eliminated by erasure case mapArg: Map[String, String] => ``` ## How was this patch tested? Add 2 unit tests. Closes apache#24800 from jzhuge/SPARK-27947. Authored-by: John Zhuge <[email protected]> Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent 3b37bfd commit dbba3a3

File tree

4 files changed

+30
-10
lines changed

4 files changed

+30
-10
lines changed

core/src/main/scala/org/apache/spark/util/Utils.scala

+15-8
Original file line numberDiff line numberDiff line change
@@ -2596,7 +2596,7 @@ private[spark] object Utils extends Logging {
25962596
* Redact the sensitive values in the given map. If a map key matches the redaction pattern then
25972597
* its value is replaced with a dummy text.
25982598
*/
2599-
def redact(regex: Option[Regex], kvs: Seq[(String, String)]): Seq[(String, String)] = {
2599+
def redact[K, V](regex: Option[Regex], kvs: Seq[(K, V)]): Seq[(K, V)] = {
26002600
regex match {
26012601
case None => kvs
26022602
case Some(r) => redact(r, kvs)
@@ -2618,7 +2618,7 @@ private[spark] object Utils extends Logging {
26182618
}
26192619
}
26202620

2621-
private def redact(redactionPattern: Regex, kvs: Seq[(String, String)]): Seq[(String, String)] = {
2621+
private def redact[K, V](redactionPattern: Regex, kvs: Seq[(K, V)]): Seq[(K, V)] = {
26222622
// If the sensitive information regex matches with either the key or the value, redact the value
26232623
// While the original intent was to only redact the value if the key matched with the regex,
26242624
// we've found that especially in verbose mode, the value of the property may contain sensitive
@@ -2632,12 +2632,19 @@ private[spark] object Utils extends Logging {
26322632
// arbitrary property contained the term 'password', we may redact the value from the UI and
26332633
// logs. In order to work around it, user would have to make the spark.redaction.regex property
26342634
// more specific.
2635-
kvs.map { case (key, value) =>
2636-
redactionPattern.findFirstIn(key)
2637-
.orElse(redactionPattern.findFirstIn(value))
2638-
.map { _ => (key, REDACTION_REPLACEMENT_TEXT) }
2639-
.getOrElse((key, value))
2640-
}
2635+
kvs.map {
2636+
case (key: String, value: String) =>
2637+
redactionPattern.findFirstIn(key)
2638+
.orElse(redactionPattern.findFirstIn(value))
2639+
.map { _ => (key, REDACTION_REPLACEMENT_TEXT) }
2640+
.getOrElse((key, value))
2641+
case (key, value: String) =>
2642+
redactionPattern.findFirstIn(value)
2643+
.map { _ => (key, REDACTION_REPLACEMENT_TEXT) }
2644+
.getOrElse((key, value))
2645+
case (key, value) =>
2646+
(key, value)
2647+
}.asInstanceOf[Seq[(K, V)]]
26412648
}
26422649

26432650
/**

core/src/test/scala/org/apache/spark/util/UtilsSuite.scala

+13
Original file line numberDiff line numberDiff line change
@@ -1120,6 +1120,19 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging {
11201120
assert(redactedCmdArgMap("spark.sensitive.property") === Utils.REDACTION_REPLACEMENT_TEXT)
11211121
}
11221122

1123+
test("redact sensitive information in sequence of key value pairs") {
1124+
val secretKeys = Some("my.password".r)
1125+
assert(Utils.redact(secretKeys, Seq(("spark.my.password", "12345"))) ===
1126+
Seq(("spark.my.password", Utils.REDACTION_REPLACEMENT_TEXT)))
1127+
assert(Utils.redact(secretKeys, Seq(("anything", "spark.my.password=12345"))) ===
1128+
Seq(("anything", Utils.REDACTION_REPLACEMENT_TEXT)))
1129+
assert(Utils.redact(secretKeys, Seq((999, "spark.my.password=12345"))) ===
1130+
Seq((999, Utils.REDACTION_REPLACEMENT_TEXT)))
1131+
// Do not redact when value type is not string
1132+
assert(Utils.redact(secretKeys, Seq(("my.password", 12345))) ===
1133+
Seq(("my.password", 12345)))
1134+
}
1135+
11231136
test("tryWithSafeFinally") {
11241137
var e = new Error("Block0")
11251138
val finallyBlockError = new Error("Finally Block")

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/sql/ParsedStatement.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
3737
private[sql] abstract class ParsedStatement extends LogicalPlan {
3838
// Redact properties and options when parsed nodes are used by generic methods like toString
3939
override def productIterator: Iterator[Any] = super.productIterator.map {
40-
case mapArg: Map[_, _] => conf.redactOptions(mapArg.asInstanceOf[Map[String, String]])
40+
case mapArg: Map[_, _] => conf.redactOptions(mapArg)
4141
case other => other
4242
}
4343

sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -2353,7 +2353,7 @@ class SQLConf extends Serializable with Logging {
23532353
/**
23542354
* Redacts the given option map according to the description of SQL_OPTIONS_REDACTION_PATTERN.
23552355
*/
2356-
def redactOptions(options: Map[String, String]): Map[String, String] = {
2356+
def redactOptions[K, V](options: Map[K, V]): Map[K, V] = {
23572357
val regexes = Seq(
23582358
getConf(SQL_OPTIONS_REDACTION_PATTERN),
23592359
SECRET_REDACTION_PATTERN.readFrom(reader))

0 commit comments

Comments
 (0)