Skip to content

Commit 20469d4

Browse files
Tony Zhang, dongjoon-hyun
Tony Zhang
authored and committed
[SPARK-28189][SQL] Use semanticEquals in Dataset drop method for attributes comparison
## What changes were proposed in this pull request?

In the Dataset `drop(col: Column)` method, the `equals` comparison was used instead of `semanticEquals`, which caused abnormal case-sensitivity behavior. When attributes of a LogicalPlan are checked for equality, `semanticEquals` should be used instead.

A similar PR I referred to: apache#22713, created by mgaido91.

## How was this patch tested?

- Added a new unit test case in DataFrameSuite
- ./build/sbt "testOnly org.apache.spark.sql.*"
- The Python code from the ticket reporter at https://issues.apache.org/jira/browse/SPARK-28189

Closes apache#25055 from Tonix517/SPARK-28189.

Authored-by: Tony Zhang <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent cdbc302 commit 20469d4

File tree

2 files changed

+24
-1
lines changed

2 files changed

+24
-1
lines changed

sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala

+1-1
Original file line number | Diff line number | Diff line change
@@ -2322,7 +2322,7 @@ class Dataset[T] private[sql](
23222322
}
23232323
val attrs = this.logicalPlan.output
23242324
val colsAfterDrop = attrs.filter { attr =>
2325-
attr != expression
2325+
!attr.semanticEquals(expression)
23262326
}.map(attr => Column(attr))
23272327
select(colsAfterDrop : _*)
23282328
}

sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala

+23
Original file line number | Diff line number | Diff line change
@@ -572,6 +572,29 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
572572
assert(df.schema.map(_.name) === Seq("value"))
573573
}
574574

575+
test("SPARK-28189 drop column using drop with column reference with case-insensitive names") {
576+
// With SQL config caseSensitive OFF, case insensitive column name should work
577+
withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") {
578+
val col1 = testData("KEY")
579+
val df1 = testData.drop(col1)
580+
checkAnswer(df1, testData.selectExpr("value"))
581+
assert(df1.schema.map(_.name) === Seq("value"))
582+
583+
val col2 = testData("Key")
584+
val df2 = testData.drop(col2)
585+
checkAnswer(df2, testData.selectExpr("value"))
586+
assert(df2.schema.map(_.name) === Seq("value"))
587+
}
588+
589+
// With SQL config caseSensitive ON, AnalysisException should be thrown
590+
withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") {
591+
val e = intercept[AnalysisException] {
592+
testData("KEY")
593+
}.getMessage
594+
assert(e.contains("Cannot resolve column name"))
595+
}
596+
}
597+
575598
test("drop unknown column (no-op) with column reference") {
576599
val col = Column("random")
577600
val df = testData.drop(col)

0 commit comments

Comments
 (0)