Skip to content

Commit 02d8ae3

Browse files
rednaxelafx authored and dongjoon-hyun committed
[SPARK-26661][SQL] Show actual class name of the writing command in CTAS explain
## What changes were proposed in this pull request? The explain output of the Hive CTAS command, regardless of whether it's actually writing via Hive's SerDe or converted into using Spark's data source, would always show that it's using `InsertIntoHiveTable` because it's hardcoded. e.g. ``` Execute OptimizedCreateHiveTableAsSelectCommand [Database:default, TableName: foo, InsertIntoHiveTable] ``` This CTAS is converted into using Spark's data source, but it still says `InsertIntoHiveTable` in the explain output. It's better to show the actual class name of the writing command used. For the example above, it'd be: ``` Execute OptimizedCreateHiveTableAsSelectCommand [Database:default, TableName: foo, InsertIntoHadoopFsRelationCommand] ``` ## How was this patch tested? Added test case in `HiveExplainSuite` Closes apache#23582 from rednaxelafx/fix-explain-1. Authored-by: Kris Mok <[email protected]> Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent dc2da72 commit 02d8ae3

File tree

2 files changed

+38
-2
lines changed

2 files changed

+38
-2
lines changed

sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala

+13-2
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import org.apache.spark.sql.execution.SparkPlan
2626
import org.apache.spark.sql.execution.command.{DataWritingCommand, DDLUtils}
2727
import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, InsertIntoHadoopFsRelationCommand, LogicalRelation}
2828
import org.apache.spark.sql.hive.HiveSessionCatalog
29+
import org.apache.spark.util.Utils
2930

3031
trait CreateHiveTableAsSelectBase extends DataWritingCommand {
3132
val tableDesc: CatalogTable
@@ -83,10 +84,14 @@ trait CreateHiveTableAsSelectBase extends DataWritingCommand {
8384
tableDesc: CatalogTable,
8485
tableExists: Boolean): DataWritingCommand
8586

87+
// A subclass should override this with the Class name of the concrete type expected to be
88+
// returned from `getWritingCommand`.
89+
def writingCommandClassName: String
90+
8691
override def argString(maxFields: Int): String = {
87-
s"[Database:${tableDesc.database}, " +
92+
s"[Database: ${tableDesc.database}, " +
8893
s"TableName: ${tableDesc.identifier.table}, " +
89-
s"InsertIntoHiveTable]"
94+
s"${writingCommandClassName}]"
9095
}
9196
}
9297

@@ -118,6 +123,9 @@ case class CreateHiveTableAsSelectCommand(
118123
ifPartitionNotExists = false,
119124
outputColumnNames = outputColumnNames)
120125
}
126+
127+
override def writingCommandClassName: String =
128+
Utils.getSimpleName(classOf[InsertIntoHiveTable])
121129
}
122130

123131
/**
@@ -162,4 +170,7 @@ case class OptimizedCreateHiveTableAsSelectCommand(
162170
Some(hadoopRelation.location),
163171
query.output.map(_.name))
164172
}
173+
174+
override def writingCommandClassName: String =
175+
Utils.getSimpleName(classOf[InsertIntoHadoopFsRelationCommand])
165176
}

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala

+25
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,13 @@ package org.apache.spark.sql.hive.execution
2020
import org.apache.spark.sql.QueryTest
2121
import org.apache.spark.sql.catalyst.TableIdentifier
2222
import org.apache.spark.sql.catalyst.parser.ParseException
23+
import org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand
24+
import org.apache.spark.sql.hive.HiveUtils
25+
import org.apache.spark.sql.hive.execution._
2326
import org.apache.spark.sql.hive.test.TestHiveSingleton
2427
import org.apache.spark.sql.internal.SQLConf
2528
import org.apache.spark.sql.test.SQLTestUtils
29+
import org.apache.spark.util.Utils
2630

2731
/**
2832
* A set of tests that validates support for Hive Explain command.
@@ -182,4 +186,25 @@ class HiveExplainSuite extends QueryTest with SQLTestUtils with TestHiveSingleto
182186
assert(output.toString.contains(s"Scan hive default.$tableName"))
183187
}
184188
}
189+
190+
test("SPARK-26661: Show actual class name of the writing command in CTAS explain") {
191+
Seq(true, false).foreach { convertCTAS =>
192+
withSQLConf(
193+
HiveUtils.CONVERT_METASTORE_CTAS.key -> convertCTAS.toString,
194+
HiveUtils.CONVERT_METASTORE_PARQUET.key -> convertCTAS.toString) {
195+
196+
val df = sql(s"EXPLAIN CREATE TABLE tab1 STORED AS PARQUET AS SELECT * FROM range(2)")
197+
val keywords = if (convertCTAS) {
198+
Seq(
199+
s"Execute ${Utils.getSimpleName(classOf[OptimizedCreateHiveTableAsSelectCommand])}",
200+
Utils.getSimpleName(classOf[InsertIntoHadoopFsRelationCommand]))
201+
} else {
202+
Seq(
203+
s"Execute ${Utils.getSimpleName(classOf[CreateHiveTableAsSelectCommand])}",
204+
Utils.getSimpleName(classOf[InsertIntoHiveTable]))
205+
}
206+
checkKeywordsExist(df, keywords: _*)
207+
}
208+
}
209+
}
185210
}

0 commit comments

Comments (0)