diff --git a/assembly/pom.xml b/assembly/pom.xml index 8070f84007f7d..5b164b9ed22de 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index e1c3425f36de6..cbf5e0af8b322 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 9408b7dea33ba..1211d85f19d49 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 4617eec4073cb..dbb85c555d018 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 1f09e7fa300b5..fd4cd94e5a6bc 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 8de157814be66..0a98259d2d12e 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index bc617b080b390..5e6dd46ea07d3 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index d41ef5a7afa89..a6161a9c1d9bd 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index b73bfce744d78..078d6e4fc5ce4 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../pom.xml diff --git a/core/src/main/scala/org/apache/spark/InternalAccumulator.scala b/core/src/main/scala/org/apache/spark/InternalAccumulator.scala index 18b10d23da94c..69d0b5cf1c0f5 100644 --- a/core/src/main/scala/org/apache/spark/InternalAccumulator.scala +++ b/core/src/main/scala/org/apache/spark/InternalAccumulator.scala @@ -30,6 +30,7 @@ private[spark] object InternalAccumulator { val INPUT_METRICS_PREFIX = METRICS_PREFIX + "input." // Names of internal task level metrics + val ADDITIONAL_METRIC = "additionalMetric" val EXECUTOR_DESERIALIZE_TIME = METRICS_PREFIX + "executorDeserializeTime" val EXECUTOR_DESERIALIZE_CPU_TIME = METRICS_PREFIX + "executorDeserializeCpuTime" val EXECUTOR_RUN_TIME = METRICS_PREFIX + "executorRunTime" diff --git a/core/src/main/scala/org/apache/spark/executor/AdditionalMetricAccumulator.scala b/core/src/main/scala/org/apache/spark/executor/AdditionalMetricAccumulator.scala new file mode 100644 index 0000000000000..9deafb091fa6a --- /dev/null +++ b/core/src/main/scala/org/apache/spark/executor/AdditionalMetricAccumulator.scala @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.executor + +import org.apache.spark.util.AccumulatorV2 + +/** + * additional metric for task + */ +class AdditionalMetricAccumulator extends AccumulatorV2[String, String] { + + private var res = "" + + /** + * Returns if this accumulator is zero value or not. e.g. for a counter accumulator, 0 is zero + * value; for a list accumulator, Nil is zero value. + */ + override def isZero: Boolean = res.isEmpty + + /** + * Creates a new copy of this accumulator. + */ + override def copy(): AccumulatorV2[String, String] = { + val newAccu = new AdditionalMetricAccumulator + newAccu.res = this.res + newAccu + } + + /** + * Resets this accumulator, which is zero value. i.e. call `isZero` must + * return true. + */ + override def reset(): Unit = { + res = "" + } + + /** + * Takes the inputs and accumulates. + */ + override def add(v: String): Unit = { + this.res = this.res + s"\t$v| " + } + + def setValue(newValue: String): Unit = { + this.res = newValue + } + + + /** + * Merges another same-type accumulator into this one and update its state, i.e. this should be + * merge-in-place. + */ + override def merge(other: AccumulatorV2[String, String]): Unit = { + } + + /** + * Defines the current value of this accumulator + */ + override def value: String = res +} diff --git a/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala b/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala index 85b2745a2aec4..192abe74a7764 100644 --- a/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala +++ b/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala @@ -43,6 +43,8 @@ import org.apache.spark.util._ */ @DeveloperApi class TaskMetrics private[spark] () extends Serializable { + private val additionalMetric = new AdditionalMetricAccumulator + // Each metric is internally represented as an accumulator private val _executorDeserializeTime = new LongAccumulator private val _executorDeserializeCpuTime = new LongAccumulator @@ -126,6 +128,11 @@ class TaskMetrics private[spark] () extends Serializable { } // Setters and increment-ers + def addAdditionalMetric(value: String): Unit = + additionalMetric.add(value) + def setAdditionalMetric(value: String): Unit = + additionalMetric.setValue(value) + private[spark] def setExecutorDeserializeTime(v: Long): Unit = _executorDeserializeTime.setValue(v) private[spark] def setExecutorDeserializeCpuTime(v: Long): Unit = @@ -207,6 +214,7 @@ class TaskMetrics private[spark] () extends Serializable { import InternalAccumulator._ @transient private[spark] lazy val nameToAccums = LinkedHashMap( + ADDITIONAL_METRIC -> additionalMetric, EXECUTOR_DESERIALIZE_TIME -> _executorDeserializeTime, EXECUTOR_DESERIALIZE_CPU_TIME -> _executorDeserializeCpuTime, EXECUTOR_RUN_TIME -> _executorRunTime, diff --git a/examples/pom.xml b/examples/pom.xml index 19dc828f0a0a3..d79ebc51c71fc 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../pom.xml diff --git a/external/avro/pom.xml b/external/avro/pom.xml index cb6ac88a84758..cf788b08b3e6b 100644 --- a/external/avro/pom.xml +++ b/external/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../../pom.xml diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml index e2318ba773019..56a972b71c5a6 100644 --- a/external/docker-integration-tests/pom.xml +++ b/external/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../../pom.xml diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml index d08060bb5f13a..a034e5c1ea4e4 100644 --- a/external/flume-assembly/pom.xml +++ b/external/flume-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../../pom.xml diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index 98f0c701e65cb..cac1a0083afd2 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../../pom.xml diff --git a/external/flume/pom.xml b/external/flume/pom.xml index 7a0a12c44c2c6..8f10fe1f6b641 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../../pom.xml diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index 43e806322f919..061969b866bd1 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../../pom.xml diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index 079498333d1d1..841b62a818564 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../../pom.xml diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index 15a5ad3bc52a2..d64690e287a3e 100644 --- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../../pom.xml diff --git a/external/kafka-0-8-assembly/pom.xml b/external/kafka-0-8-assembly/pom.xml index 5aad1e62f16b8..e76eacce5defb 100644 --- a/external/kafka-0-8-assembly/pom.xml +++ b/external/kafka-0-8-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../../pom.xml diff --git a/external/kafka-0-8/pom.xml b/external/kafka-0-8/pom.xml index 8f7dc212a1ca9..b82930f6b344c 100644 --- a/external/kafka-0-8/pom.xml +++ b/external/kafka-0-8/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../../pom.xml diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index e6bcdeb3fd535..477a76fc67569 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../../pom.xml diff --git a/external/kinesis-asl/pom.xml b/external/kinesis-asl/pom.xml index 5e5648c964848..2bc6e328db582 100644 --- a/external/kinesis-asl/pom.xml +++ b/external/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../../pom.xml diff --git a/external/spark-ganglia-lgpl/pom.xml b/external/spark-ganglia-lgpl/pom.xml index 96bea68537ff8..70a4d0818193a 100644 --- a/external/spark-ganglia-lgpl/pom.xml +++ b/external/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index 1db98cf7504ee..4d8eb7bd61501 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../pom.xml diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index 3f55ad96057a0..dd7758acd45f5 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index 7a481f00b1d7f..93ffdbbdefe9e 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 56e444250af13..51f6ff5d816a6 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index a29180bb4e09b..0f421ee8745f1 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../pom.xml diff --git a/pom.xml b/pom.xml index 28019d0b7dc16..db10a7dd40f6d 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 pom Spark Project Parent POM http://spark.apache.org/ diff --git a/repl/pom.xml b/repl/pom.xml index ae23855238437..59eff8c7f991c 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index a6ea7315ef016..7464d8e424353 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index def232e1b8906..9263f4123fb21 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index 2a8a448d89208..a70c83af51e87 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index b2c58213795ce..a3a56dd9b7ac5 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 4792199c8b209..61431203ee5bc 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index dc73c59f3e37c..b52ac97c97742 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../../pom.xml diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala index ea4f1592a7c2e..96385e6d2dca1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala @@ -31,6 +31,7 @@ import org.apache.hadoop.mapreduce.lib.input.FileSplit import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl import org.apache.parquet.filter2.compat.FilterCompat import org.apache.parquet.filter2.predicate.FilterApi +import org.apache.parquet.format.ParquetMetrics import org.apache.parquet.format.converter.ParquetMetadataConverter.SKIP_ROW_GROUPS import org.apache.parquet.hadoop._ import org.apache.parquet.hadoop.ParquetOutputFormat.JobSummaryLevel @@ -409,7 +410,7 @@ class ParquetFileFormat ParquetInputFormat.setFilterPredicate(hadoopAttemptContext.getConfiguration, pushed.get) } val taskContext = Option(TaskContext.get()) - if (enableVectorizedReader) { + val iter = if (enableVectorizedReader) { val vectorizedReader = new VectorizedParquetRecordReader( convertTz.orNull, enableOffHeapColumnVector && taskContext.isDefined, capacity) val iter = new RecordReaderIterator(vectorizedReader) @@ -453,6 +454,11 @@ class ParquetFileFormat .map(d => appendPartitionColumns(joinedRow(d, file.partitionValues))) } } + if (taskContext.isDefined) { + val metrics = taskContext.get.taskMetrics() + metrics.setAdditionalMetric("Parquet Metric:" + ParquetMetrics.get().toString) + } + iter } } diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index e9d0341e4c50b..d6da5cd7649fb 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 9f4b516f7a57a..ea43a0a5c7682 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index 4e12d72e3beb0..3ec97d7cc0f39 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index b7d905c07e35b..6cec68bf83b60 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.4.1-kylin-r20 + 2.4.1-kylin-r22 ../pom.xml