1
0

[HUDI-3370] The files recorded in the commit may not match the actual ones for MOR Compaction (#4753)

* use HoodieCommitMetadata to replace writeStatuses computation

Co-authored-by: yuezhang <yuezhang@freewheel.tv>
This commit is contained in:
YueZhang
2022-02-14 11:12:52 +08:00
committed by GitHub
parent 55777fec05
commit 76e2faa28d
17 changed files with 129 additions and 114 deletions

View File

@@ -17,14 +17,13 @@
package org.apache.spark.sql.hudi.command
import org.apache.hudi.client.WriteStatus
import org.apache.hudi.common.model.HoodieTableType
import org.apache.hudi.common.model.{HoodieCommitMetadata, HoodieTableType}
import org.apache.hudi.common.table.timeline.{HoodieActiveTimeline, HoodieTimeline}
import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver}
import org.apache.hudi.common.util.{HoodieTimer, Option => HOption}
import org.apache.hudi.exception.HoodieException
import org.apache.hudi.{DataSourceUtils, DataSourceWriteOptions, HoodieWriterUtils}
import org.apache.spark.api.java.{JavaRDD, JavaSparkContext}
import org.apache.spark.api.java.JavaSparkContext
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.catalyst.plans.logical.CompactionOperation
import org.apache.spark.sql.catalyst.plans.logical.CompactionOperation.{CompactionOperation, RUN, SCHEDULE}
@@ -100,8 +99,8 @@ case class CompactionHoodiePathCommand(path: String,
timer.startTimer()
willCompactionInstants.foreach {compactionInstant =>
val writeResponse = client.compact(compactionInstant)
handlerResponse(writeResponse)
client.commitCompaction(compactionInstant, writeResponse, HOption.empty())
handleResponse(writeResponse.getCommitMetadata.get())
client.commitCompaction(compactionInstant, writeResponse.getCommitMetadata.get(), HOption.empty())
}
logInfo(s"Finish Run compaction at instants: [${willCompactionInstants.mkString(",")}]," +
s" spend: ${timer.endTimer()}ms")
@@ -111,17 +110,13 @@ case class CompactionHoodiePathCommand(path: String,
}
}
private def handlerResponse(writeResponse: JavaRDD[WriteStatus]): Unit = {
private def handleResponse(metadata: HoodieCommitMetadata): Unit = {
// Handle error
val error = writeResponse.rdd.filter(f => f.hasErrors).take(1).headOption
if (error.isDefined) {
if (error.get.hasGlobalError) {
throw error.get.getGlobalError
} else if (!error.get.getErrors.isEmpty) {
val key = error.get.getErrors.asScala.head._1
val exception = error.get.getErrors.asScala.head._2
throw new HoodieException(s"Error in write record: $key", exception)
}
val writeStats = metadata.getPartitionToWriteStats.entrySet().flatMap(e => e.getValue).toList
val errorsCount = writeStats.map(state => state.getTotalWriteErrors).sum
if (errorsCount > 0) {
throw new HoodieException(s" Found $errorsCount when writing record")
}
}