1
0

[HUDI-1873] collect() call causing issues with very large upserts (#2907)

Co-authored-by: Sivabalan Narayanan <sivabala@uber.com>
This commit is contained in:
mpouttu
2021-05-23 22:29:01 -07:00
committed by GitHub
parent 6539813733
commit 369a849337

View File

@@ -540,9 +540,8 @@ private[hudi] object HoodieSparkSqlWriter {
jsc: JavaSparkContext, jsc: JavaSparkContext,
tableInstantInfo: TableInstantInfo tableInstantInfo: TableInstantInfo
): (Boolean, common.util.Option[java.lang.String]) = { ): (Boolean, common.util.Option[java.lang.String]) = {
val errorCount = writeResult.getWriteStatuses.rdd.filter(ws => ws.hasErrors).count() if(writeResult.getWriteStatuses.rdd.filter(ws => ws.hasErrors).isEmpty()) {
if (errorCount == 0) { log.info("Proceeding to commit the write.")
log.info("No errors. Proceeding to commit the write.")
val metaMap = parameters.filter(kv => val metaMap = parameters.filter(kv =>
kv._1.startsWith(parameters(COMMIT_METADATA_KEYPREFIX_OPT_KEY))) kv._1.startsWith(parameters(COMMIT_METADATA_KEYPREFIX_OPT_KEY)))
val commitSuccess = val commitSuccess =
@@ -576,7 +575,7 @@ private[hudi] object HoodieSparkSqlWriter {
} }
(commitSuccess && metaSyncSuccess, compactionInstant) (commitSuccess && metaSyncSuccess, compactionInstant)
} else { } else {
log.error(s"${tableInstantInfo.operation} failed with $errorCount errors :") log.error(s"${tableInstantInfo.operation} failed with errors")
if (log.isTraceEnabled) { if (log.isTraceEnabled) {
log.trace("Printing out the top 100 errors") log.trace("Printing out the top 100 errors")
writeResult.getWriteStatuses.rdd.filter(ws => ws.hasErrors) writeResult.getWriteStatuses.rdd.filter(ws => ws.hasErrors)