1
0

[HUDI-2117] Unpersist the input rdd after the commit is completed to … (#3207)

Co-authored-by: Vinoth Chandar <vinoth@apache.org>
This commit is contained in:
Shawy Geng
2021-07-29 23:16:58 +08:00
committed by GitHub
parent f109c6cb0d
commit 44e41dc9bb
4 changed files with 26 additions and 28 deletions

View File

@@ -40,12 +40,12 @@ import org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory
import org.apache.hudi.sync.common.AbstractSyncTool
import org.apache.hudi.table.BulkInsertPartitioner
import org.apache.log4j.LogManager
import org.apache.spark.{SPARK_VERSION, SparkContext}
import org.apache.spark.api.java.JavaSparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.{DataFrame, Dataset,Row, SQLContext, SaveMode, SparkSession}
import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.{DataFrame, Dataset, Row, SQLContext, SaveMode, SparkSession}
import org.apache.spark.{SPARK_VERSION, SparkContext}
import java.util
import java.util.Properties
@@ -238,22 +238,6 @@ object HoodieSparkSqlWriter {
writeResult, parameters, writeClient, tableConfig, jsc,
TableInstantInfo(basePath, instantTime, commitActionType, operation))
/**
 * Unpersists `rdd` and every RDD reachable from it through `dependencies`.
 *
 * An RDD is only unpersisted when its id is present in
 * `sparkContext.getPersistentRDDs`. A failure while unpersisting one RDD is
 * logged as a warning and does not stop the traversal of the remaining lineage.
 *
 * The lineage is walked iteratively with an explicit work list, and each RDD id
 * is processed at most once, so an ancestor that is reachable along several
 * dependency paths is not re-walked per path and a long lineage does not grow
 * the call stack.
 *
 * @param rdd the RDD whose lineage (itself included) should be unpersisted
 */
def unpersistRdd(rdd: RDD[_]): Unit = {
  import scala.collection.mutable

  // Ids of RDDs already handled; guards against re-walking shared ancestors.
  val visitedIds = mutable.HashSet.empty[Int]
  // Work list used as a stack in place of recursion.
  val pending = mutable.ArrayBuffer[RDD[_]](rdd)

  while (pending.nonEmpty) {
    val current = pending.remove(pending.length - 1)
    // `add` returns false when the id was already present.
    if (visitedIds.add(current.id)) {
      if (sparkContext.getPersistentRDDs.contains(current.id)) {
        try {
          current.unpersist()
        } catch {
          // Best effort: keep going so the rest of the lineage is still released.
          case t: Exception => log.warn("Got exception trying to unpersist rdd", t)
        }
      }
      current.dependencies.foreach { dependency =>
        pending += dependency.rdd
      }
    }
  }
}
// it's safe to unpersist cached rdds here
unpersistRdd(writeResult.getWriteStatuses.rdd)
(writeSuccessful, common.util.Option.ofNullable(instantTime), compactionInstant, clusteringInstant, writeClient, tableConfig)
}
}