[HUDI-3271] Code optimization and clean up unused code in HoodieSparkSqlWriter (#4631)
@@ -69,8 +69,8 @@ object HoodieSparkSqlWriter {
             df: DataFrame,
             hoodieTableConfigOpt: Option[HoodieTableConfig] = Option.empty,
             hoodieWriteClient: Option[SparkRDDWriteClient[HoodieRecordPayload[Nothing]]] = Option.empty,
-            asyncCompactionTriggerFn: Option[Function1[SparkRDDWriteClient[HoodieRecordPayload[Nothing]], Unit]] = Option.empty,
-            asyncClusteringTriggerFn: Option[Function1[SparkRDDWriteClient[HoodieRecordPayload[Nothing]], Unit]] = Option.empty
+            asyncCompactionTriggerFn: Option[SparkRDDWriteClient[HoodieRecordPayload[Nothing]] => Unit] = Option.empty,
+            asyncClusteringTriggerFn: Option[SparkRDDWriteClient[HoodieRecordPayload[Nothing]] => Unit] = Option.empty
             )
   : (Boolean, common.util.Option[String], common.util.Option[String], common.util.Option[String],
     SparkRDDWriteClient[HoodieRecordPayload[Nothing]], HoodieTableConfig) = {
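
Note on the hunk above: in Scala, A => B is syntactic sugar for Function1[A, B], so the new parameter types are identical to the old ones; only the spelling changes to the idiomatic form. A minimal sketch of the equivalence (the names here are illustrative, not from the patch):

    // Both declarations denote exactly the same type.
    val verbose: Option[Function1[String, Unit]] = Option.empty
    val sugared: Option[String => Unit] = Option.empty
    // A lambda satisfies either spelling.
    val f: String => Unit = s => println(s)
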
@@ -199,7 +199,7 @@ object HoodieSparkSqlWriter {
 
       // Get list of partitions to delete
       val partitionsToDelete = if (parameters.containsKey(DataSourceWriteOptions.PARTITIONS_TO_DELETE.key())) {
-        val partitionColsToDelete = parameters.get(DataSourceWriteOptions.PARTITIONS_TO_DELETE.key()).get.split(",")
+        val partitionColsToDelete = parameters(DataSourceWriteOptions.PARTITIONS_TO_DELETE.key()).split(",")
         java.util.Arrays.asList(partitionColsToDelete: _*)
       } else {
         genericRecords.map(gr => keyGenerator.getKey(gr).getPartitionPath).toJavaRDD().distinct().collect()
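
Note on the hunk above: for a Scala Map, parameters(key) is parameters.apply(key), which fails with a NoSuchElementException when the key is absent, exactly like parameters.get(key).get, but without building an intermediate Option just to unwrap it. The preceding containsKey check already guarantees the key is present. A minimal sketch (the key and value strings are illustrative):

    val params = Map("partitions.to.delete" -> "2021/01,2021/02")
    // Redundant: wraps the value in an Option only to unwrap it immediately.
    val verbose = params.get("partitions.to.delete").get.split(",")
    // Equivalent and direct; both forms throw if the key is missing.
    val direct = params("partitions.to.delete").split(",")
    assert(verbose.sameElements(direct))
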
@@ -317,7 +317,7 @@ object HoodieSparkSqlWriter {
     if (FSUtils.isTableExists(basePath.toString, fs)) {
       val tableMetaClient = HoodieTableMetaClient.builder.setConf(sparkContext.hadoopConfiguration).setBasePath(basePath.toString).build()
       val tableSchemaResolver = new TableSchemaResolver(tableMetaClient)
-      latestSchema = tableSchemaResolver.getLatestSchema(schema, false, null);
+      latestSchema = tableSchemaResolver.getLatestSchema(schema, false, null)
     }
     latestSchema
   }
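
Note on the hunk above: Scala treats the end of a line as a statement terminator, so the trailing semicolon is redundant; this hunk and the arePartitionRecordsSorted hunk further down drop it with no change in behavior.
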
@@ -345,7 +345,7 @@ object HoodieSparkSqlWriter {
     val sparkContext = sqlContext.sparkContext
     val fs = basePath.getFileSystem(sparkContext.hadoopConfiguration)
     tableExists = fs.exists(new Path(basePath, HoodieTableMetaClient.METAFOLDER_NAME))
-    var tableConfig = getHoodieTableConfig(sparkContext, path, hoodieTableConfigOpt)
+    val tableConfig = getHoodieTableConfig(sparkContext, path, hoodieTableConfigOpt)
     validateTableConfig(sqlContext.sparkSession, optParams, tableConfig)
 
     val (parameters, hoodieConfig) = mergeParamsAndGetHoodieConfig(optParams, tableConfig)
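
Note on the hunk above: tableConfig is never reassigned after initialization, so the mutable var binding can be tightened to an immutable val, letting the compiler reject accidental reassignment. A minimal sketch of the distinction:

    var a = 1; a = 2   // compiles: var permits reassignment
    val b = 1          // b = 2 would be a compile-time error
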
@@ -364,7 +364,6 @@ object HoodieSparkSqlWriter {
       schema = HoodieAvroUtils.getNullSchema.toString
     }
 
-
     // Handle various save modes
     if (mode == SaveMode.Ignore && tableExists) {
       log.warn(s"hoodie table at $basePath already exists. Ignoring & not performing actual writes.")
@@ -453,7 +452,7 @@ object HoodieSparkSqlWriter {
         // Sort modes are not yet supported when meta fields are disabled
         new NonSortPartitionerWithRows()
       }
-      val arePartitionRecordsSorted = bulkInsertPartitionerRows.arePartitionRecordsSorted();
+      val arePartitionRecordsSorted = bulkInsertPartitionerRows.arePartitionRecordsSorted()
       parameters.updated(HoodieInternalConfig.BULKINSERT_ARE_PARTITIONER_RECORDS_SORTED, arePartitionRecordsSorted.toString)
       val isGlobalIndex = if (populateMetaFields) {
         SparkHoodieIndexFactory.isGlobalIndex(writeConfig)