[HUDI-3271] Code optimization and clean up unused code in HoodieSparkSqlWriter (#4631)
@@ -69,8 +69,8 @@ object HoodieSparkSqlWriter {
            df: DataFrame,
            hoodieTableConfigOpt: Option[HoodieTableConfig] = Option.empty,
            hoodieWriteClient: Option[SparkRDDWriteClient[HoodieRecordPayload[Nothing]]] = Option.empty,
-           asyncCompactionTriggerFn: Option[Function1[SparkRDDWriteClient[HoodieRecordPayload[Nothing]], Unit]] = Option.empty,
-           asyncClusteringTriggerFn: Option[Function1[SparkRDDWriteClient[HoodieRecordPayload[Nothing]], Unit]] = Option.empty
+           asyncCompactionTriggerFn: Option[SparkRDDWriteClient[HoodieRecordPayload[Nothing]] => Unit] = Option.empty,
+           asyncClusteringTriggerFn: Option[SparkRDDWriteClient[HoodieRecordPayload[Nothing]] => Unit] = Option.empty
            )
   : (Boolean, common.util.Option[String], common.util.Option[String], common.util.Option[String],
     SparkRDDWriteClient[HoodieRecordPayload[Nothing]], HoodieTableConfig) = {
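Both trigger-function parameters move from the explicit Function1 spelling to the arrow syntax. A minimal, standalone sketch (the object and value names are illustrative, not from the patch) showing that in Scala the two spellings denote the same type, so callers are unaffected:

// A => Unit is syntactic sugar for Function1[A, Unit]; the assignment below
// compiles with no conversion because the two types are identical.
object FunctionTypeSugarDemo {
  def main(args: Array[String]): Unit = {
    val explicit: Option[Function1[String, Unit]] = Some(s => println(s"compact: $s"))
    val arrow: Option[String => Unit] = explicit // same type, lighter notation
    arrow.foreach(fn => fn("trigger"))
  }
}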
@@ -199,7 +199,7 @@ object HoodieSparkSqlWriter {
 
       // Get list of partitions to delete
       val partitionsToDelete = if (parameters.containsKey(DataSourceWriteOptions.PARTITIONS_TO_DELETE.key())) {
-        val partitionColsToDelete = parameters.get(DataSourceWriteOptions.PARTITIONS_TO_DELETE.key()).get.split(",")
+        val partitionColsToDelete = parameters(DataSourceWriteOptions.PARTITIONS_TO_DELETE.key()).split(",")
         java.util.Arrays.asList(partitionColsToDelete: _*)
       } else {
         genericRecords.map(gr => keyGenerator.getKey(gr).getPartitionPath).toJavaRDD().distinct().collect()
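The old access pattern called .get on the map and then .get on the resulting Option; since this branch is already guarded by a containment check, applying the map directly is equivalent and terser. A hedged sketch, assuming a plain Scala Map and an illustrative key (the real parameters map and config key live in Hudi):

// Sketch only: "partitions.to.delete" is a stand-in key, not the real Hudi
// config key. For a present key, m(k) and m.get(k).get return the same value;
// both throw NoSuchElementException when the key is absent.
object MapApplyDemo {
  def main(args: Array[String]): Unit = {
    val parameters = Map("partitions.to.delete" -> "2021/01,2021/02")
    if (parameters.contains("partitions.to.delete")) {
      val viaGetGet = parameters.get("partitions.to.delete").get.split(",") // old style
      val viaApply = parameters("partitions.to.delete").split(",")          // new style
      assert(viaGetGet.sameElements(viaApply)) // identical result, less noise
    }
  }
}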
@@ -317,7 +317,7 @@ object HoodieSparkSqlWriter {
     if (FSUtils.isTableExists(basePath.toString, fs)) {
       val tableMetaClient = HoodieTableMetaClient.builder.setConf(sparkContext.hadoopConfiguration).setBasePath(basePath.toString).build()
       val tableSchemaResolver = new TableSchemaResolver(tableMetaClient)
-      latestSchema = tableSchemaResolver.getLatestSchema(schema, false, null);
+      latestSchema = tableSchemaResolver.getLatestSchema(schema, false, null)
     }
     latestSchema
   }
@@ -345,7 +345,7 @@ object HoodieSparkSqlWriter {
     val sparkContext = sqlContext.sparkContext
     val fs = basePath.getFileSystem(sparkContext.hadoopConfiguration)
     tableExists = fs.exists(new Path(basePath, HoodieTableMetaClient.METAFOLDER_NAME))
-    var tableConfig = getHoodieTableConfig(sparkContext, path, hoodieTableConfigOpt)
+    val tableConfig = getHoodieTableConfig(sparkContext, path, hoodieTableConfigOpt)
     validateTableConfig(sqlContext.sparkSession, optParams, tableConfig)
 
     val (parameters, hoodieConfig) = mergeParamsAndGetHoodieConfig(optParams, tableConfig)
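tableConfig is never reassigned after this point, so the binding is tightened from var to val and the compiler then rejects any accidental reassignment. A trivial sketch of the principle (loadConfig is a hypothetical stand-in for getHoodieTableConfig):

// Sketch of the var-to-val cleanup; loadConfig is hypothetical.
object ValOverVarDemo {
  private def loadConfig(): String = "table-config"
  def main(args: Array[String]): Unit = {
    val tableConfig = loadConfig()
    // tableConfig = loadConfig() // would not compile: reassignment to val
    println(tableConfig)
  }
}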
@@ -364,7 +364,6 @@ object HoodieSparkSqlWriter {
       schema = HoodieAvroUtils.getNullSchema.toString
     }
 
-
     // Handle various save modes
     if (mode == SaveMode.Ignore && tableExists) {
       log.warn(s"hoodie table at $basePath already exists. Ignoring & not performing actual writes.")
@@ -453,7 +452,7 @@ object HoodieSparkSqlWriter {
       // Sort modes are not yet supported when meta fields are disabled
       new NonSortPartitionerWithRows()
     }
-    val arePartitionRecordsSorted = bulkInsertPartitionerRows.arePartitionRecordsSorted();
+    val arePartitionRecordsSorted = bulkInsertPartitionerRows.arePartitionRecordsSorted()
     parameters.updated(HoodieInternalConfig.BULKINSERT_ARE_PARTITIONER_RECORDS_SORTED, arePartitionRecordsSorted.toString)
     val isGlobalIndex = if (populateMetaFields) {
       SparkHoodieIndexFactory.isGlobalIndex(writeConfig)