[HUDI-1591] Implement Spark's FileIndex for Hudi to support queries via Hudi DataSource using non-globbed table path and partition pruning (#2651)
@@ -20,6 +20,7 @@ package org.apache.hudi.utilities.deltastreamer;
import org.apache.hudi.DataSourceUtils;
import org.apache.hudi.HoodieSparkUtils;
import org.apache.hudi.HoodieWriterUtils;
import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.client.SparkRDDWriteClient;
import org.apache.hudi.client.WriteStatus;
@@ -235,12 +236,15 @@ public class DeltaSync implements Serializable {
      }
    } else {
      this.commitTimelineOpt = Option.empty();
      String partitionColumns = HoodieWriterUtils.getPartitionColumns(keyGenerator);

      HoodieTableMetaClient.withPropertyBuilder()
          .setTableType(cfg.tableType)
          .setTableName(cfg.targetTableName)
          .setArchiveLogFolder("archived")
          .setPayloadClassName(cfg.payloadClassName)
          .setBaseFileFormat(cfg.baseFileFormat)
          .setPartitionColumns(partitionColumns)
          .initTable(new Configuration(jssc.hadoopConfiguration()),
              cfg.targetBasePath);
    }
@@ -326,12 +330,14 @@ public class DeltaSync implements Serializable {
        }
      }
    } else {
      String partitionColumns = HoodieWriterUtils.getPartitionColumns(keyGenerator);
      HoodieTableMetaClient.withPropertyBuilder()
          .setTableType(cfg.tableType)
          .setTableName(cfg.targetTableName)
          .setArchiveLogFolder("archived")
          .setPayloadClassName(cfg.payloadClassName)
          .setBaseFileFormat(cfg.baseFileFormat)
          .setPartitionColumns(partitionColumns)
          .initTable(new Configuration(jssc.hadoopConfiguration()), cfg.targetBasePath);
    }
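The read-side behavior named in the commit title can be illustrated with a short query. Below is a minimal sketch, not taken from this commit, of loading a Hudi table through the Spark DataSource in Java using a plain, non-globbed base path and filtering on a partition column so the new FileIndex can prune partitions; the base path and the `dt` partition column are hypothetical placeholders.

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class HudiNonGlobbedReadExample {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
        .appName("hudi-non-globbed-read")
        // Hudi on Spark uses Kryo serialization.
        .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
        .getOrCreate();

    // Plain table base path: no "/*/*/*" glob suffix, since the FileIndex
    // resolves partitions from table metadata. Hypothetical location.
    String basePath = "/tmp/hudi/my_table";

    Dataset<Row> df = spark.read()
        .format("hudi")
        .load(basePath);

    // "dt" stands in for whatever partition column the table declares
    // (persisted via setPartitionColumns, as in the diff above); the filter
    // lets the FileIndex skip non-matching partitions before listing files.
    df.filter("dt = '2021-03-01'")
        .show();
  }
}

Before this change, DataSource reads of partitioned Hudi tables generally required a globbed path such as basePath + "/*/*/*"; with the FileIndex, the plain base path suffices and the partition columns recorded in table properties drive pruning.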