1
0

[HUDI-1591] Implement Spark's FileIndex for Hudi to support queries via Hudi DataSource using non-globbed table path and partition pruning (#2651)

This commit is contained in:
pengzhiwei
2021-04-02 02:12:28 +08:00
committed by GitHub
parent 9804662bc8
commit 684622c7c9
22 changed files with 1074 additions and 82 deletions

View File

@@ -20,6 +20,7 @@ package org.apache.hudi.utilities.deltastreamer;
import org.apache.hudi.DataSourceUtils;
import org.apache.hudi.HoodieSparkUtils;
import org.apache.hudi.HoodieWriterUtils;
import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.client.SparkRDDWriteClient;
import org.apache.hudi.client.WriteStatus;
@@ -235,12 +236,15 @@ public class DeltaSync implements Serializable {
}
} else {
this.commitTimelineOpt = Option.empty();
String partitionColumns = HoodieWriterUtils.getPartitionColumns(keyGenerator);
HoodieTableMetaClient.withPropertyBuilder()
.setTableType(cfg.tableType)
.setTableName(cfg.targetTableName)
.setArchiveLogFolder("archived")
.setPayloadClassName(cfg.payloadClassName)
.setBaseFileFormat(cfg.baseFileFormat)
.setPartitionColumns(partitionColumns)
.initTable(new Configuration(jssc.hadoopConfiguration()),
cfg.targetBasePath);
}
@@ -326,12 +330,14 @@ public class DeltaSync implements Serializable {
}
}
} else {
String partitionColumns = HoodieWriterUtils.getPartitionColumns(keyGenerator);
HoodieTableMetaClient.withPropertyBuilder()
.setTableType(cfg.tableType)
.setTableName(cfg.targetTableName)
.setArchiveLogFolder("archived")
.setPayloadClassName(cfg.payloadClassName)
.setBaseFileFormat(cfg.baseFileFormat)
.setPartitionColumns(partitionColumns)
.initTable(new Configuration(jssc.hadoopConfiguration()), cfg.targetBasePath);
}