1
0

Adding a config to control whether date partitioning can be assumed

- false by default
 - CAUTION: If you have an existing tables without partition metadata, you need to set this to "true"
This commit is contained in:
Vinoth Chandar
2017-03-26 17:40:20 -07:00
committed by vinoth chandar
parent f9fd16069d
commit dce35ff0d7
7 changed files with 32 additions and 12 deletions

View File

@@ -476,7 +476,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
+ lastCommitRetained);
Map<String, List<String>> latestFilesMap = jsc.parallelize(
FSUtils.getAllPartitionPaths(fs, table.getMetaClient().getBasePath()))
FSUtils.getAllPartitionPaths(fs, table.getMetaClient().getBasePath(), config.shouldAssumeDatePartitioning()))
.mapToPair((PairFunction<String, String, List<String>>) partitionPath -> {
// Scan all partitions files with this commit time
logger.info("Collecting latest files in partition path " + partitionPath);
@@ -650,7 +650,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
logger.info("Clean out all parquet files generated for commits: " + commits);
final LongAccumulator numFilesDeletedCounter = jsc.sc().longAccumulator();
List<HoodieRollbackStat> stats = jsc.parallelize(
FSUtils.getAllPartitionPaths(fs, table.getMetaClient().getBasePath()))
FSUtils.getAllPartitionPaths(fs, table.getMetaClient().getBasePath(), config.shouldAssumeDatePartitioning()))
.map((Function<String, HoodieRollbackStat>) partitionPath -> {
// Scan all partitions files with this commit time
logger.info("Cleaning path " + partitionPath);
@@ -739,7 +739,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
.getHoodieTable(new HoodieTableMetaClient(fs, config.getBasePath(), true), config);
List<String> partitionsToClean =
FSUtils.getAllPartitionPaths(fs, table.getMetaClient().getBasePath());
FSUtils.getAllPartitionPaths(fs, table.getMetaClient().getBasePath(), config.shouldAssumeDatePartitioning());
// shuffle to distribute cleaning work across partitions evenly
Collections.shuffle(partitionsToClean);
logger.info("Partitions to clean up : " + partitionsToClean + ", with policy " + config

View File

@@ -51,6 +51,8 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
private static final String DEFAULT_WRITE_STATUS_STORAGE_LEVEL = "MEMORY_AND_DISK_SER";
private static final String HOODIE_AUTO_COMMIT_PROP = "hoodie.auto.commit";
private static final String DEFAULT_HOODIE_AUTO_COMMIT = "true";
private static final String HOODIE_ASSUME_DATE_PARTITIONING_PROP = "hoodie.assume.date.partitioning";
private static final String DEFAULT_ASSUME_DATE_PARTITIONING = "false";
private HoodieWriteConfig(Properties props) {
@@ -76,6 +78,10 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
return Boolean.parseBoolean(props.getProperty(HOODIE_AUTO_COMMIT_PROP));
}
public Boolean shouldAssumeDatePartitioning() {
return Boolean.parseBoolean(props.getProperty(HOODIE_ASSUME_DATE_PARTITIONING_PROP));
}
public int getInsertShuffleParallelism() {
return Integer.parseInt(props.getProperty(INSERT_PARALLELISM));
}
@@ -337,6 +343,8 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
WRITE_STATUS_STORAGE_LEVEL, DEFAULT_WRITE_STATUS_STORAGE_LEVEL);
setDefaultOnCondition(props, !props.containsKey(HOODIE_AUTO_COMMIT_PROP),
HOODIE_AUTO_COMMIT_PROP, DEFAULT_HOODIE_AUTO_COMMIT);
setDefaultOnCondition(props, !props.containsKey(HOODIE_ASSUME_DATE_PARTITIONING_PROP),
HOODIE_ASSUME_DATE_PARTITIONING_PROP, DEFAULT_ASSUME_DATE_PARTITIONING);
setDefaultOnCondition(props, !isIndexConfigSet, HoodieIndexConfig.newBuilder().build());

View File

@@ -77,7 +77,7 @@ public class HoodieRealtimeTableCompactor implements HoodieCompactor {
String compactionCommit = startCompactionCommit(hoodieTable);
log.info("Compacting " + metaClient.getBasePath() + " with commit " + compactionCommit);
List<String> partitionPaths =
FSUtils.getAllPartitionPaths(metaClient.getFs(), metaClient.getBasePath());
FSUtils.getAllPartitionPaths(metaClient.getFs(), metaClient.getBasePath(), config.shouldAssumeDatePartitioning());
log.info("Compaction looking for files to compact in " + partitionPaths + " partitions");
List<CompactionOperation> operations =