[HUDI-3180] Include files from completed commits while bootstrapping metadata table (#4519)
This commit is contained in:
committed by
GitHub
parent
bc95571caa
commit
7a8b94c82d
@@ -746,9 +746,16 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
|
||||
HoodieData<HoodieRecord> partitionRecords = engineContext.parallelize(Arrays.asList(allPartitionRecord), 1);
|
||||
if (!partitionInfoList.isEmpty()) {
|
||||
HoodieData<HoodieRecord> fileListRecords = engineContext.parallelize(partitionInfoList, partitionInfoList.size()).map(partitionInfo -> {
|
||||
Map<String, Long> fileNameToSizeMap = partitionInfo.getFileNameToSizeMap();
|
||||
// filter for files that are part of the completed commits
|
||||
Map<String, Long> validFileNameToSizeMap = fileNameToSizeMap.entrySet().stream().filter(fileSizePair -> {
|
||||
String commitTime = FSUtils.getCommitTime(fileSizePair.getKey());
|
||||
return HoodieTimeline.compareTimestamps(commitTime, HoodieTimeline.LESSER_THAN_OR_EQUALS, createInstantTime);
|
||||
}).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
|
||||
|
||||
// Record which saves files within a partition
|
||||
return HoodieMetadataPayload.createPartitionFilesRecord(
|
||||
partitionInfo.getRelativePath().isEmpty() ? NON_PARTITIONED_NAME : partitionInfo.getRelativePath(), Option.of(partitionInfo.getFileNameToSizeMap()), Option.empty());
|
||||
partitionInfo.getRelativePath().isEmpty() ? NON_PARTITIONED_NAME : partitionInfo.getRelativePath(), Option.of(validFileNameToSizeMap), Option.empty());
|
||||
});
|
||||
partitionRecords = partitionRecords.union(fileListRecords);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user