[HUDI-1504] Allow log files generated during restore/rollback to be synced as well
- TestHoodieBackedMetadata#testSync etc. now run for MOR tables - HUDI-1502 is still pending and has issues for MOR/rollbacks - Also addressed a bunch of code review comments.
This commit is contained in:
committed by
vinoth chandar
parent
1a0579ca7d
commit
31e674eb57
@@ -206,7 +206,7 @@ public class FSUtils {
|
||||
public static List<String> getAllFoldersWithPartitionMetaFile(FileSystem fs, String basePathStr) throws IOException {
|
||||
// If the basePathStr is a folder within the .hoodie directory then we are listing partitions within an
|
||||
// internal table.
|
||||
final boolean isMetadataTable = basePathStr.contains(HoodieTableMetaClient.METAFOLDER_NAME);
|
||||
final boolean isMetadataTable = HoodieTableMetadata.isMetadataTable(basePathStr);
|
||||
final Path basePath = new Path(basePathStr);
|
||||
final List<String> partitions = new ArrayList<>();
|
||||
processFiles(fs, basePathStr, (locatedFileStatus) -> {
|
||||
|
||||
@@ -270,10 +270,10 @@ public abstract class BaseTableMetadata implements HoodieTableMetadata {
|
||||
}
|
||||
|
||||
HoodieTableMetaClient datasetMetaClient = new HoodieTableMetaClient(hadoopConf.get(), datasetBasePath);
|
||||
List<HoodieInstant> unsyncedInstants = findInstantsToSync(datasetMetaClient);
|
||||
List<HoodieInstant> unSyncedInstants = findInstantsToSync(datasetMetaClient);
|
||||
Schema schema = HoodieAvroUtils.addMetadataFields(HoodieMetadataRecord.getClassSchema());
|
||||
timelineRecordScanner =
|
||||
new HoodieMetadataMergedInstantRecordScanner(datasetMetaClient, unsyncedInstants, getSyncedInstantTime(), schema, MAX_MEMORY_SIZE_IN_BYTES, spillableMapDirectory, null);
|
||||
new HoodieMetadataMergedInstantRecordScanner(datasetMetaClient, unSyncedInstants, getSyncedInstantTime(), schema, MAX_MEMORY_SIZE_IN_BYTES, spillableMapDirectory, null);
|
||||
}
|
||||
|
||||
protected List<HoodieInstant> findInstantsToSync() {
|
||||
|
||||
@@ -171,7 +171,7 @@ public class HoodieMetadataPayload implements HoodieRecordPayload<HoodieMetadata
|
||||
* Returns the list of filenames deleted as part of this record.
|
||||
*/
|
||||
public List<String> getDeletions() {
|
||||
return filterFileInfoEntries(true).map(e -> e.getKey()).sorted().collect(Collectors.toList());
|
||||
return filterFileInfoEntries(true).map(Map.Entry::getKey).sorted().collect(Collectors.toList());
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -238,13 +238,20 @@ public class HoodieTableMetadataUtil {
|
||||
Option<String> lastSyncTs) {
|
||||
|
||||
rollbackMetadata.getPartitionMetadata().values().forEach(pm -> {
|
||||
// Has this rollback produced new files?
|
||||
boolean hasAppendFiles = pm.getAppendFiles().values().stream().mapToLong(Long::longValue).sum() > 0;
|
||||
// If commit being rolled back has not been synced to metadata table yet then there is no need to update metadata
|
||||
if (lastSyncTs.isPresent() && HoodieTimeline.compareTimestamps(rollbackMetadata.getCommitsRollback().get(0), HoodieTimeline.GREATER_THAN, lastSyncTs.get())) {
|
||||
boolean shouldSkip = lastSyncTs.isPresent()
|
||||
&& HoodieTimeline.compareTimestamps(rollbackMetadata.getCommitsRollback().get(0), HoodieTimeline.GREATER_THAN, lastSyncTs.get());
|
||||
|
||||
if (!hasAppendFiles && shouldSkip) {
|
||||
LOG.info(String.format("Skipping syncing of rollbackMetadata at %s, given metadata table is already synced up to %s",
|
||||
rollbackMetadata.getCommitsRollback().get(0), lastSyncTs.get()));
|
||||
return;
|
||||
}
|
||||
|
||||
final String partition = pm.getPartitionPath();
|
||||
if (!pm.getSuccessDeleteFiles().isEmpty()) {
|
||||
if (!pm.getSuccessDeleteFiles().isEmpty() && !shouldSkip) {
|
||||
if (!partitionToDeletedFiles.containsKey(partition)) {
|
||||
partitionToDeletedFiles.put(partition, new ArrayList<>());
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user