1
0

[HUDI-3799] Fixing not deleting empty instants w/o archiving (#5261)

This commit is contained in:
Sivabalan Narayanan
2022-04-11 21:02:43 -07:00
committed by GitHub
parent 3d8fc78c66
commit f91e9e63e1
7 changed files with 88 additions and 22 deletions

View File

@@ -588,20 +588,17 @@ public class HoodieTimelineArchiver<T extends HoodieAvroPayload, I, K, O> {
List<IndexedRecord> records = new ArrayList<>(); List<IndexedRecord> records = new ArrayList<>();
for (HoodieInstant hoodieInstant : instants) { for (HoodieInstant hoodieInstant : instants) {
try { try {
if (table.getActiveTimeline().isEmpty(hoodieInstant)
&& (
hoodieInstant.getAction().equals(HoodieTimeline.CLEAN_ACTION)
|| (hoodieInstant.getAction().equals(HoodieTimeline.ROLLBACK_ACTION) && hoodieInstant.isCompleted())
)
) {
table.getActiveTimeline().deleteEmptyInstantIfExists(hoodieInstant);
} else {
deleteAnyLeftOverMarkers(context, hoodieInstant); deleteAnyLeftOverMarkers(context, hoodieInstant);
// in local FS and HDFS, there could be empty completed instants due to crash.
if (table.getActiveTimeline().isEmpty(hoodieInstant) && hoodieInstant.isCompleted()) {
// let's add an entry to the archival, even if not for the plan.
records.add(createAvroRecordFromEmptyInstant(hoodieInstant));
} else {
records.add(convertToAvroRecord(hoodieInstant)); records.add(convertToAvroRecord(hoodieInstant));
}
if (records.size() >= this.config.getCommitArchivalBatchSize()) { if (records.size() >= this.config.getCommitArchivalBatchSize()) {
writeToFile(wrapperSchema, records); writeToFile(wrapperSchema, records);
} }
}
} catch (Exception e) { } catch (Exception e) {
LOG.error("Failed to archive commits, .commit file: " + hoodieInstant.getFileName(), e); LOG.error("Failed to archive commits, .commit file: " + hoodieInstant.getFileName(), e);
if (this.config.isFailOnTimelineArchivingEnabled()) { if (this.config.isFailOnTimelineArchivingEnabled()) {
@@ -637,4 +634,8 @@ public class HoodieTimelineArchiver<T extends HoodieAvroPayload, I, K, O> {
throws IOException { throws IOException {
return MetadataConversionUtils.createMetaWrapper(hoodieInstant, metaClient); return MetadataConversionUtils.createMetaWrapper(hoodieInstant, metaClient);
} }
/**
 * Produces the archival record for an instant treated as empty by the caller.
 *
 * <p>Pure delegation to
 * {@code MetadataConversionUtils.createMetaWrapperForEmptyInstant(hoodieInstant)}.
 *
 * @param hoodieInstant the instant to build an archival record for
 * @return the record returned by the conversion utility
 * @throws IOException declared for parity with the delegate's signature
 */
private IndexedRecord createAvroRecordFromEmptyInstant(HoodieInstant hoodieInstant) throws IOException {
  final IndexedRecord emptyInstantRecord = MetadataConversionUtils.createMetaWrapperForEmptyInstant(hoodieInstant);
  return emptyInstantRecord;
}
} }

View File

@@ -125,6 +125,46 @@ public class MetadataConversionUtils {
return archivedMetaWrapper; return archivedMetaWrapper;
} }
/**
 * Builds an archived meta entry that carries only the instant's timestamp, state name and
 * action type; no other field of the entry is populated here.
 *
 * @param hoodieInstant the instant to describe
 * @return a new {@code HoodieArchivedMetaEntry} with commit time, action state and action type set
 * @throws IOException declared in the signature; nothing in this method body raises it directly
 * @throws UnsupportedOperationException if the instant's action is not one of clean, commit,
 *         delta commit, replace commit, rollback, savepoint or compaction
 */
public static HoodieArchivedMetaEntry createMetaWrapperForEmptyInstant(HoodieInstant hoodieInstant) throws IOException {
  HoodieArchivedMetaEntry emptyEntry = new HoodieArchivedMetaEntry();
  emptyEntry.setCommitTime(hoodieInstant.getTimestamp());
  emptyEntry.setActionState(hoodieInstant.getState().name());

  // Resolve the archived action type name first, then apply it with a single setter call.
  final String actionTypeName;
  switch (hoodieInstant.getAction()) {
    case HoodieTimeline.CLEAN_ACTION:
      actionTypeName = ActionType.clean.name();
      break;
    case HoodieTimeline.COMMIT_ACTION:
      actionTypeName = ActionType.commit.name();
      break;
    case HoodieTimeline.DELTA_COMMIT_ACTION:
      actionTypeName = ActionType.deltacommit.name();
      break;
    case HoodieTimeline.REPLACE_COMMIT_ACTION:
      actionTypeName = ActionType.replacecommit.name();
      break;
    case HoodieTimeline.ROLLBACK_ACTION:
      actionTypeName = ActionType.rollback.name();
      break;
    case HoodieTimeline.SAVEPOINT_ACTION:
      actionTypeName = ActionType.savepoint.name();
      break;
    case HoodieTimeline.COMPACTION_ACTION:
      actionTypeName = ActionType.compaction.name();
      break;
    default:
      throw new UnsupportedOperationException("Action not fully supported yet");
  }
  emptyEntry.setActionType(actionTypeName);
  return emptyEntry;
}
public static Option<HoodieCommitMetadata> getInflightReplaceMetadata(HoodieTableMetaClient metaClient, HoodieInstant instant) throws IOException { public static Option<HoodieCommitMetadata> getInflightReplaceMetadata(HoodieTableMetaClient metaClient, HoodieInstant instant) throws IOException {
Option<byte[]> inflightContent = metaClient.getActiveTimeline().getInstantDetails(instant); Option<byte[]> inflightContent = metaClient.getActiveTimeline().getInstantDetails(instant);
if (!inflightContent.isPresent() || inflightContent.get().length == 0) { if (!inflightContent.isPresent() || inflightContent.get().length == 0) {

View File

@@ -889,12 +889,19 @@ public class TestHoodieTimelineArchiver extends HoodieClientTestHarness {
metaClient = HoodieTableMetaClient.reload(metaClient); metaClient = HoodieTableMetaClient.reload(metaClient);
int startInstant = 1; int startInstant = 1;
List<HoodieInstant> expectedArchivedInstants = new ArrayList<>();
for (int i = 0; i < maxInstantsToKeep + 1; i++, startInstant++) { for (int i = 0; i < maxInstantsToKeep + 1; i++, startInstant++) {
createCleanMetadata(startInstant + "", false, isEmpty || i % 2 == 0); createCleanMetadata(startInstant + "", false, false, isEmpty || i % 2 == 0);
expectedArchivedInstants.add(new HoodieInstant(State.REQUESTED, HoodieTimeline.CLEAN_ACTION, startInstant + ""));
expectedArchivedInstants.add(new HoodieInstant(State.INFLIGHT, HoodieTimeline.CLEAN_ACTION, startInstant + ""));
expectedArchivedInstants.add(new HoodieInstant(State.COMPLETED, HoodieTimeline.CLEAN_ACTION, startInstant + ""));
} }
for (int i = 0; i < maxInstantsToKeep + 1; i++, startInstant += 2) { for (int i = 0; i < maxInstantsToKeep + 1; i++, startInstant += 2) {
createCommitAndRollbackFile(startInstant + 1 + "", startInstant + "", false, isEmpty || i % 2 == 0); createCommitAndRollbackFile(startInstant + 1 + "", startInstant + "", false, isEmpty || i % 2 == 0);
expectedArchivedInstants.add(new HoodieInstant(State.REQUESTED, HoodieTimeline.ROLLBACK_ACTION, startInstant + ""));
expectedArchivedInstants.add(new HoodieInstant(State.INFLIGHT, HoodieTimeline.ROLLBACK_ACTION, startInstant + ""));
expectedArchivedInstants.add(new HoodieInstant(State.COMPLETED, HoodieTimeline.ROLLBACK_ACTION, startInstant + ""));
} }
if (enableMetadataTable) { if (enableMetadataTable) {
@@ -916,6 +923,14 @@ public class TestHoodieTimelineArchiver extends HoodieClientTestHarness {
assertTrue(actionInstantMap.containsKey("rollback"), "Rollback Action key must be preset"); assertTrue(actionInstantMap.containsKey("rollback"), "Rollback Action key must be preset");
assertEquals(minInstantsToKeep, actionInstantMap.get("rollback").size(), "Should have min instant"); assertEquals(minInstantsToKeep, actionInstantMap.get("rollback").size(), "Should have min instant");
// verify all expected instants are part of archived timeline
metaClient.getArchivedTimeline().loadCompletedInstantDetailsInMemory();
HoodieInstant firstInstant = metaClient.reloadActiveTimeline().firstInstant().get();
expectedArchivedInstants = expectedArchivedInstants.stream()
.filter(entry -> HoodieTimeline.compareTimestamps(entry.getTimestamp(), HoodieTimeline.LESSER_THAN, firstInstant.getTimestamp()
)).collect(Collectors.toList());
expectedArchivedInstants.forEach(entry -> assertTrue(metaClient.getArchivedTimeline().containsInstant(entry)));
} }
@ParameterizedTest @ParameterizedTest
@@ -1271,7 +1286,8 @@ public class TestHoodieTimelineArchiver extends HoodieClientTestHarness {
private List<HoodieInstant> getArchivedInstants(HoodieInstant instant) { private List<HoodieInstant> getArchivedInstants(HoodieInstant instant) {
List<HoodieInstant> instants = new ArrayList<>(); List<HoodieInstant> instants = new ArrayList<>();
if (instant.getAction() == HoodieTimeline.COMMIT_ACTION || instant.getAction() == HoodieTimeline.DELTA_COMMIT_ACTION || instant.getAction() == HoodieTimeline.CLEAN_ACTION) { if (instant.getAction().equals(HoodieTimeline.COMMIT_ACTION) || instant.getAction().equals(HoodieTimeline.DELTA_COMMIT_ACTION)
|| instant.getAction().equals(HoodieTimeline.CLEAN_ACTION) || instant.getAction().equals(HoodieTimeline.ROLLBACK_ACTION)) {
instants.add(new HoodieInstant(State.REQUESTED, instant.getAction(), instant.getTimestamp())); instants.add(new HoodieInstant(State.REQUESTED, instant.getAction(), instant.getTimestamp()));
} }
instants.add(new HoodieInstant(State.INFLIGHT, instant.getAction(), instant.getTimestamp())); instants.add(new HoodieInstant(State.INFLIGHT, instant.getAction(), instant.getTimestamp()));

View File

@@ -744,7 +744,7 @@ public class TestCleaner extends HoodieClientTestBase {
for (int i = 0; i < cleanCount; i++, startInstant++) { for (int i = 0; i < cleanCount; i++, startInstant++) {
String commitTime = makeNewCommitTime(startInstant, "%09d"); String commitTime = makeNewCommitTime(startInstant, "%09d");
createCleanMetadata(commitTime + "", false, true); createEmptyCleanMetadata(commitTime + "", false);
} }
int instantClean = startInstant; int instantClean = startInstant;

View File

@@ -696,10 +696,14 @@ public abstract class HoodieClientTestHarness extends HoodieCommonTestHarness im
} }
public HoodieInstant createCleanMetadata(String instantTime, boolean inflightOnly) throws IOException { public HoodieInstant createCleanMetadata(String instantTime, boolean inflightOnly) throws IOException {
return createCleanMetadata(instantTime, inflightOnly, false); return createCleanMetadata(instantTime, inflightOnly, false, false);
} }
public HoodieInstant createCleanMetadata(String instantTime, boolean inflightOnly, boolean isEmpty) throws IOException { public HoodieInstant createEmptyCleanMetadata(String instantTime, boolean inflightOnly) throws IOException {
return createCleanMetadata(instantTime, inflightOnly, true, true);
}
public HoodieInstant createCleanMetadata(String instantTime, boolean inflightOnly, boolean isEmptyForAll, boolean isEmptyCompleted) throws IOException {
HoodieCleanerPlan cleanerPlan = new HoodieCleanerPlan(new HoodieActionInstant("", "", ""), "", new HashMap<>(), HoodieCleanerPlan cleanerPlan = new HoodieCleanerPlan(new HoodieActionInstant("", "", ""), "", new HashMap<>(),
CleanPlanV2MigrationHandler.VERSION, new HashMap<>(), new ArrayList<>()); CleanPlanV2MigrationHandler.VERSION, new HashMap<>(), new ArrayList<>());
if (inflightOnly) { if (inflightOnly) {
@@ -713,7 +717,7 @@ public abstract class HoodieClientTestHarness extends HoodieCommonTestHarness im
Collections.emptyList(), Collections.emptyList(),
instantTime); instantTime);
HoodieCleanMetadata cleanMetadata = convertCleanMetadata(instantTime, Option.of(0L), Collections.singletonList(cleanStats)); HoodieCleanMetadata cleanMetadata = convertCleanMetadata(instantTime, Option.of(0L), Collections.singletonList(cleanStats));
HoodieTestTable.of(metaClient).addClean(instantTime, cleanerPlan, cleanMetadata, isEmpty); HoodieTestTable.of(metaClient).addClean(instantTime, cleanerPlan, cleanMetadata, isEmptyForAll, isEmptyCompleted);
} }
return new HoodieInstant(inflightOnly, "clean", instantTime); return new HoodieInstant(inflightOnly, "clean", instantTime);
} }

View File

@@ -248,6 +248,10 @@ public class FileCreateUtils {
createMetaFile(basePath, instantTime, HoodieTimeline.REQUESTED_ROLLBACK_EXTENSION, content); createMetaFile(basePath, instantTime, HoodieTimeline.REQUESTED_ROLLBACK_EXTENSION, content);
} }
/**
 * Creates the requested-rollback meta file for the given instant.
 *
 * <p>Delegates to the {@code createMetaFile} overload that takes no content argument, passing
 * {@code HoodieTimeline.REQUESTED_ROLLBACK_EXTENSION} as the file extension.
 *
 * @param basePath base path handed through to {@code createMetaFile}
 * @param instantTime instant time handed through to {@code createMetaFile}
 * @throws IOException propagated from {@code createMetaFile}
 */
public static void createRequestedRollbackFile(String basePath, String instantTime) throws IOException {
  final String requestedRollbackSuffix = HoodieTimeline.REQUESTED_ROLLBACK_EXTENSION;
  createMetaFile(basePath, instantTime, requestedRollbackSuffix);
}
public static void createInflightRollbackFile(String basePath, String instantTime) throws IOException { public static void createInflightRollbackFile(String basePath, String instantTime) throws IOException {
createMetaFile(basePath, instantTime, HoodieTimeline.INFLIGHT_ROLLBACK_EXTENSION); createMetaFile(basePath, instantTime, HoodieTimeline.INFLIGHT_ROLLBACK_EXTENSION);
} }

View File

@@ -282,13 +282,13 @@ public class HoodieTestTable {
} }
public HoodieTestTable addClean(String instantTime, HoodieCleanerPlan cleanerPlan, HoodieCleanMetadata metadata) throws IOException { public HoodieTestTable addClean(String instantTime, HoodieCleanerPlan cleanerPlan, HoodieCleanMetadata metadata) throws IOException {
return addClean(instantTime, cleanerPlan, metadata, false); return addClean(instantTime, cleanerPlan, metadata, false, false);
} }
public HoodieTestTable addClean(String instantTime, HoodieCleanerPlan cleanerPlan, HoodieCleanMetadata metadata, boolean isEmpty) throws IOException { public HoodieTestTable addClean(String instantTime, HoodieCleanerPlan cleanerPlan, HoodieCleanMetadata metadata, boolean isEmptyForAll, boolean isEmptyCompleted) throws IOException {
createRequestedCleanFile(basePath, instantTime, cleanerPlan, isEmpty); createRequestedCleanFile(basePath, instantTime, cleanerPlan, isEmptyForAll);
createInflightCleanFile(basePath, instantTime, cleanerPlan, isEmpty); createInflightCleanFile(basePath, instantTime, cleanerPlan, isEmptyForAll);
createCleanFile(basePath, instantTime, metadata, isEmpty); createCleanFile(basePath, instantTime, metadata, isEmptyCompleted);
currentInstantTime = instantTime; currentInstantTime = instantTime;
return this; return this;
} }
@@ -335,6 +335,7 @@ public class HoodieTestTable {
} }
public HoodieTestTable addRollback(String instantTime, HoodieRollbackMetadata rollbackMetadata, boolean isEmpty) throws IOException { public HoodieTestTable addRollback(String instantTime, HoodieRollbackMetadata rollbackMetadata, boolean isEmpty) throws IOException {
createRequestedRollbackFile(basePath, instantTime);
createInflightRollbackFile(basePath, instantTime); createInflightRollbackFile(basePath, instantTime);
createRollbackFile(basePath, instantTime, rollbackMetadata, isEmpty); createRollbackFile(basePath, instantTime, rollbackMetadata, isEmpty);
currentInstantTime = instantTime; currentInstantTime = instantTime;