1
0

[HUDI-2947] Fixing checkpoint fetch in deltastreamer (#4485)

* Fixing checkpoint fetch in deltastreamer

* Addressing comments
This commit is contained in:
Sivabalan Narayanan
2022-01-07 11:38:58 -05:00
committed by GitHub
parent b1df60672b
commit 2e561defe9
3 changed files with 72 additions and 42 deletions

View File

@@ -292,14 +292,23 @@ public class HoodieDeltaStreamerTestBase extends UtilitiesTestBase {
}
/**
 * Adds a commit carrying {@code extraMetadata} to the timeline of the given table.
 *
 * <p>Convenience overload: forwards to the four-argument variant with
 * {@code WriteOperationType.UPSERT} and {@code HoodieTimeline.COMMIT_ACTION}.
 *
 * @param metaCient meta client passed through to the four-argument overload
 * @param extraMetadata key/value pairs passed through to the four-argument overload
 * @throws IOException propagated from the four-argument overload
 */
static void addCommitToTimeline(HoodieTableMetaClient metaCient, Map<String, String> extraMetadata) throws IOException {
  final WriteOperationType operationType = WriteOperationType.UPSERT;
  final String actionType = HoodieTimeline.COMMIT_ACTION;
  addCommitToTimeline(metaCient, operationType, actionType, extraMetadata);
}
/**
 * Adds a replace commit carrying {@code extraMetadata} to the timeline of the given table.
 *
 * <p>Forwards to {@code addCommitToTimeline} with {@code WriteOperationType.CLUSTER} and
 * {@code HoodieTimeline.REPLACE_COMMIT_ACTION}.
 *
 * @param metaCient meta client passed through to {@code addCommitToTimeline}
 * @param extraMetadata key/value pairs passed through to {@code addCommitToTimeline}
 * @throws IOException propagated from {@code addCommitToTimeline}
 */
static void addReplaceCommitToTimeline(HoodieTableMetaClient metaCient, Map<String, String> extraMetadata) throws IOException {
  final WriteOperationType operationType = WriteOperationType.CLUSTER;
  final String actionType = HoodieTimeline.REPLACE_COMMIT_ACTION;
  addCommitToTimeline(metaCient, operationType, actionType, extraMetadata);
}
/**
 * Creates a new instant on the active timeline and drives it through
 * REQUESTED, INFLIGHT and finally saves it as complete with the built commit metadata.
 *
 * @param metaCient meta client whose active timeline receives the new instant
 * @param writeOperationType operation type recorded in the commit metadata
 * @param commitActiontype timeline action used for every state of the new instant
 * @param extraMetadata key/value pairs copied into the commit's extra metadata
 * @throws IOException propagated from the calls made in this method
 */
static void addCommitToTimeline(HoodieTableMetaClient metaCient, WriteOperationType writeOperationType, String commitActiontype,
    Map<String, String> extraMetadata) throws IOException {
  HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata();
  // Use the caller-supplied operation type; no hard-coded UPSERT.
  commitMetadata.setOperationType(writeOperationType);
  extraMetadata.forEach((k, v) -> commitMetadata.getExtraMetadata().put(k, v));
  String commitTime = HoodieActiveTimeline.createNewInstantTime();
  // All three states must use the same action so that a replace commit does not
  // leave behind requested/inflight instants of a different action type.
  metaCient.getActiveTimeline().createNewInstant(new HoodieInstant(HoodieInstant.State.REQUESTED, commitActiontype, commitTime));
  metaCient.getActiveTimeline().createNewInstant(new HoodieInstant(HoodieInstant.State.INFLIGHT, commitActiontype, commitTime));
  metaCient.getActiveTimeline().saveAsComplete(
      new HoodieInstant(HoodieInstant.State.INFLIGHT, commitActiontype, commitTime),
      Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
}

View File

@@ -1758,14 +1758,20 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
extraMetadata.put(HoodieWriteConfig.DELTASTREAMER_CHECKPOINT_KEY, "abc");
addCommitToTimeline(metaClient, extraMetadata);
metaClient.reloadActiveTimeline();
assertEquals(testDeltaSync.getPreviousCheckpoint(metaClient.getActiveTimeline().getCommitsTimeline()).get(), "abc");
assertEquals(testDeltaSync.getLatestCommitMetadataWithValidCheckpointInfo(metaClient.getActiveTimeline()
.getCommitsTimeline()).get().getMetadata(CHECKPOINT_KEY), "abc");
addCommitToTimeline(metaClient, Collections.emptyMap());
metaClient.reloadActiveTimeline();
extraMetadata.put(HoodieWriteConfig.DELTASTREAMER_CHECKPOINT_KEY, "def");
addCommitToTimeline(metaClient, extraMetadata);
metaClient.reloadActiveTimeline();
assertEquals(testDeltaSync.getPreviousCheckpoint(metaClient.getActiveTimeline().getCommitsTimeline()).get(), "def");
assertEquals(testDeltaSync.getLatestCommitMetadataWithValidCheckpointInfo(metaClient.getActiveTimeline()
.getCommitsTimeline()).get().getMetadata(CHECKPOINT_KEY), "def");
// add a replace commit which does not have CHECKPOINT_KEY. Deltastreamer should be able to go back and pick the right checkpoint.
addReplaceCommitToTimeline(metaClient, Collections.emptyMap());
metaClient.reloadActiveTimeline();
assertEquals(testDeltaSync.getLatestCommitMetadataWithValidCheckpointInfo(metaClient.getActiveTimeline()
.getCommitsTimeline()).get().getMetadata(CHECKPOINT_KEY), "def");
}
class TestDeltaSync extends DeltaSync {
@@ -1776,8 +1782,8 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
super(cfg, sparkSession, schemaProvider, props, jssc, fs, conf, onInitializingHoodieWriteClient);
}
protected Option<String> getPreviousCheckpoint(HoodieTimeline timeline) throws IOException {
return super.getPreviousCheckpoint(timeline);
protected Option<HoodieCommitMetadata> getLatestCommitMetadataWithValidCheckpointInfo(HoodieTimeline timeline) throws IOException {
return super.getLatestCommitMetadataWithValidCheckpointInfo(timeline);
}
}