1
0

[HUDI-1738] Emit deletes for flink MOR table streaming read (#2742)

Current we did a soft delete for DELETE row data when writes into hoodie
table. For streaming read of MOR table, the Flink reader detects the
delete records and still emit them if the record key semantics are still
kept.

This is useful and actually a must for streaming ETL pipeline
incremental computation.
This commit is contained in:
Danny Chan
2021-04-01 15:25:31 +08:00
committed by GitHub
parent fe16d0de7c
commit 9804662bc8
20 changed files with 557 additions and 158 deletions

View File

@@ -173,10 +173,10 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
deleteInstantFile(instant);
}
public void deletePending(HoodieInstant.State state, String action, String instantStr) {
public void deletePendingIfExists(HoodieInstant.State state, String action, String instantStr) {
HoodieInstant instant = new HoodieInstant(state, action, instantStr);
ValidationUtils.checkArgument(!instant.isCompleted());
deleteInstantFile(instant);
deleteInstantFileIfExists(instant);
}
public void deleteCompactionRequested(HoodieInstant instant) {
@@ -185,6 +185,25 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
deleteInstantFile(instant);
}
private void deleteInstantFileIfExists(HoodieInstant instant) {
LOG.info("Deleting instant " + instant);
Path inFlightCommitFilePath = new Path(metaClient.getMetaPath(), instant.getFileName());
try {
if (metaClient.getFs().exists(inFlightCommitFilePath)) {
boolean result = metaClient.getFs().delete(inFlightCommitFilePath, false);
if (result) {
LOG.info("Removed instant " + instant);
} else {
throw new HoodieIOException("Could not delete instant " + instant);
}
} else {
LOG.warn("The commit " + inFlightCommitFilePath + " to remove does not exist");
}
} catch (IOException e) {
throw new HoodieIOException("Could not remove inflight commit " + inFlightCommitFilePath, e);
}
}
private void deleteInstantFile(HoodieInstant instant) {
LOG.info("Deleting instant " + instant);
Path inFlightCommitFilePath = new Path(metaClient.getMetaPath(), instant.getFileName());