[HUDI-2752] The MOR DELETE block breaks the event time sequence of CDC (#4880)

Danny Chan authored 2022-04-01 20:46:51 +08:00, committed by GitHub
parent 98b4e9796e
commit 6df14f15a3
18 changed files with 356 additions and 71 deletions
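
What changed and why: on MOR tables, a DELETE log block used to serialize bare HoodieKeys, so a delete carried no event-time (ordering) information and, per the issue title, replaying it could break the event-time sequence that CDC consumers rely on. The fix buffers DeleteRecords instead, each pairing the key with the payload's ordering value. Below is a minimal sketch of the shape such a delete marker needs; the names are illustrative, not the actual implementation -- the real class is org.apache.hudi.common.model.DeleteRecord, whose create(key, orderingVal) factory appears in the diff.

// Hypothetical, simplified shape of a delete marker that keeps event-time info.
public final class DeleteRecordSketch {
  private final String recordKey;             // identifies the record to delete
  private final String partitionPath;         // partition the record lives in
  private final Comparable<?> orderingValue;  // event-time / ordering field, may be null

  private DeleteRecordSketch(String recordKey, String partitionPath, Comparable<?> orderingValue) {
    this.recordKey = recordKey;
    this.partitionPath = partitionPath;
    this.orderingValue = orderingValue;
  }

  // Mirrors the DeleteRecord.create(key, orderingVal) factory used in the diff below.
  public static DeleteRecordSketch create(String recordKey, String partitionPath, Comparable<?> orderingValue) {
    return new DeleteRecordSketch(recordKey, partitionPath, orderingValue);
  }

  public String getRecordKey() { return recordKey; }
  public String getPartitionPath() { return partitionPath; }
  public Comparable<?> getOrderingValue() { return orderingValue; }
}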

HoodieAppendHandle.java

@@ -23,10 +23,10 @@ import org.apache.hudi.client.WriteStatus;
 import org.apache.hudi.common.engine.TaskContextSupplier;
 import org.apache.hudi.common.fs.FSUtils;
 import org.apache.hudi.common.model.BaseFile;
+import org.apache.hudi.common.model.DeleteRecord;
 import org.apache.hudi.common.model.FileSlice;
 import org.apache.hudi.common.model.HoodieColumnRangeMetadata;
 import org.apache.hudi.common.model.HoodieDeltaWriteStat;
-import org.apache.hudi.common.model.HoodieKey;
 import org.apache.hudi.common.model.HoodieLogFile;
 import org.apache.hudi.common.model.HoodieOperation;
 import org.apache.hudi.common.model.HoodiePartitionMetadata;
@@ -92,7 +92,7 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload, I, K, O> extends
   // Buffer for holding records in memory before they are flushed to disk
   private final List<IndexedRecord> recordList = new ArrayList<>();
   // Buffer for holding records (to be deleted) in memory before they are flushed to disk
-  private final List<HoodieKey> keysToDelete = new ArrayList<>();
+  private final List<DeleteRecord> recordsToDelete = new ArrayList<>();
   // Incoming records to be written to logs.
   protected Iterator<HoodieRecord<T>> recordItr;
   // Writer to log into the file group's latest slice.
@@ -402,15 +402,15 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload, I, K, O> extends
         blocks.add(getBlock(config, pickLogDataBlockFormat(), recordList, header, keyField));
       }
 
-      if (keysToDelete.size() > 0) {
-        blocks.add(new HoodieDeleteBlock(keysToDelete.toArray(new HoodieKey[keysToDelete.size()]), header));
+      if (recordsToDelete.size() > 0) {
+        blocks.add(new HoodieDeleteBlock(recordsToDelete.toArray(new DeleteRecord[0]), header));
       }
 
       if (blocks.size() > 0) {
         AppendResult appendResult = writer.appendBlocks(blocks);
         processAppendResult(appendResult, recordList);
         recordList.clear();
-        keysToDelete.clear();
+        recordsToDelete.clear();
       }
     } catch (Exception e) {
       throw new HoodieAppendException("Failed while appending records to " + writer.getLogFile().getPath(), e);
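
A small aside on the changed toArray call: recordsToDelete.toArray(new DeleteRecord[0]) passes a zero-length array and lets the list allocate one of the exact size, which on modern JVMs is at least as fast as pre-sizing with new DeleteRecord[recordsToDelete.size()]. A self-contained illustration of the flush-and-clear pattern used above (the names here are ours, not Hudi's):

import java.util.ArrayList;
import java.util.List;

public class ToArraySketch {
  static <T> T[] drain(List<T> buffer, T[] empty) {
    T[] out = buffer.toArray(empty); // the list allocates an exact-size array
    buffer.clear();                  // mirrors recordsToDelete.clear() above
    return out;
  }

  public static void main(String[] args) {
    List<String> pending = new ArrayList<>(List.of("a", "b"));
    String[] flushed = drain(pending, new String[0]);
    System.out.println(flushed.length + " flushed, " + pending.size() + " left"); // 2 flushed, 0 left
  }
}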
@@ -472,7 +472,7 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload, I, K, O> extends
   }
 
   private Writer createLogWriter(Option<FileSlice> fileSlice, String baseCommitTime)
-      throws IOException, InterruptedException {
+      throws IOException {
     Option<HoodieLogFile> latestLogFile = fileSlice.get().getLatestLogFile();
 
     return HoodieLogFormat.newWriterBuilder()
@@ -507,14 +507,16 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload, I, K, O> extends
       record.setNewLocation(new HoodieRecordLocation(instantTime, fileId));
       record.seal();
     }
 
+    // fetch the ordering val first in case the record was deflated.
+    final Comparable<?> orderingVal = record.getData().getOrderingValue();
     Option<IndexedRecord> indexedRecord = getIndexedRecord(record);
     if (indexedRecord.isPresent()) {
-      // Skip the Ignore Record.
+      // Skip the ignored record.
       if (!indexedRecord.get().equals(IGNORE_RECORD)) {
         recordList.add(indexedRecord.get());
       }
     } else {
-      keysToDelete.add(record.getKey());
+      recordsToDelete.add(DeleteRecord.create(record.getKey(), orderingVal));
     }
     numberOfRecords++;
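
The new comment "fetch the ordering val first in case the record was deflated" is the subtle part of this hunk: getIndexedRecord(record) may release the payload's backing data once it has been converted, so reading the ordering value afterwards could come up empty. A hypothetical, self-contained sketch of that hazard (not Hudi's API; the field and method names are assumptions for illustration):

import java.util.HashMap;
import java.util.Map;

public class DeflationSketch {
  // Stands in for a record payload whose data can be deflated after conversion.
  private Map<String, Object> data = new HashMap<>(Map.of("ts", 42L));

  Comparable<?> getOrderingValue() {
    // Derived from the payload data; returns null once the payload is deflated.
    return data == null ? null : (Comparable<?>) data.get("ts");
  }

  Map<String, Object> convertAndDeflate() {
    Map<String, Object> converted = data;
    data = null; // deflate to free memory, since large batches are buffered
    return converted;
  }

  public static void main(String[] args) {
    DeflationSketch payload = new DeflationSketch();
    Comparable<?> orderingVal = payload.getOrderingValue(); // fetch first: 42
    payload.convertAndDeflate();                            // backing data released
    System.out.println(orderingVal);                        // 42 (captured in time)
    System.out.println(payload.getOrderingValue());         // null (too late)
  }
}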

HoodieBackedTableMetadataWriter.java

@@ -31,12 +31,12 @@ import org.apache.hudi.common.data.HoodieData;
 import org.apache.hudi.common.engine.HoodieEngineContext;
 import org.apache.hudi.common.fs.ConsistencyGuardConfig;
 import org.apache.hudi.common.fs.FSUtils;
+import org.apache.hudi.common.model.DeleteRecord;
 import org.apache.hudi.common.model.FileSlice;
 import org.apache.hudi.common.model.HoodieCleaningPolicy;
 import org.apache.hudi.common.model.HoodieCommitMetadata;
 import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy;
 import org.apache.hudi.common.model.HoodieFileFormat;
-import org.apache.hudi.common.model.HoodieKey;
 import org.apache.hudi.common.model.HoodieLogFile;
 import org.apache.hudi.common.model.HoodiePartitionMetadata;
 import org.apache.hudi.common.model.HoodieRecord;
@@ -656,7 +656,7 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
       // Archival of data table has a dependency on compaction(base files) in metadata table.
       // It is assumed that as of time Tx of base instant (/compaction time) in metadata table,
       // all commits in data table is in sync with metadata table. So, we always start with log file for any fileGroup.
-      final HoodieDeleteBlock block = new HoodieDeleteBlock(new HoodieKey[0], blockHeader);
+      final HoodieDeleteBlock block = new HoodieDeleteBlock(new DeleteRecord[0], blockHeader);
 
       LOG.info(String.format("Creating %d file groups for partition %s with base fileId %s at instant time %s",
           fileGroupCount, metadataPartition.getPartitionPath(), metadataPartition.getFileIdPrefix(), instantTime));
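
For context on how the stored ordering value pays off at read time: a merge that encounters a DeleteRecord can now compare event times instead of deleting unconditionally. The sketch below shows one plausible comparison rule, under the assumption that a missing ordering value falls back to the legacy delete-always-wins behavior; the actual merge logic lives in Hudi's log record reader and is not part of this diff.

public class DeleteMergeSketch {
  // Assumed event-time merge rule (illustrative only): a delete shadows a record
  // only if the delete is not older than the record it targets.
  @SuppressWarnings({"unchecked", "rawtypes"})
  static boolean deleteShadowsRecord(Comparable deleteOrderingVal, Comparable recordOrderingVal) {
    if (deleteOrderingVal == null || recordOrderingVal == null) {
      return true; // no event-time info: preserve the pre-fix HoodieKey behavior
    }
    return deleteOrderingVal.compareTo(recordOrderingVal) >= 0;
  }

  public static void main(String[] args) {
    System.out.println(deleteShadowsRecord(5L, 7L)); // false: late-arriving older delete keeps the newer update
    System.out.println(deleteShadowsRecord(7L, 5L)); // true: delete is newer, record goes away
  }
}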