Introducing HoodieLogFormat V2 with versioning support
- HoodieLogFormat V2 has support for LogFormat evolution through versioning - LogVersion is associated with a LogBlock not a LogFile - Based on a version for a LogBlock, approporiate code path is executed - Implemented LazyReading of Hoodie Log Blocks with Memory / IO tradeoff - Implemented Reverse pointer to be able to traverse the log in reverse - Introduce new MAGIC for backwards compatibility with logs without versions
This commit is contained in:
committed by
vinoth chandar
parent
dfd1979c51
commit
5405a6287b
@@ -159,11 +159,14 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieIOH
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
// TODO (NA) - Perform a schema check of current input record with the last schema on log file
|
||||
// to make sure we don't append records with older (shorter) schema than already appended
|
||||
public void doAppend() {
|
||||
|
||||
int maxBlockSize = config.getLogFileDataBlockMaxSize(); int numberOfRecords = 0;
|
||||
Map<HoodieLogBlock.LogMetadataType, String> metadata = Maps.newHashMap();
|
||||
metadata.put(HoodieLogBlock.LogMetadataType.INSTANT_TIME, commitTime);
|
||||
Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
|
||||
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, commitTime);
|
||||
header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
|
||||
while (recordItr.hasNext()) {
|
||||
HoodieRecord record = recordItr.next();
|
||||
// update the new location of the record, so we know where to find it next
|
||||
@@ -178,7 +181,7 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieIOH
|
||||
// Recompute averageRecordSize before writing a new block and update existing value with avg of new and old
|
||||
logger.info("AvgRecordSize => " + averageRecordSize);
|
||||
averageRecordSize = (averageRecordSize + SizeEstimator.estimate(record))/2;
|
||||
doAppend(metadata);
|
||||
doAppend(header);
|
||||
numberOfRecords = 0;
|
||||
}
|
||||
Optional<IndexedRecord> indexedRecord = getIndexedRecord(record);
|
||||
@@ -189,18 +192,18 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieIOH
|
||||
}
|
||||
numberOfRecords++;
|
||||
}
|
||||
doAppend(metadata);
|
||||
doAppend(header);
|
||||
}
|
||||
|
||||
private void doAppend(Map<HoodieLogBlock.LogMetadataType, String> metadata) {
|
||||
private void doAppend(Map<HoodieLogBlock.HeaderMetadataType, String> header) {
|
||||
try {
|
||||
if (recordList.size() > 0) {
|
||||
writer = writer.appendBlock(new HoodieAvroDataBlock(recordList, schema, metadata));
|
||||
writer = writer.appendBlock(new HoodieAvroDataBlock(recordList, header));
|
||||
recordList.clear();
|
||||
}
|
||||
if (keysToDelete.size() > 0) {
|
||||
writer = writer.appendBlock(
|
||||
new HoodieDeleteBlock(keysToDelete.stream().toArray(String[]::new), metadata));
|
||||
new HoodieDeleteBlock(keysToDelete.stream().toArray(String[]::new), header));
|
||||
keysToDelete.clear();
|
||||
}
|
||||
} catch (Exception e) {
|
||||
|
||||
@@ -18,6 +18,7 @@ package com.uber.hoodie.io;
|
||||
|
||||
import com.fasterxml.jackson.databind.DeserializationFeature;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.google.common.collect.Maps;
|
||||
import com.google.common.collect.Sets;
|
||||
import com.uber.hoodie.avro.model.HoodieArchivedMetaEntry;
|
||||
import com.uber.hoodie.avro.model.HoodieCleanMetadata;
|
||||
@@ -30,6 +31,7 @@ import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||
import com.uber.hoodie.common.table.log.HoodieLogFormat;
|
||||
import com.uber.hoodie.common.table.log.block.HoodieAvroDataBlock;
|
||||
import com.uber.hoodie.common.table.log.block.HoodieLogBlock;
|
||||
import com.uber.hoodie.common.table.timeline.HoodieArchivedTimeline;
|
||||
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||
import com.uber.hoodie.common.util.AvroUtils;
|
||||
@@ -39,6 +41,7 @@ import com.uber.hoodie.exception.HoodieException;
|
||||
import com.uber.hoodie.exception.HoodieIOException;
|
||||
import com.uber.hoodie.table.HoodieTable;
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.file.DataFileStream;
|
||||
import org.apache.avro.generic.IndexedRecord;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.log4j.LogManager;
|
||||
@@ -47,6 +50,7 @@ import org.apache.log4j.Logger;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
@@ -190,7 +194,9 @@ public class HoodieCommitArchiveLog {
|
||||
for (HoodieInstant hoodieInstant : instants) {
|
||||
records.add(convertToAvroRecord(commitTimeline, hoodieInstant));
|
||||
}
|
||||
HoodieAvroDataBlock block = new HoodieAvroDataBlock(records, wrapperSchema);
|
||||
Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
|
||||
header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, wrapperSchema.toString());
|
||||
HoodieAvroDataBlock block = new HoodieAvroDataBlock(records, header);
|
||||
this.writer = writer.appendBlock(block);
|
||||
} catch (Exception e) {
|
||||
throw new HoodieCommitException("Failed to archive commits", e);
|
||||
|
||||
@@ -154,7 +154,8 @@ public class HoodieRealtimeTableCompactor implements HoodieCompactor {
|
||||
|
||||
HoodieCompactedLogRecordScanner scanner = new HoodieCompactedLogRecordScanner(fs,
|
||||
metaClient.getBasePath(),
|
||||
operation.getDeltaFilePaths(), readerSchema, maxInstantTime, config.getMaxMemorySizePerCompactionInBytes());
|
||||
operation.getDeltaFilePaths(), readerSchema, maxInstantTime, config.getMaxMemorySizePerCompactionInBytes(),
|
||||
config.getCompactionLazyBlockReadEnabled(), config.getCompactionReverseLogReadEnabled());
|
||||
if (!scanner.iterator().hasNext()) {
|
||||
return Lists.newArrayList();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user