Introducing HoodieLogFormat V2 with versioning support

- HoodieLogFormat V2 supports LogFormat evolution through versioning
  - LogVersion is associated with a LogBlock, not a LogFile
  - Based on the version of a LogBlock, the appropriate code path is executed
- Implemented lazy reading of Hoodie Log Blocks, with a memory / IO tradeoff (see the sketches below)
- Implemented a reverse pointer to be able to traverse the log in reverse
- Introduced a new MAGIC for backwards compatibility with logs without versions
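
A minimal sketch of how per-block versioning can drive the read path. All names here (LogBlockReaderSketch, readV1Content, readV2Content) are illustrative, not the actual Hudi classes:

import java.io.DataInputStream;
import java.io.IOException;

// Illustrative sketch only: per-block version dispatch. Names and layout are
// hypothetical and do not mirror the actual Hudi implementation.
public class LogBlockReaderSketch {

  public byte[] readBlockContent(DataInputStream in) throws IOException {
    int version = in.readInt();   // the version travels with each LogBlock, not the LogFile
    switch (version) {
      case 1:
        return readV1Content(in); // legacy layout, read eagerly
      case 2:
        return readV2Content(in); // V2 layout: content length enables lazy reading
      default:
        throw new IOException("Unknown log block version: " + version);
    }
  }

  private byte[] readV1Content(DataInputStream in) throws IOException {
    byte[] buf = new byte[16];    // fixed payload size, purely for the sketch
    in.readFully(buf);
    return buf;
  }

  private byte[] readV2Content(DataInputStream in) throws IOException {
    int contentLength = in.readInt();     // V2 stores the content length up front;
    byte[] buf = new byte[contentLength]; // a lazy reader could instead skip() past it
    in.readFully(buf);                    // and come back only when the block is needed
    return buf;
  }
}

The reverse pointer works the same way in spirit: if each block ends with the offset of its own start, a reader can seek to the tail of the file and walk blocks back to front. A hedged sketch, assuming a hypothetical on-disk layout with a trailing 8-byte offset per block:

import java.io.IOException;
import java.io.RandomAccessFile;

// Illustrative sketch: walk a log file tail-to-head using a trailing
// reverse pointer per block. The layout here is hypothetical.
public class ReverseLogScanSketch {

  public static void printBlockOffsetsInReverse(String path) throws IOException {
    try (RandomAccessFile file = new RandomAccessFile(path, "r")) {
      long pos = file.length();
      while (pos > 0) {
        file.seek(pos - 8);   // last 8 bytes of a block hold its start offset
        long blockStart = file.readLong();
        System.out.println("block starts at offset " + blockStart);
        pos = blockStart;     // the previous block ends where this one begins
      }
    }
  }
}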
Nishith Agarwal
2018-02-15 11:01:25 -08:00
committed by vinoth chandar
parent dfd1979c51
commit 5405a6287b
32 changed files with 2066 additions and 677 deletions


@@ -72,6 +72,11 @@ public class HoodieRealtimeRecordReader implements RecordReader<Void, ArrayWrita
 public static final String COMPACTION_MEMORY_FRACTION_PROP = "compaction.memory.fraction";
 public static final String DEFAULT_COMPACTION_MEMORY_FRACTION = "0.75";
+// used to choose a trade-off between IO and memory when performing the compaction process
+// depending on the output file size and the memory provided, set to true to avoid OOM for large files + small memory
+public static final String COMPACTION_LAZY_BLOCK_READ_ENABLED_PROP = "compaction.lazy.block.read.enabled";
+public static final String DEFAULT_COMPACTION_LAZY_BLOCK_READ_ENABLED = "true";
 public static final Log LOG = LogFactory.getLog(HoodieRealtimeRecordReader.class);
 private final HashMap<String, ArrayWritable> deltaRecordMap;
@@ -132,7 +137,8 @@ public class HoodieRealtimeRecordReader implements RecordReader<Void, ArrayWrita
 split.getDeltaFilePaths(),
 readerSchema, split.getMaxCommitTime(),
 (long) Math.ceil(Double.valueOf(jobConf.get(COMPACTION_MEMORY_FRACTION_PROP, DEFAULT_COMPACTION_MEMORY_FRACTION))
-*jobConf.getMemoryForMapTask()));
+*jobConf.getMemoryForMapTask()),
+Boolean.valueOf(jobConf.get(COMPACTION_LAZY_BLOCK_READ_ENABLED_PROP, DEFAULT_COMPACTION_LAZY_BLOCK_READ_ENABLED)), false);
 // NOTE: HoodieCompactedLogRecordScanner will not return records for an in-flight commit
 // but can return records for completed commits > the commit we are trying to read (if using readCommit() API)
 for (HoodieRecord<? extends HoodieRecordPayload> hoodieRecord : compactedLogRecordScanner) {
@@ -140,6 +146,7 @@ public class HoodieRealtimeRecordReader implements RecordReader<Void, ArrayWrita
 .get();
 String key = hoodieRecord.getRecordKey();
 // we assume a record that appears later in the log is newer than what we have in the map & replace it
+// TODO : handle deletes here
 ArrayWritable aWritable = (ArrayWritable) avroToArrayWritable(rec, writerSchema);
 deltaRecordMap.put(key, aWritable);
 if (LOG.isDebugEnabled()) {
@@ -302,6 +309,7 @@ public class HoodieRealtimeRecordReader implements RecordReader<Void, ArrayWrita
 arrayWritableToString(deltaRecordMap.get(key))));
 }
 if (deltaRecordMap.containsKey(key)) {
+// TODO(NA): Invoke preCombine here by converting arrayWritable to Avro ?
 Writable[] replaceValue = deltaRecordMap.get(key).get();
 Writable[] originalValue = arrayWritable.get();
 System.arraycopy(replaceValue, 0, originalValue, 0, originalValue.length);
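
For context on the knob added above, a usage sketch with a plain Hadoop JobConf; the property names come from the constants in this diff, and the memory math mirrors the scanner construction above:

import org.apache.hadoop.mapred.JobConf;

// Sketch: configuring the compaction memory fraction and the new lazy block
// read flag, then deriving the memory budget the same way the reader does.
public class CompactionConfSketch {

  public static void main(String[] args) {
    JobConf jobConf = new JobConf();
    jobConf.set("compaction.memory.fraction", "0.75");
    // true trades extra IO for lower memory: block content is materialized lazily,
    // avoiding OOM when log files are large and task memory is small
    jobConf.set("compaction.lazy.block.read.enabled", "true");

    long maxMemoryForCompaction = (long) Math.ceil(
        Double.valueOf(jobConf.get("compaction.memory.fraction", "0.75"))
            * jobConf.getMemoryForMapTask());
    System.out.println("Memory budget for compaction: " + maxMemoryForCompaction);
  }
}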


@@ -91,9 +91,10 @@ public class HoodieRealtimeRecordReaderTest {
 records.add(SchemaTestUtil.generateAvroRecordFromJson(schema, i, newCommit, "fileid0"));
 }
 Schema writeSchema = records.get(0).getSchema();
-Map<HoodieLogBlock.LogMetadataType, String> metadata = Maps.newHashMap();
-metadata.put(HoodieLogBlock.LogMetadataType.INSTANT_TIME, newCommit);
-HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, writeSchema, metadata);
+Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
+header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, newCommit);
+header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, writeSchema.toString());
+HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header);
 writer = writer.appendBlock(dataBlock);
 long size = writer.getCurrentSize();
 return writer;
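
The test change above reflects that the writer schema now travels inside the block header (HeaderMetadataType.SCHEMA) as a string, so readers can recover it from the block alone. A small standalone sketch of the schema round-trip this relies on, using only the Avro API:

import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;

// Sketch: Schema -> String (what gets stored in the header) -> Schema
// (what a reader parses back out).
public class SchemaHeaderRoundTrip {

  public static void main(String[] args) {
    Schema writeSchema = SchemaBuilder.record("ExampleRecord").fields()
        .requiredString("_row_key")
        .requiredLong("timestamp")
        .endRecord();
    String stored = writeSchema.toString();               // goes into HeaderMetadataType.SCHEMA
    Schema recovered = new Schema.Parser().parse(stored); // recovered on the read side
    System.out.println(recovered.equals(writeSchema));    // prints true
  }
}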