1
0

- Fixing memory leak due to HoodieLogFileReader holding on to a logblock

- Removed inMemory HashMap usage in merge(..) code in LogScanner
This commit is contained in:
Nishith Agarwal
2018-03-13 22:56:29 -07:00
committed by vinoth chandar
parent d3df32fa03
commit 123da020e2
4 changed files with 27 additions and 41 deletions

View File

@@ -240,14 +240,12 @@ public class HoodieCompactedLogRecordScanner implements
/**
* Iterate over the GenericRecord in the block, read the hoodie key and partition path and merge
* with the application specific payload if the same key was found before Sufficient to just merge
* the log records since the base data is merged on previous compaction
* with the application specific payload if the same key was found before. Sufficient to just merge
* the log records since the base data is merged on previous compaction.
* Finally, merge this log block with the accumulated records
*/
private Map<String, HoodieRecord<? extends HoodieRecordPayload>> loadRecordsFromBlock(
private Map<String, HoodieRecord<? extends HoodieRecordPayload>> merge(
HoodieAvroDataBlock dataBlock) throws IOException {
// TODO (NA) - Instead of creating a new HashMap use the spillable map
Map<String, HoodieRecord<? extends HoodieRecordPayload>> recordsFromLastBlock = Maps
.newHashMap();
// TODO (NA) - Implement getRecordItr() in HoodieAvroDataBlock and use that here
List<IndexedRecord> recs = dataBlock.getRecords();
totalLogRecords.addAndGet(recs.size());
@@ -256,19 +254,19 @@ public class HoodieCompactedLogRecordScanner implements
.toString();
HoodieRecord<? extends HoodieRecordPayload> hoodieRecord =
SpillableMapUtils.convertToHoodieRecordPayload((GenericRecord) rec, this.payloadClassFQN);
if (recordsFromLastBlock.containsKey(key)) {
if (records.containsKey(key)) {
// Merge and store the merged record
HoodieRecordPayload combinedValue = recordsFromLastBlock.get(key).getData()
HoodieRecordPayload combinedValue = records.get(key).getData()
.preCombine(hoodieRecord.getData());
recordsFromLastBlock
records
.put(key, new HoodieRecord<>(new HoodieKey(key, hoodieRecord.getPartitionPath()),
combinedValue));
} else {
// Put the record as is
recordsFromLastBlock.put(key, hoodieRecord);
records.put(key, hoodieRecord);
}
});
return recordsFromLastBlock;
return records;
}
/**
@@ -277,11 +275,12 @@ public class HoodieCompactedLogRecordScanner implements
private void merge(Map<String, HoodieRecord<? extends HoodieRecordPayload>> records,
Deque<HoodieLogBlock> lastBlocks) throws IOException {
while (!lastBlocks.isEmpty()) {
log.info("Number of remaining logblocks to merge " + lastBlocks.size());
// poll the element at the bottom of the stack since that's the order it was inserted
HoodieLogBlock lastBlock = lastBlocks.pollLast();
switch (lastBlock.getBlockType()) {
case AVRO_DATA_BLOCK:
merge(records, loadRecordsFromBlock((HoodieAvroDataBlock) lastBlock));
merge((HoodieAvroDataBlock) lastBlock);
break;
case DELETE_BLOCK:
// TODO : If delete is the only block written and/or records are present in parquet file
@@ -295,25 +294,6 @@ public class HoodieCompactedLogRecordScanner implements
}
}
/**
* Merge the records read from a single data block with the accumulated records
*/
private void merge(Map<String, HoodieRecord<? extends HoodieRecordPayload>> records,
Map<String, HoodieRecord<? extends HoodieRecordPayload>> recordsFromLastBlock) {
recordsFromLastBlock.forEach((key, hoodieRecord) -> {
if (records.containsKey(key)) {
// Merge and store the merged record
HoodieRecordPayload combinedValue = records.get(key).getData()
.preCombine(hoodieRecord.getData());
records.put(key, new HoodieRecord<>(new HoodieKey(key, hoodieRecord.getPartitionPath()),
combinedValue));
} else {
// Put the record as is
records.put(key, hoodieRecord);
}
});
}
@Override
public Iterator<HoodieRecord<? extends HoodieRecordPayload>> iterator() {
return records.iterator();

View File

@@ -56,7 +56,6 @@ class HoodieLogFileReader implements HoodieLogFormat.Reader {
private static final byte[] oldMagicBuffer = new byte[4];
private static final byte[] magicBuffer = new byte[6];
private final Schema readerSchema;
private HoodieLogBlock nextBlock = null;
private LogFormatVersion nextBlockVersion;
private boolean readBlockLazily;
private long reverseLogFilePosition;
@@ -271,8 +270,8 @@ class HoodieLogFileReader implements HoodieLogFormat.Reader {
if (isEOF) {
return false;
}
this.nextBlock = readBlock();
return nextBlock != null;
// If not hasNext(), we either reach EOF or throw an exception on an invalid magic header
return true;
} catch (IOException e) {
throw new HoodieIOException("IOException when reading logfile " + logFile, e);
}
@@ -322,11 +321,12 @@ class HoodieLogFileReader implements HoodieLogFormat.Reader {
@Override
public HoodieLogBlock next() {
if (nextBlock == null) {
// maybe hasNext() was not called
hasNext();
try {
// hasNext() must be called before next()
return readBlock();
} catch(IOException io) {
throw new HoodieIOException("IOException when reading logblock from log file " + logFile, io);
}
return nextBlock;
}
/**
@@ -378,7 +378,7 @@ class HoodieLogFileReader implements HoodieLogFormat.Reader {
boolean hasNext = hasNext();
reverseLogFilePosition -= blockSize;
lastReverseLogFilePosition = reverseLogFilePosition;
return this.nextBlock;
return next();
}
/**

View File

@@ -24,6 +24,8 @@ import org.apache.hadoop.fs.FileSystem;
import java.io.IOException;
import java.util.List;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
public class HoodieLogFormatReader implements HoodieLogFormat.Reader {
@@ -34,6 +36,8 @@ public class HoodieLogFormatReader implements HoodieLogFormat.Reader {
private final boolean readBlocksLazily;
private final boolean reverseLogReader;
private final static Logger log = LogManager.getLogger(HoodieLogFormatReader.class);
HoodieLogFormatReader(FileSystem fs, List<HoodieLogFile> logFiles,
Schema readerSchema, boolean readBlocksLazily, boolean reverseLogReader) throws IOException {
this.logFiles = logFiles;
@@ -77,6 +81,7 @@ public class HoodieLogFormatReader implements HoodieLogFormat.Reader {
} catch (IOException io) {
throw new HoodieIOException("unable to initialize read with log file ", io);
}
log.info("Moving to the next reader for logfile " + currentReader.getLogFile());
return this.currentReader.hasNext();
}
return false;
@@ -84,8 +89,7 @@ public class HoodieLogFormatReader implements HoodieLogFormat.Reader {
@Override
public HoodieLogBlock next() {
HoodieLogBlock block = currentReader.next();
return block;
return currentReader.next();
}
@Override

View File

@@ -374,6 +374,7 @@ public class HoodieLogFormatTest {
assertEquals("Both records lists should be the same. (ordering guaranteed)", copyOfRecords1,
dataBlockRead.getRecords());
reader.hasNext();
nextBlock = reader.next();
dataBlockRead = (HoodieAvroDataBlock) nextBlock;
assertEquals("Read records size should be equal to the written records size",
@@ -381,6 +382,7 @@ public class HoodieLogFormatTest {
assertEquals("Both records lists should be the same. (ordering guaranteed)", copyOfRecords2,
dataBlockRead.getRecords());
reader.hasNext();
nextBlock = reader.next();
dataBlockRead = (HoodieAvroDataBlock) nextBlock;
assertEquals("Read records size should be equal to the written records size",