1
0

Adding metrics for MOR and COW

This commit is contained in:
Nishith Agarwal
2018-03-25 11:12:41 -07:00
committed by vinoth chandar
parent c66004d79a
commit 04655e9e85
19 changed files with 616 additions and 121 deletions

View File

@@ -58,7 +58,7 @@
"type":["null","long"]
},
{
"name":"totalRecordsToBeUpdate",
"name":"totalUpdatedRecordsCompacted",
"type":["null","long"]
}
]

View File

@@ -26,7 +26,7 @@
"type":["null","long"]
},
{
"name":"totalRecordsToBeUpdate",
"name":"totalUpdatedRecordsCompacted",
"type":["null","long"]
},
{

View File

@@ -208,6 +208,92 @@ public class HoodieCommitMetadata implements Serializable {
return totalWriteErrors;
}
public long getTotalRecordsDeleted() {
long totalDeletes = 0;
for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
for (HoodieWriteStat stat : stats) {
totalDeletes += stat.getNumDeletes();
}
}
return totalDeletes;
}
public Long getTotalLogRecordsCompacted() {
Long totalLogRecords = 0L;
for (Map.Entry<String, List<HoodieWriteStat>> entry : partitionToWriteStats.entrySet()) {
for (HoodieWriteStat writeStat : entry.getValue()) {
totalLogRecords += writeStat.getTotalLogRecords();
}
}
return totalLogRecords;
}
public Long getTotalLogFilesCompacted() {
Long totalLogFiles = 0L;
for (Map.Entry<String, List<HoodieWriteStat>> entry : partitionToWriteStats.entrySet()) {
for (HoodieWriteStat writeStat : entry.getValue()) {
totalLogFiles += writeStat.getTotalLogFilesCompacted();
}
}
return totalLogFiles;
}
public Long getTotalCompactedRecordsUpdated() {
Long totalUpdateRecords = 0L;
for (Map.Entry<String, List<HoodieWriteStat>> entry : partitionToWriteStats.entrySet()) {
for (HoodieWriteStat writeStat : entry.getValue()) {
totalUpdateRecords += writeStat.getTotalUpdatedRecordsCompacted();
}
}
return totalUpdateRecords;
}
public Long getTotalLogFilesSize() {
Long totalLogFilesSize = 0L;
for (Map.Entry<String, List<HoodieWriteStat>> entry : partitionToWriteStats.entrySet()) {
for (HoodieWriteStat writeStat : entry.getValue()) {
totalLogFilesSize += writeStat.getTotalLogSizeCompacted();
}
}
return totalLogFilesSize;
}
public Long getTotalScanTime() {
Long totalScanTime = 0L;
for (Map.Entry<String, List<HoodieWriteStat>> entry : partitionToWriteStats.entrySet()) {
for (HoodieWriteStat writeStat : entry.getValue()) {
if (writeStat.getRuntimeStats() != null) {
totalScanTime += writeStat.getRuntimeStats().getTotalScanTime();
}
}
}
return totalScanTime;
}
public Long getTotalCreateTime() {
Long totalCreateTime = 0L;
for (Map.Entry<String, List<HoodieWriteStat>> entry : partitionToWriteStats.entrySet()) {
for (HoodieWriteStat writeStat : entry.getValue()) {
if (writeStat.getRuntimeStats() != null) {
totalCreateTime += writeStat.getRuntimeStats().getTotalCreateTime();
}
}
}
return totalCreateTime;
}
public Long getTotalUpsertTime() {
Long totalUpsertTime = 0L;
for (Map.Entry<String, List<HoodieWriteStat>> entry : partitionToWriteStats.entrySet()) {
for (HoodieWriteStat writeStat : entry.getValue()) {
if (writeStat.getRuntimeStats() != null) {
totalUpsertTime += writeStat.getRuntimeStats().getTotalUpsertTime();
}
}
}
return totalUpsertTime;
}
@Override
public boolean equals(Object o) {
if (this == o) {
@@ -236,7 +322,7 @@ public class HoodieCommitMetadata implements Serializable {
public static HoodieCommitMetadata fromBytes(byte[] bytes) throws IOException {
return fromJsonString(new String(bytes, Charset.forName("utf-8")));
}
private static ObjectMapper getObjectMapper() {
ObjectMapper mapper = new ObjectMapper();
mapper.disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES);

View File

@@ -16,6 +16,7 @@
package com.uber.hoodie.common.model;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import java.io.Serializable;
import javax.annotation.Nullable;
@@ -90,19 +91,46 @@ public class HoodieWriteStat implements Serializable {
* Total number of log records that were compacted by a compaction operation
*/
@Nullable
private Long totalLogRecords;
private long totalLogRecords;
/**
* Total number of log files that were compacted by a compaction operation
* Total number of log files compacted for a file slice with this base fileid
*/
@Nullable
private Long totalLogFiles;
private long totalLogFilesCompacted;
/**
* Total size of all log files for a file slice with this base fileid
*/
@Nullable
private long totalLogSizeCompacted;
/**
* Total number of records updated by a compaction operation
*/
@Nullable
private Long totalRecordsToBeUpdate;
private long totalUpdatedRecordsCompacted;
/**
* Total number of log blocks seen in a compaction operation
*/
@Nullable
private long totalLogBlocks;
/**
* Total number of corrupt blocks seen in a compaction operation
*/
@Nullable
private long totalCorruptLogBlock;
/**
* Total number of rollback blocks seen in a compaction operation
*/
private long totalRollbackBlocks;
@Nullable
@JsonIgnore
private RuntimeStats runtimeStats;
public HoodieWriteStat() {
// called by jackson json lib
@@ -180,28 +208,28 @@ public class HoodieWriteStat implements Serializable {
this.partitionPath = partitionPath;
}
public Long getTotalLogRecords() {
public long getTotalLogRecords() {
return totalLogRecords;
}
public void setTotalLogRecords(Long totalLogRecords) {
public void setTotalLogRecords(long totalLogRecords) {
this.totalLogRecords = totalLogRecords;
}
public Long getTotalLogFiles() {
return totalLogFiles;
public long getTotalLogFilesCompacted() {
return totalLogFilesCompacted;
}
public void setTotalLogFiles(Long totalLogFiles) {
this.totalLogFiles = totalLogFiles;
public void setTotalLogFilesCompacted(long totalLogFilesCompacted) {
this.totalLogFilesCompacted = totalLogFilesCompacted;
}
public Long getTotalRecordsToBeUpdate() {
return totalRecordsToBeUpdate;
public long getTotalUpdatedRecordsCompacted() {
return totalUpdatedRecordsCompacted;
}
public void setTotalRecordsToBeUpdate(Long totalRecordsToBeUpdate) {
this.totalRecordsToBeUpdate = totalRecordsToBeUpdate;
public void setTotalUpdatedRecordsCompacted(long totalUpdatedRecordsCompacted) {
this.totalUpdatedRecordsCompacted = totalUpdatedRecordsCompacted;
}
public void setTempPath(String tempPath) {
@@ -212,6 +240,47 @@ public class HoodieWriteStat implements Serializable {
return this.tempPath;
}
public long getTotalLogSizeCompacted() {
return totalLogSizeCompacted;
}
public void setTotalLogSizeCompacted(long totalLogSizeCompacted) {
this.totalLogSizeCompacted = totalLogSizeCompacted;
}
public long getTotalLogBlocks() {
return totalLogBlocks;
}
public void setTotalLogBlocks(long totalLogBlocks) {
this.totalLogBlocks = totalLogBlocks;
}
public long getTotalCorruptLogBlock() {
return totalCorruptLogBlock;
}
public void setTotalCorruptLogBlock(long totalCorruptLogBlock) {
this.totalCorruptLogBlock = totalCorruptLogBlock;
}
public long getTotalRollbackBlocks() {
return totalRollbackBlocks;
}
public void setTotalRollbackBlocks(Long totalRollbackBlocks) {
this.totalRollbackBlocks = totalRollbackBlocks;
}
@Nullable
public RuntimeStats getRuntimeStats() {
return runtimeStats;
}
public void setRuntimeStats(@Nullable RuntimeStats runtimeStats) {
this.runtimeStats = runtimeStats;
}
/**
* Set path and tempPath relative to the given basePath.
*/
@@ -224,17 +293,25 @@ public class HoodieWriteStat implements Serializable {
@Override
public String toString() {
return new StringBuilder()
.append("HoodieWriteStat {")
.append("path=" + path)
.append(", tempPath=" + tempPath)
.append(", prevCommit='" + prevCommit + '\'')
.append(", numWrites=" + numWrites)
.append(", numDeletes=" + numDeletes)
.append(", numUpdateWrites=" + numUpdateWrites)
.append(", numWriteBytes=" + totalWriteBytes)
.append('}')
.toString();
return "HoodieWriteStat{"
+ "fileId='" + fileId + '\''
+ ", path='" + path + '\''
+ ", prevCommit='" + prevCommit + '\''
+ ", numWrites=" + numWrites
+ ", numDeletes=" + numDeletes
+ ", numUpdateWrites=" + numUpdateWrites
+ ", totalWriteBytes=" + totalWriteBytes
+ ", totalWriteErrors=" + totalWriteErrors
+ ", tempPath='" + tempPath + '\''
+ ", partitionPath='" + partitionPath
+ '\'' + ", totalLogRecords=" + totalLogRecords
+ ", totalLogFilesCompacted=" + totalLogFilesCompacted
+ ", totalLogSizeCompacted=" + totalLogSizeCompacted
+ ", totalUpdatedRecordsCompacted=" + totalUpdatedRecordsCompacted
+ ", totalLogBlocks=" + totalLogBlocks
+ ", totalCorruptLogBlock=" + totalCorruptLogBlock
+ ", totalRollbackBlocks=" + totalRollbackBlocks
+ '}';
}
@Override
@@ -260,4 +337,51 @@ public class HoodieWriteStat implements Serializable {
result = 31 * result + prevCommit.hashCode();
return result;
}
public static class RuntimeStats implements Serializable {
/**
* Total time taken to read and merge logblocks in a log file
*/
@Nullable
private long totalScanTime;
/**
* Total time taken by a Hoodie Merge for an existing file
*/
@Nullable
private long totalUpsertTime;
/**
* Total time taken by a Hoodie Insert to a file
*/
@Nullable
private long totalCreateTime;
@Nullable
public long getTotalScanTime() {
return totalScanTime;
}
public void setTotalScanTime(@Nullable long totalScanTime) {
this.totalScanTime = totalScanTime;
}
@Nullable
public long getTotalUpsertTime() {
return totalUpsertTime;
}
public void setTotalUpsertTime(@Nullable long totalUpsertTime) {
this.totalUpsertTime = totalUpsertTime;
}
@Nullable
public long getTotalCreateTime() {
return totalCreateTime;
}
public void setTotalCreateTime(@Nullable long totalCreateTime) {
this.totalCreateTime = totalCreateTime;
}
}
}

View File

@@ -29,6 +29,7 @@ import com.uber.hoodie.common.table.log.block.HoodieAvroDataBlock;
import com.uber.hoodie.common.table.log.block.HoodieCommandBlock;
import com.uber.hoodie.common.table.log.block.HoodieDeleteBlock;
import com.uber.hoodie.common.table.log.block.HoodieLogBlock;
import com.uber.hoodie.common.util.HoodieTimer;
import com.uber.hoodie.common.util.SpillableMapUtils;
import com.uber.hoodie.common.util.collection.ExternalSpillableMap;
import com.uber.hoodie.common.util.collection.converter.HoodieRecordConverter;
@@ -73,8 +74,14 @@ public class HoodieCompactedLogRecordScanner implements
private final Schema readerSchema;
// Total log files read - for metrics
private AtomicLong totalLogFiles = new AtomicLong(0);
// Total log blocks read - for metrics
private AtomicLong totalLogBlocks = new AtomicLong(0);
// Total log records read - for metrics
private AtomicLong totalLogRecords = new AtomicLong(0);
// Total number of rollbacks written across all log files
private AtomicLong totalRollbacks = new AtomicLong(0);
// Total number of corrupt blocks written across all log files
private AtomicLong totalCorruptBlocks = new AtomicLong(0);
// Total final list of compacted/merged records
private long totalRecordsToUpdate;
// Latest valid instant time
@@ -84,6 +91,10 @@ public class HoodieCompactedLogRecordScanner implements
private String payloadClassFQN;
// Store the last instant log blocks (needed to implement rollback)
private Deque<HoodieLogBlock> currentInstantLogBlocks = new ArrayDeque<>();
// Stores the total time taken to perform reading and merging of log blocks
private long totalTimeTakenToReadAndMergeBlocks = 0L;
// A timer for calculating elapsed time in millis
public HoodieTimer timer = new HoodieTimer();
public HoodieCompactedLogRecordScanner(FileSystem fs, String basePath, List<String> logFilePaths,
Schema readerSchema, String latestInstantTime, Long maxMemorySizeInBytes,
@@ -93,6 +104,8 @@ public class HoodieCompactedLogRecordScanner implements
this.hoodieTableMetaClient = new HoodieTableMetaClient(fs.getConf(), basePath);
// load class from the payload fully qualified class name
this.payloadClassFQN = this.hoodieTableMetaClient.getTableConfig().getPayloadClass();
this.totalLogFiles.addAndGet(logFilePaths.size());
timer.startTimer();
try {
// Store merged records for all versions for this log file, set the in-memory footprint to maxInMemoryMapSize
@@ -103,10 +116,10 @@ public class HoodieCompactedLogRecordScanner implements
new HoodieLogFormatReader(fs,
logFilePaths.stream().map(logFile -> new HoodieLogFile(new Path(logFile)))
.collect(Collectors.toList()), readerSchema, readBlocksLazily, reverseReader, bufferSize);
HoodieLogFile logFile;
while (logFormatReaderWrapper.hasNext()) {
HoodieLogFile logFile = logFormatReaderWrapper.getLogFile();
logFile = logFormatReaderWrapper.getLogFile();
log.info("Scanning log file " + logFile);
totalLogFiles.incrementAndGet();
// Use the HoodieLogFileReader to iterate through the blocks in the log file
HoodieLogBlock r = logFormatReaderWrapper.next();
if (r.getBlockType() != CORRUPT_BLOCK
@@ -164,6 +177,7 @@ public class HoodieCompactedLogRecordScanner implements
// and ensures the same rollback block (R1) is used to rollback both B1 & B2 with
// same instant_time
int numBlocksRolledBack = 0;
totalRollbacks.incrementAndGet();
while (!currentInstantLogBlocks.isEmpty()) {
HoodieLogBlock lastBlock = currentInstantLogBlocks.peek();
// handle corrupt blocks separately since they may not have metadata
@@ -200,6 +214,7 @@ public class HoodieCompactedLogRecordScanner implements
break;
case CORRUPT_BLOCK:
log.info("Found a corrupt block in " + logFile.getPath());
totalCorruptBlocks.incrementAndGet();
// If there is a corrupt block - we will assume that this was the next data block
currentInstantLogBlocks.push(r);
break;
@@ -216,14 +231,13 @@ public class HoodieCompactedLogRecordScanner implements
throw new HoodieIOException("IOException when reading log file ");
}
this.totalRecordsToUpdate = records.size();
this.totalTimeTakenToReadAndMergeBlocks = timer.endTimer();
log.info("MaxMemoryInBytes allowed for compaction => " + maxMemorySizeInBytes);
log.info("Number of entries in MemoryBasedMap in ExternalSpillableMap => " + records
.getInMemoryMapNumEntries());
log.info("Total size in bytes of MemoryBasedMap in ExternalSpillableMap => " + records
.getCurrentInMemoryMapSize());
log.info("Number of entries in DiskBasedMap in ExternalSpillableMap => " + records
.getDiskBasedMapNumEntries());
log.info("Number of entries in MemoryBasedMap in ExternalSpillableMap => " + records.getInMemoryMapNumEntries());
log.info("Total size in bytes of MemoryBasedMap in ExternalSpillableMap => " + records.getCurrentInMemoryMapSize());
log.info("Number of entries in DiskBasedMap in ExternalSpillableMap => " + records.getDiskBasedMapNumEntries());
log.info("Size of file spilled to disk => " + records.getSizeOfFileOnDiskInBytes());
log.debug("Total time taken for scanning and compacting log files => " + totalTimeTakenToReadAndMergeBlocks);
}
/**
@@ -307,6 +321,10 @@ public class HoodieCompactedLogRecordScanner implements
return totalLogRecords.get();
}
public long getTotalLogBlocks() {
return totalLogBlocks.get();
}
public Map<String, HoodieRecord<? extends HoodieRecordPayload>> getRecords() {
return records;
}
@@ -314,5 +332,17 @@ public class HoodieCompactedLogRecordScanner implements
public long getTotalRecordsToUpdate() {
return totalRecordsToUpdate;
}
public long getTotalRollbacks() {
return totalRollbacks.get();
}
public long getTotalCorruptBlocks() {
return totalCorruptBlocks.get();
}
public long getTotalTimeTakenToReadAndMergeBlocks() {
return totalTimeTakenToReadAndMergeBlocks;
}
}

View File

@@ -0,0 +1,70 @@
/*
* Copyright (c) 2018 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.util;
import com.uber.hoodie.exception.HoodieException;
import java.util.ArrayDeque;
import java.util.Deque;
/**
* Timing utility to help keep track of execution times of code blocks. This class helps to allow multiple
* timers started at the same time and automatically returns the execution time in the order in which the
* timers are stopped.
*/
public class HoodieTimer {
// Ordered stack of TimeInfo's to make sure stopping the timer returns the correct elapsed time
Deque<TimeInfo> timeInfoDeque = new ArrayDeque<>();
class TimeInfo {
// captures the startTime of the code block
long startTime;
// is the timing still running for the last started timer
boolean isRunning;
public TimeInfo(long startTime) {
this.startTime = startTime;
this.isRunning = true;
}
public long getStartTime() {
return startTime;
}
public boolean isRunning() {
return isRunning;
}
public long stop() {
this.isRunning = false;
return System.currentTimeMillis() - startTime;
}
}
public HoodieTimer startTimer() {
timeInfoDeque.push(new TimeInfo(System.currentTimeMillis()));
return this;
}
public long endTimer() {
if (timeInfoDeque.isEmpty()) {
throw new HoodieException("Timer was not started");
}
return timeInfoDeque.pop().stop();
}
}

View File

@@ -27,6 +27,7 @@ import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.uber.hoodie.avro.model.HoodieCleanMetadata;
import com.uber.hoodie.common.HoodieCleanStat;
import com.uber.hoodie.common.model.HoodieWriteStat.RuntimeStats;
import com.uber.hoodie.common.table.HoodieTableConfig;
import com.uber.hoodie.common.table.HoodieTableMetaClient;
import com.uber.hoodie.common.table.HoodieTimeline;
@@ -335,4 +336,25 @@ public class HoodieTestUtils {
}
return commits;
}
public static List<HoodieWriteStat> generateFakeHoodieWriteStat(int limit) {
List<HoodieWriteStat> writeStatList = new ArrayList<>();
for (int i = 0; i < limit; i++) {
HoodieWriteStat writeStat = new HoodieWriteStat();
writeStat.setFileId(UUID.randomUUID().toString());
writeStat.setNumDeletes(0);
writeStat.setNumUpdateWrites(100);
writeStat.setNumWrites(100);
writeStat.setPath("/some/fake/path" + i);
writeStat.setPartitionPath("/some/fake/partition/path" + i);
writeStat.setTotalLogFilesCompacted(100L);
RuntimeStats runtimeStats = new RuntimeStats();
runtimeStats.setTotalScanTime(100);
runtimeStats.setTotalCreateTime(100);
runtimeStats.setTotalUpsertTime(100);
writeStat.setRuntimeStats(runtimeStats);
writeStatList.add(writeStat);
}
return writeStatList;
}
}

View File

@@ -0,0 +1,42 @@
/*
* Copyright (c) 2018 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.model;
import java.util.List;
import org.junit.Assert;
import org.junit.Test;
public class TestHoodieCommitMetadata {
@Test
public void testPerfStatPresenceInHoodieMetadata() throws Exception {
List<HoodieWriteStat> fakeHoodieWriteStats = HoodieTestUtils.generateFakeHoodieWriteStat(100);
HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata();
fakeHoodieWriteStats.stream().forEach(stat -> commitMetadata.addWriteStat(stat.getPartitionPath(), stat));
Assert.assertTrue(commitMetadata.getTotalCreateTime() > 0);
Assert.assertTrue(commitMetadata.getTotalUpsertTime() > 0);
Assert.assertTrue(commitMetadata.getTotalScanTime() > 0);
Assert.assertTrue(commitMetadata.getTotalLogFilesCompacted() > 0);
String serializedCommitMetadata = commitMetadata.toJsonString();
HoodieCommitMetadata metadata = HoodieCommitMetadata.fromJsonString(serializedCommitMetadata);
// Make sure timing metrics are not written to instant file
Assert.assertTrue(metadata.getTotalScanTime() == 0);
Assert.assertTrue(metadata.getTotalLogFilesCompacted() > 0);
}
}