1
0

Capture record metadata before deflating the record, for use in record counting

This commit is contained in:
Kaushik Devarajaiah
2017-08-22 14:48:04 -07:00
committed by vinoth chandar
parent f2980052cd
commit c98ee057fc
9 changed files with 192 additions and 23 deletions

View File

@@ -29,13 +29,13 @@ import com.uber.hoodie.common.table.log.block.HoodieAvroDataBlock;
import com.uber.hoodie.common.table.log.block.HoodieDeleteBlock;
import com.uber.hoodie.common.util.FSUtils;
import com.uber.hoodie.common.util.HoodieAvroUtils;
import com.uber.hoodie.common.util.ReflectionUtils;
import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.exception.HoodieAppendException;
import com.uber.hoodie.exception.HoodieUpsertException;
import com.uber.hoodie.table.HoodieTable;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
@@ -45,10 +45,8 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
/**
* IO Operation to append data onto an existing file.
@@ -74,7 +72,7 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieIOH
String fileId,
Iterator<HoodieRecord<T>> recordItr) {
super(config, commitTime, hoodieTable);
WriteStatus writeStatus = new WriteStatus();
WriteStatus writeStatus = ReflectionUtils.loadClass(config.getWriteStatusClassName());
writeStatus.setStat(new HoodieDeltaWriteStat());
this.writeStatus = writeStatus;
this.fileId = fileId;
@@ -128,6 +126,7 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieIOH
}
private Optional<IndexedRecord> getIndexedRecord(HoodieRecord<T> hoodieRecord) {
Optional recordMetadata = hoodieRecord.getData().getMetadata();
try {
Optional<IndexedRecord> avroRecord = hoodieRecord.getData().getInsertValue(schema);
@@ -145,11 +144,11 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieIOH
}
hoodieRecord.deflate();
writeStatus.markSuccess(hoodieRecord);
writeStatus.markSuccess(hoodieRecord, recordMetadata);
return avroRecord;
} catch (Exception e) {
logger.error("Error writing record " + hoodieRecord, e);
writeStatus.markFailure(hoodieRecord, e);
writeStatus.markFailure(hoodieRecord, e, recordMetadata);
}
return Optional.empty();
}

View File

@@ -23,6 +23,7 @@ import com.uber.hoodie.common.model.HoodieRecordLocation;
import com.uber.hoodie.common.model.HoodieRecordPayload;
import com.uber.hoodie.common.model.HoodieWriteStat;
import com.uber.hoodie.common.util.FSUtils;
import com.uber.hoodie.common.util.ReflectionUtils;
import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.exception.HoodieInsertException;
import com.uber.hoodie.io.storage.HoodieStorageWriter;
@@ -50,7 +51,7 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieIOH
public HoodieCreateHandle(HoodieWriteConfig config, String commitTime,
HoodieTable<T> hoodieTable, String partitionPath) {
super(config, commitTime, hoodieTable);
this.status = new WriteStatus();
this.status = ReflectionUtils.loadClass(config.getWriteStatusClassName());
status.setFileId(UUID.randomUUID().toString());
status.setPartitionPath(partitionPath);
@@ -89,6 +90,7 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieIOH
* @param record
*/
public void write(HoodieRecord record) {
Optional recordMetadata = record.getData().getMetadata();
try {
Optional<IndexedRecord> avroRecord = record.getData().getInsertValue(schema);
@@ -100,13 +102,12 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieIOH
} else {
recordsDeleted++;
}
record.deflate();
status.markSuccess(record);
status.markSuccess(record, recordMetadata);
} catch (Throwable t) {
// Not throwing exception from here, since we don't want to fail the entire job
// for a single record
status.markFailure(record, t);
status.markFailure(record, t, recordMetadata);
logger.error("Error writing record " + record, t);
}
}

View File

@@ -17,6 +17,7 @@
package com.uber.hoodie.io;
import com.uber.hoodie.common.model.HoodiePartitionMetadata;
import com.uber.hoodie.common.util.ReflectionUtils;
import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.WriteStatus;
import com.uber.hoodie.common.model.HoodieRecord;
@@ -66,7 +67,7 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieIOHa
* Load the new incoming records in a map, and extract the old file path.
*/
private void init(String fileId, Iterator<HoodieRecord<T>> newRecordsItr) {
WriteStatus writeStatus = new WriteStatus();
WriteStatus writeStatus = ReflectionUtils.loadClass(config.getWriteStatusClassName());
writeStatus.setStat(new HoodieWriteStat());
this.writeStatus = writeStatus;
this.keyToNewRecords = new HashMap<>();
@@ -129,6 +130,7 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieIOHa
private boolean writeUpdateRecord(HoodieRecord<T> hoodieRecord, Optional<IndexedRecord> indexedRecord) {
Optional recordMetadata = hoodieRecord.getData().getMetadata();
try {
if(indexedRecord.isPresent()) {
storageWriter.writeAvroWithMetadata(indexedRecord.get(), hoodieRecord);
@@ -139,11 +141,11 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieIOHa
}
hoodieRecord.deflate();
writeStatus.markSuccess(hoodieRecord);
writeStatus.markSuccess(hoodieRecord, recordMetadata);
return true;
} catch (Exception e) {
logger.error("Error writing record "+ hoodieRecord, e);
writeStatus.markFailure(hoodieRecord, e);
writeStatus.markFailure(hoodieRecord, e, recordMetadata);
}
return false;
}