capture record metadata before deflating for record counting
committed by vinoth chandar
parent f2980052cd
commit c98ee057fc
WriteStatus.java

@@ -24,6 +24,8 @@ import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
+import java.util.Optional;
 
 /**
  * Status of a write operation.
@@ -47,16 +49,34 @@ public class WriteStatus implements Serializable {
     private long totalRecords = 0;
     private long totalErrorRecords = 0;
 
-    public void markSuccess(HoodieRecord record) {
-        writtenRecords.add(record);
-        totalRecords++;
+    /**
+     * Mark write as success, optionally using given parameters for the purpose of calculating
+     * some aggregate metrics. This method is not meant to cache passed arguments, since WriteStatus
+     * objects are collected in Spark Driver.
+     *
+     * @param record deflated {@code HoodieRecord} containing information that uniquely identifies it.
+     * @param optionalRecordMetadata optional metadata related to data contained in {@link HoodieRecord} before deflation.
+     */
+    public void markSuccess(HoodieRecord record,
+                            Optional<Map<String, String>> optionalRecordMetadata) {
+        writtenRecords.add(record);
+        totalRecords++;
     }
 
-    public void markFailure(HoodieRecord record, Throwable t) {
-        failedRecords.add(record);
-        errors.put(record.getKey(), t);
-        totalRecords++;
-        totalErrorRecords++;
+    /**
+     * Mark write as failed, optionally using given parameters for the purpose of calculating
+     * some aggregate metrics. This method is not meant to cache passed arguments, since WriteStatus
+     * objects are collected in Spark Driver.
+     *
+     * @param record deflated {@code HoodieRecord} containing information that uniquely identifies it.
+     * @param optionalRecordMetadata optional metadata related to data contained in {@link HoodieRecord} before deflation.
+     */
+    public void markFailure(HoodieRecord record, Throwable t,
+                            Optional<Map<String, String>> optionalRecordMetadata) {
+        failedRecords.add(record);
+        errors.put(record.getKey(), t);
+        totalRecords++;
+        totalErrorRecords++;
     }
 
     public String getFileId() {
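The two overloads above are what make WriteStatus worth subclassing: by the time markSuccess or markFailure runs, the record has already been deflated, so any per-record detail a subclass wants to aggregate has to arrive through the metadata map. As the javadoc warns, the arguments should not be cached, because WriteStatus objects are collected on the Spark driver; a subclass should fold them into a small aggregate instead. A minimal sketch of such a subclass follows; the class name, the "eventTime" key, and the metric are hypothetical, not part of this commit.

import java.util.Map;
import java.util.Optional;

import com.uber.hoodie.WriteStatus;
import com.uber.hoodie.common.model.HoodieRecord;

// Hypothetical subclass: tracks the newest event time seen across successfully
// written records, using the metadata captured before deflation.
public class EventTimeTrackingWriteStatus extends WriteStatus {

    private long maxEventTime = Long.MIN_VALUE;

    @Override
    public void markSuccess(HoodieRecord record, Optional<Map<String, String>> optionalRecordMetadata) {
        super.markSuccess(record, optionalRecordMetadata);
        // The record is already deflated here; only the metadata map carries data.
        optionalRecordMetadata.ifPresent(metadata -> {
            String eventTime = metadata.get("eventTime"); // hypothetical key
            if (eventTime != null) {
                maxEventTime = Math.max(maxEventTime, Long.parseLong(eventTime));
            }
        });
    }

    public long getMaxEventTime() {
        return maxEventTime;
    }
}

Folding the map into a single long keeps the object cheap to ship to the driver, which is the point of deflating in the first place.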
HoodieWriteConfig.java

@@ -18,6 +18,7 @@ package com.uber.hoodie.config;
 
 
 import com.google.common.base.Preconditions;
+import com.uber.hoodie.WriteStatus;
 import com.uber.hoodie.common.model.HoodieCleaningPolicy;
 import com.uber.hoodie.common.util.ReflectionUtils;
 import com.uber.hoodie.index.HoodieIndex;
@@ -54,6 +55,8 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
     private static final String DEFAULT_HOODIE_AUTO_COMMIT = "true";
     private static final String HOODIE_ASSUME_DATE_PARTITIONING_PROP = "hoodie.assume.date.partitioning";
     private static final String DEFAULT_ASSUME_DATE_PARTITIONING = "false";
+    private static final String HOODIE_WRITE_STATUS_CLASS_PROP = "hoodie.writestatus.class";
+    private static final String DEFAULT_HOODIE_WRITE_STATUS_CLASS = WriteStatus.class.getName();
 
     private HoodieWriteConfig(Properties props) {
         super(props);
@@ -106,6 +109,10 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
         return StorageLevel.fromString(props.getProperty(WRITE_STATUS_STORAGE_LEVEL));
     }
 
+    public String getWriteStatusClassName() {
+        return props.getProperty(HOODIE_WRITE_STATUS_CLASS_PROP);
+    }
+
     /**
      * compaction properties
      **/
@@ -363,6 +370,11 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
             return this;
         }
 
+        public Builder withWriteStatusClass(Class<? extends WriteStatus> writeStatusClass) {
+            props.setProperty(HOODIE_WRITE_STATUS_CLASS_PROP, writeStatusClass.getName());
+            return this;
+        }
+
         public HoodieWriteConfig build() {
             HoodieWriteConfig config = new HoodieWriteConfig(props);
             // Check for mandatory properties
@@ -383,6 +395,8 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
                 HOODIE_AUTO_COMMIT_PROP, DEFAULT_HOODIE_AUTO_COMMIT);
             setDefaultOnCondition(props, !props.containsKey(HOODIE_ASSUME_DATE_PARTITIONING_PROP),
                 HOODIE_ASSUME_DATE_PARTITIONING_PROP, DEFAULT_ASSUME_DATE_PARTITIONING);
+            setDefaultOnCondition(props, !props.containsKey(HOODIE_WRITE_STATUS_CLASS_PROP),
+                HOODIE_WRITE_STATUS_CLASS_PROP, DEFAULT_HOODIE_WRITE_STATUS_CLASS);
 
             // Make sure the props is propagated
             setDefaultOnCondition(props, !isIndexConfigSet,
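Taken together, the property key, the getter, the builder method, and the default value wire the custom class through end to end. Selecting an implementation then looks roughly like the following sketch; EventTimeTrackingWriteStatus is the hypothetical subclass from the earlier sketch, and the base path is illustrative.

// Sketch: plug a custom WriteStatus implementation into the write config.
HoodieWriteConfig config = HoodieWriteConfig.newBuilder()
    .withPath("/tmp/hoodie/sample-table")                     // illustrative base path
    .withWriteStatusClass(EventTimeTrackingWriteStatus.class) // hypothetical subclass
    .build();

// Each handle instantiates this class reflectively, one instance per file written.
String writeStatusClassName = config.getWriteStatusClassName();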
HoodieAppendHandle.java

@@ -29,13 +29,13 @@ import com.uber.hoodie.common.table.log.block.HoodieAvroDataBlock;
 import com.uber.hoodie.common.table.log.block.HoodieDeleteBlock;
 import com.uber.hoodie.common.util.FSUtils;
 import com.uber.hoodie.common.util.HoodieAvroUtils;
+import com.uber.hoodie.common.util.ReflectionUtils;
 import com.uber.hoodie.config.HoodieWriteConfig;
 import com.uber.hoodie.exception.HoodieAppendException;
 import com.uber.hoodie.exception.HoodieUpsertException;
 import com.uber.hoodie.table.HoodieTable;
 import org.apache.avro.generic.GenericRecord;
 import org.apache.avro.generic.IndexedRecord;
 import org.apache.commons.lang3.tuple.Pair;
 import org.apache.hadoop.fs.Path;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
@@ -45,10 +45,8 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
-import java.util.concurrent.atomic.AtomicLong;
-import java.util.stream.Collectors;
 
 /**
  * IO Operation to append data onto an existing file.
@@ -74,7 +72,7 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieIOH
                               String fileId,
                               Iterator<HoodieRecord<T>> recordItr) {
         super(config, commitTime, hoodieTable);
-        WriteStatus writeStatus = new WriteStatus();
+        WriteStatus writeStatus = ReflectionUtils.loadClass(config.getWriteStatusClassName());
         writeStatus.setStat(new HoodieDeltaWriteStat());
         this.writeStatus = writeStatus;
         this.fileId = fileId;
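All three handles now replace new WriteStatus() with ReflectionUtils.loadClass(config.getWriteStatusClassName()), as above. The utility's body is not part of this diff; assuming it resolves the configured name and invokes the no-arg constructor, a minimal equivalent looks like the sketch below, which is also why a pluggable WriteStatus class needs a public no-arg constructor.

// Sketch of the reflective instantiation the handles rely on, assuming
// ReflectionUtils.loadClass resolves the name and calls the no-arg constructor.
@SuppressWarnings("unchecked")
static <T> T loadClassSketch(String className) {
    try {
        return (T) Class.forName(className).newInstance();
    } catch (ClassNotFoundException | InstantiationException | IllegalAccessException e) {
        throw new IllegalArgumentException("Could not load class " + className, e);
    }
}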
@@ -128,6 +126,7 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieIOH
     }
 
     private Optional<IndexedRecord> getIndexedRecord(HoodieRecord<T> hoodieRecord) {
+        Optional recordMetadata = hoodieRecord.getData().getMetadata();
         try {
             Optional<IndexedRecord> avroRecord = hoodieRecord.getData().getInsertValue(schema);
@@ -145,11 +144,11 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieIOH
             }
 
             hoodieRecord.deflate();
-            writeStatus.markSuccess(hoodieRecord);
+            writeStatus.markSuccess(hoodieRecord, recordMetadata);
             return avroRecord;
         } catch (Exception e) {
             logger.error("Error writing record " + hoodieRecord, e);
-            writeStatus.markFailure(hoodieRecord, e);
+            writeStatus.markFailure(hoodieRecord, e, recordMetadata);
         }
         return Optional.empty();
     }
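In both hunks above the capture is the same one-line pattern: read getData().getMetadata() before deflate(), because deflation drops the payload so that WriteStatus objects stay small when Spark collects them on the driver; after that point the record can only identify itself. A payload opts in by returning a map from getMetadata(). The sketch below shows what that could look like; the class, its fields, and every signature other than getMetadata() are assumptions inferred from the call sites, not part of this commit.

import java.io.IOException;
import java.util.Collections;
import java.util.Map;
import java.util.Optional;

import org.apache.avro.Schema;
import org.apache.avro.generic.IndexedRecord;

import com.uber.hoodie.common.model.HoodieRecordPayload;

// Hypothetical payload that surfaces one value as pre-deflation metadata.
public class ClickEventPayload implements HoodieRecordPayload<ClickEventPayload> {

    private final IndexedRecord record; // assumed backing record (real payloads often store Avro bytes)
    private final long eventTime;       // assumed field to expose as metadata

    public ClickEventPayload(IndexedRecord record, long eventTime) {
        this.record = record;
        this.eventTime = eventTime;
    }

    @Override
    public ClickEventPayload preCombine(ClickEventPayload another) {
        // Keep whichever side carries the newer event time.
        return another.eventTime > this.eventTime ? another : this;
    }

    @Override
    public Optional<IndexedRecord> combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema) throws IOException {
        // Naive overwrite semantics for the sketch.
        return Optional.of(record);
    }

    @Override
    public Optional<IndexedRecord> getInsertValue(Schema schema) throws IOException {
        return Optional.of(record);
    }

    @Override
    public Optional<Map<String, String>> getMetadata() {
        // Read by the handles before deflate() and passed to markSuccess/markFailure.
        return Optional.of(Collections.singletonMap("eventTime", String.valueOf(eventTime)));
    }
}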
HoodieCreateHandle.java

@@ -23,6 +23,7 @@ import com.uber.hoodie.common.model.HoodieRecordLocation;
 import com.uber.hoodie.common.model.HoodieRecordPayload;
 import com.uber.hoodie.common.model.HoodieWriteStat;
 import com.uber.hoodie.common.util.FSUtils;
+import com.uber.hoodie.common.util.ReflectionUtils;
 import com.uber.hoodie.config.HoodieWriteConfig;
 import com.uber.hoodie.exception.HoodieInsertException;
 import com.uber.hoodie.io.storage.HoodieStorageWriter;
@@ -50,7 +51,7 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieIOH
     public HoodieCreateHandle(HoodieWriteConfig config, String commitTime,
                               HoodieTable<T> hoodieTable, String partitionPath) {
         super(config, commitTime, hoodieTable);
-        this.status = new WriteStatus();
+        this.status = ReflectionUtils.loadClass(config.getWriteStatusClassName());
         status.setFileId(UUID.randomUUID().toString());
         status.setPartitionPath(partitionPath);
@@ -89,6 +90,7 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieIOH
      * @param record
      */
     public void write(HoodieRecord record) {
+        Optional recordMetadata = record.getData().getMetadata();
         try {
             Optional<IndexedRecord> avroRecord = record.getData().getInsertValue(schema);
@@ -100,13 +102,12 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieIOH
             } else {
                 recordsDeleted++;
             }
-
             record.deflate();
-            status.markSuccess(record);
+            status.markSuccess(record, recordMetadata);
         } catch (Throwable t) {
             // Not throwing exception from here, since we don't want to fail the entire job
             // for a single record
-            status.markFailure(record, t);
+            status.markFailure(record, t, recordMetadata);
             logger.error("Error writing record " + record, t);
         }
     }
HoodieMergeHandle.java

@@ -17,6 +17,7 @@
 package com.uber.hoodie.io;
 
 import com.uber.hoodie.common.model.HoodiePartitionMetadata;
+import com.uber.hoodie.common.util.ReflectionUtils;
 import com.uber.hoodie.config.HoodieWriteConfig;
 import com.uber.hoodie.WriteStatus;
 import com.uber.hoodie.common.model.HoodieRecord;
@@ -66,7 +67,7 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieIOHa
      * Load the new incoming records in a map, and extract the old file path.
      */
     private void init(String fileId, Iterator<HoodieRecord<T>> newRecordsItr) {
-        WriteStatus writeStatus = new WriteStatus();
+        WriteStatus writeStatus = ReflectionUtils.loadClass(config.getWriteStatusClassName());
         writeStatus.setStat(new HoodieWriteStat());
         this.writeStatus = writeStatus;
         this.keyToNewRecords = new HashMap<>();
@@ -129,6 +130,7 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieIOHa
 
 
     private boolean writeUpdateRecord(HoodieRecord<T> hoodieRecord, Optional<IndexedRecord> indexedRecord) {
+        Optional recordMetadata = hoodieRecord.getData().getMetadata();
         try {
             if(indexedRecord.isPresent()) {
                 storageWriter.writeAvroWithMetadata(indexedRecord.get(), hoodieRecord);
@@ -139,11 +141,11 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieIOHa
             }
 
             hoodieRecord.deflate();
-            writeStatus.markSuccess(hoodieRecord);
+            writeStatus.markSuccess(hoodieRecord, recordMetadata);
             return true;
         } catch (Exception e) {
             logger.error("Error writing record "+ hoodieRecord, e);
-            writeStatus.markFailure(hoodieRecord, e);
+            writeStatus.markFailure(hoodieRecord, e, recordMetadata);
         }
         return false;
     }