Implement HoodieLogFormat replacing Avro as the default log format
Committed by vinoth chandar · parent 3c984447da · commit 240c91241b
@@ -45,7 +45,7 @@ public class HoodieCompactionConfig extends DefaultHoodieConfig {
   // Run a compaction every N delta commits
   public static final String INLINE_COMPACT_NUM_DELTA_COMMITS_PROP = "hoodie.compact.inline.max.delta.commits";
-  private static final String DEFAULT_INLINE_COMPACT_NUM_DELTA_COMMITS = "4";
+  private static final String DEFAULT_INLINE_COMPACT_NUM_DELTA_COMMITS = "10";

   public static final String CLEANER_FILE_VERSIONS_RETAINED_PROP =
       "hoodie.cleaner.fileversions.retained";

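The default bump from 4 to 10 delta commits means inline compaction now triggers less often out of the box. A minimal sketch of overriding it, assuming the builder exposes a fromProperties(...) entry point; the property key is taken verbatim from the hunk above, everything else is illustrative:

import java.util.Properties;

Properties props = new Properties();
// Trigger inline compaction after every 5 delta commits instead of the default 10.
props.setProperty("hoodie.compact.inline.max.delta.commits", "5");
HoodieCompactionConfig compactionConfig =
    HoodieCompactionConfig.newBuilder().fromProperties(props).build();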
@@ -22,15 +22,17 @@ import com.uber.hoodie.common.model.HoodieDeltaWriteStat;
 import com.uber.hoodie.common.model.HoodieRecord;
 import com.uber.hoodie.common.model.HoodieRecordLocation;
 import com.uber.hoodie.common.model.HoodieRecordPayload;
-import com.uber.hoodie.common.table.log.HoodieLogAppendConfig;
 import com.uber.hoodie.common.table.log.HoodieLogFile;
-import com.uber.hoodie.common.table.log.avro.RollingAvroLogAppender;
+import com.uber.hoodie.common.table.log.HoodieLogFormat;
+import com.uber.hoodie.common.table.log.HoodieLogFormat.Writer;
+import com.uber.hoodie.common.table.log.block.HoodieAvroDataBlock;
 import com.uber.hoodie.common.util.FSUtils;
 import com.uber.hoodie.common.util.HoodieAvroUtils;
 import com.uber.hoodie.config.HoodieWriteConfig;
 import com.uber.hoodie.exception.HoodieAppendException;
 import com.uber.hoodie.exception.HoodieUpsertException;
 import com.uber.hoodie.table.HoodieTable;
+import java.util.stream.Collectors;
 import org.apache.avro.generic.GenericRecord;
 import org.apache.avro.generic.IndexedRecord;
 import org.apache.hadoop.fs.Path;
@@ -51,11 +53,11 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieIOH
   private final WriteStatus writeStatus;
   private final String fileId;
   private String partitionPath;
-  private RollingAvroLogAppender logAppender;
   private List<HoodieRecord<T>> records;
   private long recordsWritten = 0;
   private long recordsDeleted = 0;
   private HoodieLogFile currentLogFile;
+  private Writer writer;

   public HoodieAppendHandle(HoodieWriteConfig config, String commitTime,
       HoodieTable<T> hoodieTable, String fileId, Iterator<HoodieRecord<T>> recordItr) {
@@ -84,17 +86,15 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieIOH
     writeStatus.getStat().setFileId(fileId);

     try {
-      HoodieLogAppendConfig logConfig = HoodieLogAppendConfig.newBuilder()
-          .onPartitionPath(
-              new Path(hoodieTable.getMetaClient().getBasePath(), partitionPath))
-          .withFileId(fileId).withBaseCommitTime(baseCommitTime).withSchema(schema)
-          .withFs(fs).withLogFileExtension(HoodieLogFile.DELTA_EXTENSION).build();
-      this.logAppender = new RollingAvroLogAppender(logConfig);
-      this.currentLogFile = logAppender.getConfig().getLogFile();
+      this.writer = HoodieLogFormat.newWriterBuilder()
+          .onParentPath(new Path(hoodieTable.getMetaClient().getBasePath(), partitionPath))
+          .withFileId(fileId).overBaseCommit(baseCommitTime)
+          .withFs(fs).withFileExtension(HoodieLogFile.DELTA_EXTENSION).build();
+      this.currentLogFile = writer.getLogFile();
       ((HoodieDeltaWriteStat) writeStatus.getStat())
           .setLogVersion(currentLogFile.getLogVersion());
       ((HoodieDeltaWriteStat) writeStatus.getStat())
-          .setLogOffset(logAppender.getCurrentSize());
+          .setLogOffset(writer.getCurrentSize());
     } catch (Exception e) {
       logger.error("Error in update task at commit " + commitTime, e);
       writeStatus.setGlobalError(e);
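For reference, here is the new writer construction in isolation. This is a sketch assembled only from the builder calls visible in the hunk above; the file system helper, paths, file id, and commit time are hypothetical stand-ins. Note that the schema no longer goes into the writer config as it did with RollingAvroLogAppender: it travels with each data block instead.

FileSystem fs = FSUtils.getFs();  // assumed helper from this codebase
// Log files live under <basePath>/<partitionPath>; both values here are made up.
Path parentPath = new Path("/tmp/hoodie/base_path", "2017/09/01");
Writer writer = HoodieLogFormat.newWriterBuilder()
    .onParentPath(parentPath)
    .withFileId("fileid-0001")         // hypothetical file id
    .overBaseCommit("20170901123456")  // hypothetical base commit time
    .withFs(fs)
    .withFileExtension(HoodieLogFile.DELTA_EXTENSION)
    .build();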
@@ -139,11 +139,11 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieIOH
   }

   public void doAppend() {
-    Iterator<IndexedRecord> recordItr =
+    List<IndexedRecord> recordItr =
         records.stream().map(this::getIndexedRecord).filter(Optional::isPresent)
-            .map(Optional::get).iterator();
+            .map(Optional::get).collect(Collectors.toList());
     try {
-      logAppender.append(recordItr);
+      writer = writer.appendBlock(new HoodieAvroDataBlock(recordItr, schema));
     } catch (Exception e) {
       throw new HoodieAppendException(
           "Failed while appending records to " + currentLogFile.getPath(), e);
@@ -152,8 +152,8 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieIOH

   public void close() {
     try {
-      if (logAppender != null) {
-        logAppender.close();
+      if (writer != null) {
+        writer.close();
       }
       writeStatus.getStat().setNumWrites(recordsWritten);
       writeStatus.getStat().setNumDeletes(recordsDeleted);
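The reassignment writer = writer.appendBlock(...) is the notable API shape in doAppend: appendBlock returns a Writer, presumably so the format can roll over to a new log file mid-stream, and the returned instance must be kept. A short continuation of the writer sketch above (exception handling elided; schema is the table's Avro schema):

List<IndexedRecord> toAppend = new ArrayList<>();  // populate with Avro records
// appendBlock may hand back a different Writer instance (e.g. after rolling
// to a new log file), so always keep the returned reference.
writer = writer.appendBlock(new HoodieAvroDataBlock(toAppend, schema));
writer.close();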
@@ -25,7 +25,7 @@ import com.uber.hoodie.common.model.HoodieCompactionMetadata;
 import com.uber.hoodie.common.model.HoodieTableType;
 import com.uber.hoodie.common.table.HoodieTableMetaClient;
 import com.uber.hoodie.common.table.HoodieTimeline;
-import com.uber.hoodie.common.table.log.avro.HoodieAvroReader;
+import com.uber.hoodie.common.table.log.HoodieCompactedLogRecordScanner;
 import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline;
 import com.uber.hoodie.common.table.timeline.HoodieInstant;
 import com.uber.hoodie.common.util.FSUtils;
@@ -145,9 +145,8 @@ public class HoodieRealtimeTableCompactor implements HoodieCompactor {
     // Load all the delta commits since the last compaction commit and get all the blocks to be loaded and load it using CompositeAvroLogReader
     // Since a DeltaCommit is not defined yet, reading all the records. revisit this soon.

-    HoodieAvroReader avroReader = new HoodieAvroReader(fs, operation.getDeltaFilePaths(),
-        readerSchema);
-    if (!avroReader.iterator().hasNext()) {
+    HoodieCompactedLogRecordScanner scanner = new HoodieCompactedLogRecordScanner(fs, operation.getDeltaFilePaths(), readerSchema);
+    if (!scanner.iterator().hasNext()) {
       return Lists.newArrayList();
     }

@@ -155,15 +154,15 @@ public class HoodieRealtimeTableCompactor implements HoodieCompactor {
     HoodieCopyOnWriteTable<HoodieAvroPayload> table =
         new HoodieCopyOnWriteTable<>(config, metaClient);
     Iterator<List<WriteStatus>> result = table
-        .handleUpdate(commitTime, operation.getFileId(), avroReader.iterator());
+        .handleUpdate(commitTime, operation.getFileId(), scanner.iterator());
     Iterable<List<WriteStatus>> resultIterable = () -> result;
     return StreamSupport.stream(resultIterable.spliterator(), false)
         .flatMap(Collection::stream)
         .map(WriteStatus::getStat)
         .map(s -> CompactionWriteStat.newBuilder().withHoodieWriteStat(s)
-            .setTotalRecordsToUpdate(avroReader.getTotalRecordsToUpdate())
-            .setTotalLogFiles(avroReader.getTotalLogFiles())
-            .setTotalLogRecords(avroReader.getTotalLogRecords())
+            .setTotalRecordsToUpdate(scanner.getTotalRecordsToUpdate())
+            .setTotalLogFiles(scanner.getTotalLogFiles())
+            .setTotalLogRecords(scanner.getTotalLogRecords())
             .onPartition(operation.getPartitionPath()).build())
         .collect(toList());
   }
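On the read side, HoodieCompactedLogRecordScanner replaces HoodieAvroReader as the compactor's merged view over delta log files. A sketch of standalone usage, built only from the constructor and accessors visible above; the log path and schema are placeholders, and the element type of iterator() is assumed from the handleUpdate call site:

FileSystem fs = FSUtils.getFs();  // assumed helper
Schema readerSchema = Schema.create(Schema.Type.STRING);  // placeholder; use the table's Avro schema
List<String> deltaPaths = Arrays.asList(
    "/tmp/hoodie/base_path/2017/09/01/some-delta-file.log");  // hypothetical log file path

HoodieCompactedLogRecordScanner scanner =
    new HoodieCompactedLogRecordScanner(fs, deltaPaths, readerSchema);
// Each element is the compacted (merged) record for one key across all log blocks.
Iterator<HoodieRecord<HoodieAvroPayload>> merged = scanner.iterator();
while (merged.hasNext()) {
  HoodieRecord<HoodieAvroPayload> record = merged.next();
}
// Counters feeding CompactionWriteStat in the hunk above:
long totalLogFiles = scanner.getTotalLogFiles();
long totalLogRecords = scanner.getTotalLogRecords();
long totalRecordsToUpdate = scanner.getTotalRecordsToUpdate();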