[HUDI-1771] Propagate CDC format for hoodie (#3285)
This commit is contained in:
@@ -19,7 +19,10 @@
|
||||
package org.apache.hudi.avro;
|
||||
|
||||
import org.apache.avro.specific.SpecificRecordBase;
|
||||
|
||||
import org.apache.hudi.common.model.HoodieOperation;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.common.util.StringUtils;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
@@ -151,7 +154,8 @@ public class HoodieAvroUtils {
|
||||
|| HoodieRecord.COMMIT_SEQNO_METADATA_FIELD.equals(fieldName)
|
||||
|| HoodieRecord.RECORD_KEY_METADATA_FIELD.equals(fieldName)
|
||||
|| HoodieRecord.PARTITION_PATH_METADATA_FIELD.equals(fieldName)
|
||||
|| HoodieRecord.FILENAME_METADATA_FIELD.equals(fieldName);
|
||||
|| HoodieRecord.FILENAME_METADATA_FIELD.equals(fieldName)
|
||||
|| HoodieRecord.OPERATION_METADATA_FIELD.equals(fieldName);
|
||||
}
|
||||
|
||||
public static Schema createHoodieWriteSchema(Schema originalSchema) {
|
||||
@@ -164,8 +168,20 @@ public class HoodieAvroUtils {
|
||||
|
||||
/**
|
||||
* Adds the Hoodie metadata fields to the given schema.
|
||||
*
|
||||
* @param schema The schema
|
||||
*/
|
||||
public static Schema addMetadataFields(Schema schema) {
|
||||
return addMetadataFields(schema, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds the Hoodie metadata fields to the given schema.
|
||||
*
|
||||
* @param schema The schema
|
||||
* @param withOperationField Whether to include the '_hoodie_operation' field
|
||||
*/
|
||||
public static Schema addMetadataFields(Schema schema, boolean withOperationField) {
|
||||
List<Schema.Field> parentFields = new ArrayList<>();
|
||||
|
||||
Schema.Field commitTimeField =
|
||||
@@ -184,6 +200,13 @@ public class HoodieAvroUtils {
|
||||
parentFields.add(recordKeyField);
|
||||
parentFields.add(partitionPathField);
|
||||
parentFields.add(fileNameField);
|
||||
|
||||
if (withOperationField) {
|
||||
final Schema.Field operationField =
|
||||
new Schema.Field(HoodieRecord.OPERATION_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE);
|
||||
parentFields.add(operationField);
|
||||
}
|
||||
|
||||
for (Schema.Field field : schema.getFields()) {
|
||||
if (!isMetadataField(field.name())) {
|
||||
Schema.Field newField = new Schema.Field(field.name(), field.schema(), field.doc(), field.defaultVal());
|
||||
@@ -202,7 +225,7 @@ public class HoodieAvroUtils {
|
||||
public static Schema removeMetadataFields(Schema schema) {
|
||||
List<Schema.Field> filteredFields = schema.getFields()
|
||||
.stream()
|
||||
.filter(field -> !HoodieRecord.HOODIE_META_COLUMNS.contains(field.name()))
|
||||
.filter(field -> !HoodieRecord.HOODIE_META_COLUMNS_WITH_OPERATION.contains(field.name()))
|
||||
.map(field -> new Schema.Field(field.name(), field.schema(), field.doc(), field.defaultVal()))
|
||||
.collect(Collectors.toList());
|
||||
Schema filteredSchema = Schema.createRecord(schema.getName(), schema.getDoc(), schema.getNamespace(), false);
|
||||
@@ -268,6 +291,11 @@ public class HoodieAvroUtils {
|
||||
return record;
|
||||
}
|
||||
|
||||
public static GenericRecord addOperationToRecord(GenericRecord record, HoodieOperation operation) {
|
||||
record.put(HoodieRecord.OPERATION_METADATA_FIELD, operation.getName());
|
||||
return record;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add null fields to passed in schema. Caller is responsible for ensuring there is no duplicates. As different query
|
||||
* engines have varying constraints regarding treating the case-sensitivity of fields, its best to let caller
|
||||
@@ -453,6 +481,22 @@ public class HoodieAvroUtils {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the string value of the given record {@code rec} and field {@code fieldName}.
|
||||
* The field and value both could be missing.
|
||||
*
|
||||
* @param rec The record
|
||||
* @param fieldName The field name
|
||||
*
|
||||
* @return the string form of the field
|
||||
* or empty if the schema does not contain the field name or the value is null
|
||||
*/
|
||||
public static Option<String> getNullableValAsString(GenericRecord rec, String fieldName) {
|
||||
Schema.Field field = rec.getSchema().getField(fieldName);
|
||||
String fieldVal = field == null ? null : StringUtils.objToString(rec.get(field.pos()));
|
||||
return Option.ofNullable(fieldVal);
|
||||
}
|
||||
|
||||
/**
|
||||
* This method converts values for fields with certain Avro/Parquet data types that require special handling.
|
||||
*
|
||||
|
||||
@@ -0,0 +1,125 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.common.model;
|
||||
|
||||
import org.apache.hudi.common.util.Option;
|
||||
|
||||
/**
|
||||
* Represents the changes that a row can describe in a changelog.
|
||||
*/
|
||||
public enum HoodieOperation {
|
||||
/**
|
||||
* Insert operation.
|
||||
*/
|
||||
INSERT("I", (byte) 0),
|
||||
/**
|
||||
* Update operation with previous record content,
|
||||
* should be used together with {@link #UPDATE_AFTER} for modeling an update operation.
|
||||
*/
|
||||
UPDATE_BEFORE("-U", (byte) 1),
|
||||
/**
|
||||
* Update operation with new record content.
|
||||
*/
|
||||
UPDATE_AFTER("U", (byte) 2),
|
||||
/**
|
||||
* Delete operation.
|
||||
*/
|
||||
DELETE("D", (byte) 4);
|
||||
|
||||
private final String name;
|
||||
private final byte value;
|
||||
|
||||
HoodieOperation(String name, byte value) {
|
||||
this.name = name;
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public byte getValue() {
|
||||
return value;
|
||||
}
|
||||
|
||||
public static HoodieOperation fromValue(byte value) {
|
||||
switch (value) {
|
||||
case 0:
|
||||
return INSERT;
|
||||
case 1:
|
||||
return UPDATE_BEFORE;
|
||||
case 2:
|
||||
return UPDATE_AFTER;
|
||||
case 3:
|
||||
return DELETE;
|
||||
default:
|
||||
throw new AssertionError();
|
||||
}
|
||||
}
|
||||
|
||||
public static HoodieOperation fromName(Option<String> nameOpt) {
|
||||
if (!nameOpt.isPresent()) {
|
||||
return null;
|
||||
}
|
||||
return fromName(nameOpt.get());
|
||||
}
|
||||
|
||||
public static HoodieOperation fromName(String name) {
|
||||
switch (name) {
|
||||
case "I":
|
||||
return INSERT;
|
||||
case "-U":
|
||||
return UPDATE_BEFORE;
|
||||
case "U":
|
||||
return UPDATE_AFTER;
|
||||
case "D":
|
||||
return DELETE;
|
||||
default:
|
||||
throw new AssertionError();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether the operation is INSERT.
|
||||
*/
|
||||
public static boolean isInsert(HoodieOperation operation) {
|
||||
return operation == INSERT;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether the operation is UPDATE_BEFORE.
|
||||
*/
|
||||
public static boolean isUpdateBefore(HoodieOperation operation) {
|
||||
return operation == UPDATE_BEFORE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether the operation is UPDATE_AFTER.
|
||||
*/
|
||||
public static boolean isUpdateAfter(HoodieOperation operation) {
|
||||
return operation == UPDATE_AFTER;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether the operation is DELETE.
|
||||
*/
|
||||
public static boolean isDelete(HoodieOperation operation) {
|
||||
return operation == DELETE;
|
||||
}
|
||||
}
|
||||
@@ -39,11 +39,19 @@ public class HoodieRecord<T extends HoodieRecordPayload> implements Serializable
|
||||
public static final String RECORD_KEY_METADATA_FIELD = "_hoodie_record_key";
|
||||
public static final String PARTITION_PATH_METADATA_FIELD = "_hoodie_partition_path";
|
||||
public static final String FILENAME_METADATA_FIELD = "_hoodie_file_name";
|
||||
public static final String OPERATION_METADATA_FIELD = "_hoodie_operation";
|
||||
|
||||
public static final List<String> HOODIE_META_COLUMNS =
|
||||
CollectionUtils.createImmutableList(COMMIT_TIME_METADATA_FIELD, COMMIT_SEQNO_METADATA_FIELD,
|
||||
RECORD_KEY_METADATA_FIELD, PARTITION_PATH_METADATA_FIELD, FILENAME_METADATA_FIELD);
|
||||
|
||||
// Temporary to support the '_hoodie_operation' field, once we solve
|
||||
// the compatibility problem, it can be removed.
|
||||
public static final List<String> HOODIE_META_COLUMNS_WITH_OPERATION =
|
||||
CollectionUtils.createImmutableList(COMMIT_TIME_METADATA_FIELD, COMMIT_SEQNO_METADATA_FIELD,
|
||||
RECORD_KEY_METADATA_FIELD, PARTITION_PATH_METADATA_FIELD, FILENAME_METADATA_FIELD,
|
||||
OPERATION_METADATA_FIELD);
|
||||
|
||||
public static final Map<String, Integer> HOODIE_META_COLUMNS_NAME_TO_POS =
|
||||
IntStream.range(0, HOODIE_META_COLUMNS.size()).mapToObj(idx -> Pair.of(HOODIE_META_COLUMNS.get(idx), idx))
|
||||
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
|
||||
@@ -73,12 +81,22 @@ public class HoodieRecord<T extends HoodieRecordPayload> implements Serializable
|
||||
*/
|
||||
private boolean sealed;
|
||||
|
||||
/**
|
||||
* The CDC (change data capture) operation of this record; may be null.
|
||||
*/
|
||||
private HoodieOperation operation;
|
||||
|
||||
/**
 * Creates a record for the given key and payload with no operation attached.
 *
 * @param key  The record key
 * @param data The record payload
 */
public HoodieRecord(HoodieKey key, T data) {
  // Delegate to the full constructor; a null operation means "not specified".
  this(key, data, null);
}
|
||||
|
||||
/**
 * Creates a record for the given key, payload and operation.
 *
 * @param key       The record key
 * @param data      The record payload
 * @param operation The operation of this record, may be null
 */
public HoodieRecord(HoodieKey key, T data, HoodieOperation operation) {
  // Values supplied by the caller.
  this.key = key;
  this.data = data;
  this.operation = operation;
  // No locations are assigned at construction and the record starts unsealed.
  this.currentLocation = null;
  this.newLocation = null;
  this.sealed = false;
}
|
||||
|
||||
public HoodieRecord(HoodieRecord<T> record) {
|
||||
@@ -86,6 +104,7 @@ public class HoodieRecord<T extends HoodieRecordPayload> implements Serializable
|
||||
this.currentLocation = record.currentLocation;
|
||||
this.newLocation = record.newLocation;
|
||||
this.sealed = record.sealed;
|
||||
this.operation = record.operation;
|
||||
}
|
||||
|
||||
public HoodieRecord() {
|
||||
@@ -95,6 +114,10 @@ public class HoodieRecord<T extends HoodieRecordPayload> implements Serializable
|
||||
return key;
|
||||
}
|
||||
|
||||
public HoodieOperation getOperation() {
|
||||
return operation;
|
||||
}
|
||||
|
||||
public T getData() {
|
||||
if (data == null) {
|
||||
throw new IllegalStateException("Payload already deflated for record.");
|
||||
|
||||
@@ -55,10 +55,16 @@ import org.apache.parquet.schema.MessageType;
|
||||
public class TableSchemaResolver {
|
||||
|
||||
private static final Logger LOG = LogManager.getLogger(TableSchemaResolver.class);
|
||||
private HoodieTableMetaClient metaClient;
|
||||
private final HoodieTableMetaClient metaClient;
|
||||
private final boolean withOperationField;
|
||||
|
||||
/**
 * Creates a resolver that does not include the operation field.
 *
 * @param metaClient The meta client of the table
 */
public TableSchemaResolver(HoodieTableMetaClient metaClient) {
  // Delegate with the operation field disabled.
  this(metaClient, false);
}
|
||||
|
||||
/**
 * Creates a resolver for the given table.
 *
 * @param metaClient         The meta client of the table
 * @param withOperationField Flag passed on to {@code HoodieAvroUtils.addMetadataFields}
 *                           when metadata fields are added to a resolved schema
 */
public TableSchemaResolver(HoodieTableMetaClient metaClient, boolean withOperationField) {
  this.withOperationField = withOperationField;
  this.metaClient = metaClient;
}
|
||||
|
||||
/**
|
||||
@@ -170,7 +176,7 @@ public class TableSchemaResolver {
|
||||
Option<Schema> schemaFromTableConfig = metaClient.getTableConfig().getTableCreateSchema();
|
||||
if (schemaFromTableConfig.isPresent()) {
|
||||
if (includeMetadataFields) {
|
||||
return HoodieAvroUtils.addMetadataFields(schemaFromTableConfig.get());
|
||||
return HoodieAvroUtils.addMetadataFields(schemaFromTableConfig.get(), withOperationField);
|
||||
} else {
|
||||
return schemaFromTableConfig.get();
|
||||
}
|
||||
@@ -256,7 +262,7 @@ public class TableSchemaResolver {
|
||||
|
||||
Schema schema = new Schema.Parser().parse(existingSchemaStr);
|
||||
if (includeMetadataFields) {
|
||||
schema = HoodieAvroUtils.addMetadataFields(schema);
|
||||
schema = HoodieAvroUtils.addMetadataFields(schema, withOperationField);
|
||||
}
|
||||
return Option.of(schema);
|
||||
} catch (Exception e) {
|
||||
|
||||
@@ -96,6 +96,8 @@ public abstract class AbstractHoodieLogRecordScanner {
|
||||
private final int bufferSize;
|
||||
// optional instant range for incremental block filtering
|
||||
private final Option<InstantRange> instantRange;
|
||||
// Read the operation metadata field from the avro record
|
||||
private final boolean withOperationField;
|
||||
// FileSystem
|
||||
private final FileSystem fs;
|
||||
// Total log files read - for metrics
|
||||
@@ -114,7 +116,8 @@ public abstract class AbstractHoodieLogRecordScanner {
|
||||
private float progress = 0.0f;
|
||||
|
||||
protected AbstractHoodieLogRecordScanner(FileSystem fs, String basePath, List<String> logFilePaths, Schema readerSchema,
|
||||
String latestInstantTime, boolean readBlocksLazily, boolean reverseReader, int bufferSize, Option<InstantRange> instantRange) {
|
||||
String latestInstantTime, boolean readBlocksLazily, boolean reverseReader,
|
||||
int bufferSize, Option<InstantRange> instantRange, boolean withOperationField) {
|
||||
this.readerSchema = readerSchema;
|
||||
this.latestInstantTime = latestInstantTime;
|
||||
this.hoodieTableMetaClient = HoodieTableMetaClient.builder().setConf(fs.getConf()).setBasePath(basePath).build();
|
||||
@@ -131,6 +134,7 @@ public abstract class AbstractHoodieLogRecordScanner {
|
||||
this.fs = fs;
|
||||
this.bufferSize = bufferSize;
|
||||
this.instantRange = instantRange;
|
||||
this.withOperationField = withOperationField;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -294,7 +298,7 @@ public abstract class AbstractHoodieLogRecordScanner {
|
||||
private boolean isNewInstantBlock(HoodieLogBlock logBlock) {
|
||||
return currentInstantLogBlocks.size() > 0 && currentInstantLogBlocks.peek().getBlockType() != CORRUPT_BLOCK
|
||||
&& !logBlock.getLogBlockHeader().get(INSTANT_TIME)
|
||||
.contentEquals(currentInstantLogBlocks.peek().getLogBlockHeader().get(INSTANT_TIME));
|
||||
.contentEquals(currentInstantLogBlocks.peek().getLogBlockHeader().get(INSTANT_TIME));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -312,9 +316,9 @@ public abstract class AbstractHoodieLogRecordScanner {
|
||||
|
||||
protected HoodieRecord<?> createHoodieRecord(IndexedRecord rec) {
|
||||
if (!simpleKeyGenFields.isPresent()) {
|
||||
return SpillableMapUtils.convertToHoodieRecordPayload((GenericRecord) rec, this.payloadClassFQN);
|
||||
return SpillableMapUtils.convertToHoodieRecordPayload((GenericRecord) rec, this.payloadClassFQN, this.withOperationField);
|
||||
} else {
|
||||
return SpillableMapUtils.convertToHoodieRecordPayload((GenericRecord) rec, this.payloadClassFQN, this.simpleKeyGenFields.get());
|
||||
return SpillableMapUtils.convertToHoodieRecordPayload((GenericRecord) rec, this.payloadClassFQN, this.simpleKeyGenFields.get(), this.withOperationField);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -392,6 +396,10 @@ public abstract class AbstractHoodieLogRecordScanner {
|
||||
return totalCorruptBlocks.get();
|
||||
}
|
||||
|
||||
public boolean isWithOperationField() {
|
||||
return withOperationField;
|
||||
}
|
||||
|
||||
/**
|
||||
* Builder used to build {@code AbstractHoodieLogRecordScanner}.
|
||||
*/
|
||||
@@ -417,6 +425,10 @@ public abstract class AbstractHoodieLogRecordScanner {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
public Builder withOperationField(boolean withOperationField) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
public abstract AbstractHoodieLogRecordScanner build();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -45,8 +45,8 @@ public class HoodieFileSliceReader<T extends HoodieRecordPayload> implements Ite
|
||||
while (baseIterator.hasNext()) {
|
||||
GenericRecord record = (GenericRecord) baseIterator.next();
|
||||
HoodieRecord<? extends HoodieRecordPayload> hoodieRecord = simpleKeyGenFieldsOpt.isPresent()
|
||||
? SpillableMapUtils.convertToHoodieRecordPayload(record, payloadClass, simpleKeyGenFieldsOpt.get())
|
||||
: SpillableMapUtils.convertToHoodieRecordPayload(record, payloadClass);
|
||||
? SpillableMapUtils.convertToHoodieRecordPayload(record, payloadClass, simpleKeyGenFieldsOpt.get(), scanner.isWithOperationField())
|
||||
: SpillableMapUtils.convertToHoodieRecordPayload(record, payloadClass, scanner.isWithOperationField());
|
||||
scanner.processNextRecord(hoodieRecord);
|
||||
}
|
||||
return new HoodieFileSliceReader(scanner.iterator());
|
||||
|
||||
@@ -43,7 +43,7 @@ import java.util.Map;
|
||||
/**
|
||||
* Scans through all the blocks in a list of HoodieLogFile and builds up a compacted/merged list of records which will
|
||||
* be used as a lookup table when merging the base columnar file with the redo log file.
|
||||
*
|
||||
* <p>
|
||||
* NOTE: If readBlockLazily is turned on, does not merge, instead keeps reading log blocks and merges everything at once
|
||||
* This is an optimization to avoid seek() back and forth to read new block (forward seek()) and lazily read content of
|
||||
* seen block (reverse and forward seek()) during merge | | Read Block 1 Metadata | | Read Block 1 Data | | | Read Block
|
||||
@@ -72,11 +72,12 @@ public class HoodieMergedLogRecordScanner extends AbstractHoodieLogRecordScanner
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
protected HoodieMergedLogRecordScanner(FileSystem fs, String basePath, List<String> logFilePaths, Schema readerSchema,
|
||||
String latestInstantTime, Long maxMemorySizeInBytes, boolean readBlocksLazily,
|
||||
boolean reverseReader, int bufferSize, String spillableMapBasePath,
|
||||
Option<InstantRange> instantRange, boolean autoScan,
|
||||
ExternalSpillableMap.DiskMapType diskMapType, boolean isBitCaskDiskMapCompressionEnabled) {
|
||||
super(fs, basePath, logFilePaths, readerSchema, latestInstantTime, readBlocksLazily, reverseReader, bufferSize, instantRange);
|
||||
String latestInstantTime, Long maxMemorySizeInBytes, boolean readBlocksLazily,
|
||||
boolean reverseReader, int bufferSize, String spillableMapBasePath,
|
||||
Option<InstantRange> instantRange, boolean autoScan,
|
||||
ExternalSpillableMap.DiskMapType diskMapType, boolean isBitCaskDiskMapCompressionEnabled,
|
||||
boolean withOperationField) {
|
||||
super(fs, basePath, logFilePaths, readerSchema, latestInstantTime, readBlocksLazily, reverseReader, bufferSize, instantRange, withOperationField);
|
||||
try {
|
||||
// Store merged records for all versions for this log file, set the in-memory footprint to maxInMemoryMapSize
|
||||
this.records = new ExternalSpillableMap<>(maxMemorySizeInBytes, spillableMapBasePath, new DefaultSizeEstimator(),
|
||||
@@ -132,8 +133,10 @@ public class HoodieMergedLogRecordScanner extends AbstractHoodieLogRecordScanner
|
||||
if (records.containsKey(key)) {
|
||||
// Merge and store the merged record. The HoodieRecordPayload implementation is free to decide what should be
|
||||
// done when a delete (empty payload) is encountered before or after an insert/update.
|
||||
|
||||
// Always use the natural order now.
|
||||
HoodieRecordPayload combinedValue = hoodieRecord.getData().preCombine(records.get(key).getData());
|
||||
records.put(key, new HoodieRecord<>(new HoodieKey(key, hoodieRecord.getPartitionPath()), combinedValue));
|
||||
records.put(key, new HoodieRecord<>(new HoodieKey(key, hoodieRecord.getPartitionPath()), combinedValue, hoodieRecord.getOperation()));
|
||||
} else {
|
||||
// Put the record as is
|
||||
records.put(key, hoodieRecord);
|
||||
@@ -177,6 +180,8 @@ public class HoodieMergedLogRecordScanner extends AbstractHoodieLogRecordScanner
|
||||
private Option<InstantRange> instantRange = Option.empty();
|
||||
// auto scan default true
|
||||
private boolean autoScan = true;
|
||||
// operation field default false
|
||||
private boolean withOperationField = false;
|
||||
|
||||
public Builder withFileSystem(FileSystem fs) {
|
||||
this.fs = fs;
|
||||
@@ -248,12 +253,17 @@ public class HoodieMergedLogRecordScanner extends AbstractHoodieLogRecordScanner
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withOperationField(boolean withOperationField) {
|
||||
this.withOperationField = withOperationField;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public HoodieMergedLogRecordScanner build() {
|
||||
return new HoodieMergedLogRecordScanner(fs, basePath, logFilePaths, readerSchema,
|
||||
latestInstantTime, maxMemorySizeInBytes, readBlocksLazily, reverseReader,
|
||||
bufferSize, spillableMapBasePath, instantRange, autoScan,
|
||||
diskMapType, isBitCaskDiskMapCompressionEnabled);
|
||||
diskMapType, isBitCaskDiskMapCompressionEnabled, withOperationField);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -36,8 +36,9 @@ public class HoodieUnMergedLogRecordScanner extends AbstractHoodieLogRecordScann
|
||||
private final LogRecordScannerCallback callback;
|
||||
|
||||
private HoodieUnMergedLogRecordScanner(FileSystem fs, String basePath, List<String> logFilePaths, Schema readerSchema,
|
||||
String latestInstantTime, boolean readBlocksLazily, boolean reverseReader, int bufferSize, LogRecordScannerCallback callback) {
|
||||
super(fs, basePath, logFilePaths, readerSchema, latestInstantTime, readBlocksLazily, reverseReader, bufferSize, Option.empty());
|
||||
String latestInstantTime, boolean readBlocksLazily, boolean reverseReader, int bufferSize,
|
||||
LogRecordScannerCallback callback, Option<InstantRange> instantRange) {
|
||||
super(fs, basePath, logFilePaths, readerSchema, latestInstantTime, readBlocksLazily, reverseReader, bufferSize, instantRange, false);
|
||||
this.callback = callback;
|
||||
}
|
||||
|
||||
@@ -80,6 +81,7 @@ public class HoodieUnMergedLogRecordScanner extends AbstractHoodieLogRecordScann
|
||||
private boolean readBlocksLazily;
|
||||
private boolean reverseReader;
|
||||
private int bufferSize;
|
||||
private Option<InstantRange> instantRange = Option.empty();
|
||||
// specific configurations
|
||||
private LogRecordScannerCallback callback;
|
||||
|
||||
@@ -123,6 +125,11 @@ public class HoodieUnMergedLogRecordScanner extends AbstractHoodieLogRecordScann
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withInstantRange(Option<InstantRange> instantRange) {
|
||||
this.instantRange = instantRange;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withLogRecordScannerCallback(LogRecordScannerCallback callback) {
|
||||
this.callback = callback;
|
||||
return this;
|
||||
@@ -131,7 +138,7 @@ public class HoodieUnMergedLogRecordScanner extends AbstractHoodieLogRecordScann
|
||||
@Override
|
||||
public HoodieUnMergedLogRecordScanner build() {
|
||||
return new HoodieUnMergedLogRecordScanner(fs, basePath, logFilePaths, readerSchema,
|
||||
latestInstantTime, readBlocksLazily, reverseReader, bufferSize, callback);
|
||||
latestInstantTime, readBlocksLazily, reverseReader, bufferSize, callback, instantRange);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,6 +20,7 @@ package org.apache.hudi.common.util;
|
||||
|
||||
import org.apache.hudi.common.fs.SizeAwareDataOutputStream;
|
||||
import org.apache.hudi.common.model.HoodieKey;
|
||||
import org.apache.hudi.common.model.HoodieOperation;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.model.HoodieRecordPayload;
|
||||
import org.apache.hudi.common.util.collection.BitCaskDiskMap.FileEntry;
|
||||
@@ -32,6 +33,8 @@ import java.io.IOException;
|
||||
import java.io.RandomAccessFile;
|
||||
import java.util.zip.CRC32;
|
||||
|
||||
import static org.apache.hudi.avro.HoodieAvroUtils.getNullableValAsString;
|
||||
|
||||
/**
|
||||
* A utility class supports spillable map.
|
||||
*/
|
||||
@@ -110,18 +113,23 @@ public class SpillableMapUtils {
|
||||
/**
|
||||
* Utility method to convert bytes to HoodieRecord using schema and payload class.
|
||||
*/
|
||||
public static <R> R convertToHoodieRecordPayload(GenericRecord rec, String payloadClazz) {
|
||||
return convertToHoodieRecordPayload(rec, payloadClazz, Pair.of(HoodieRecord.RECORD_KEY_METADATA_FIELD, HoodieRecord.PARTITION_PATH_METADATA_FIELD));
|
||||
public static <R> R convertToHoodieRecordPayload(GenericRecord rec, String payloadClazz, boolean withOperationField) {
|
||||
return convertToHoodieRecordPayload(rec, payloadClazz, Pair.of(HoodieRecord.RECORD_KEY_METADATA_FIELD, HoodieRecord.PARTITION_PATH_METADATA_FIELD), withOperationField);
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility method to convert bytes to HoodieRecord using schema and payload class.
|
||||
*/
|
||||
public static <R> R convertToHoodieRecordPayload(GenericRecord rec, String payloadClazz, Pair<String, String> recordKeyPartitionPathPair) {
|
||||
public static <R> R convertToHoodieRecordPayload(GenericRecord rec, String payloadClazz,
|
||||
Pair<String, String> recordKeyPartitionPathPair,
|
||||
boolean withOperationField) {
|
||||
String recKey = rec.get(recordKeyPartitionPathPair.getLeft()).toString();
|
||||
String partitionPath = rec.get(recordKeyPartitionPathPair.getRight()).toString();
|
||||
HoodieOperation operation = withOperationField
|
||||
? HoodieOperation.fromName(getNullableValAsString(rec, HoodieRecord.OPERATION_METADATA_FIELD)) : null;
|
||||
HoodieRecord<? extends HoodieRecordPayload> hoodieRecord = new HoodieRecord<>(new HoodieKey(recKey, partitionPath),
|
||||
ReflectionUtils.loadPayload(payloadClazz, new Object[] {Option.of(rec)}, Option.class));
|
||||
ReflectionUtils.loadPayload(payloadClazz, new Object[] {Option.of(rec)}, Option.class), operation);
|
||||
|
||||
return (R) hoodieRecord;
|
||||
}
|
||||
|
||||
|
||||
@@ -132,10 +132,12 @@ public class HoodieBackedTableMetadata extends BaseTableMetadata {
|
||||
HoodieTimer readTimer = new HoodieTimer().startTimer();
|
||||
Option<GenericRecord> baseRecord = baseFileReader.getRecordByKey(key);
|
||||
if (baseRecord.isPresent()) {
|
||||
hoodieRecord = tableConfig.populateMetaFields() ? SpillableMapUtils.convertToHoodieRecordPayload(baseRecord.get(),
|
||||
tableConfig.getPayloadClass()) : SpillableMapUtils.convertToHoodieRecordPayload(baseRecord.get(),
|
||||
tableConfig.getPayloadClass(), Pair.of(tableConfig.getRecordKeyFieldProp(),
|
||||
tableConfig.getPartitionFieldProp()));
|
||||
hoodieRecord = tableConfig.populateMetaFields()
|
||||
? SpillableMapUtils.convertToHoodieRecordPayload(baseRecord.get(), tableConfig.getPayloadClass(), false)
|
||||
: SpillableMapUtils.convertToHoodieRecordPayload(
|
||||
baseRecord.get(),
|
||||
tableConfig.getPayloadClass(),
|
||||
Pair.of(tableConfig.getRecordKeyFieldProp(), tableConfig.getPartitionFieldProp()), false);
|
||||
metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.BASEFILE_READ_STR, readTimer.endTimer()));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -45,7 +45,7 @@ public class HoodieMetadataMergedLogRecordScanner extends HoodieMergedLogRecordS
|
||||
String spillableMapBasePath, Set<String> mergeKeyFilter,
|
||||
ExternalSpillableMap.DiskMapType diskMapType, boolean isBitCaskDiskMapCompressionEnabled) {
|
||||
super(fs, basePath, logFilePaths, readerSchema, latestInstantTime, maxMemorySizeInBytes, false, false, bufferSize,
|
||||
spillableMapBasePath, Option.empty(), false, diskMapType, isBitCaskDiskMapCompressionEnabled);
|
||||
spillableMapBasePath, Option.empty(), false, diskMapType, isBitCaskDiskMapCompressionEnabled, false);
|
||||
this.mergeKeyFilter = mergeKeyFilter;
|
||||
|
||||
performScan();
|
||||
|
||||
Reference in New Issue
Block a user