[HUDI-3995] Making perf optimizations for bulk insert row writer path (#5462)
- Avoid using a UDF for key generation with SimpleKeyGenerator and NonpartitionedKeyGenerator.
- Fixed NonpartitionedKeyGenerator to fetch the record key directly from the Row rather than going through a GenericRecord.
- Other minor fixes that use static values instead of looking them up in a hash map.
This commit is contained in:
committed by
GitHub
parent
6b47ef6ed2
commit
6285a239a3
@@ -400,7 +400,7 @@ public class HoodieAvroUtils {
|
||||
copyOldValueOrSetDefault(genericRecord, newRecord, f);
|
||||
}
|
||||
// do not preserve FILENAME_METADATA_FIELD
|
||||
newRecord.put(HoodieRecord.FILENAME_METADATA_FIELD_POS, fileName);
|
||||
newRecord.put(HoodieRecord.FILENAME_META_FIELD_POS, fileName);
|
||||
if (!GenericData.get().validate(newSchema, newRecord)) {
|
||||
throw new SchemaCompatibilityException(
|
||||
"Unable to validate the rewritten record " + genericRecord + " against schema " + newSchema);
|
||||
@@ -412,7 +412,7 @@ public class HoodieAvroUtils {
|
||||
public static GenericRecord rewriteEvolutionRecordWithMetadata(GenericRecord genericRecord, Schema newSchema, String fileName) {
|
||||
GenericRecord newRecord = HoodieAvroUtils.rewriteRecordWithNewSchema(genericRecord, newSchema, new HashMap<>());
|
||||
// do not preserve FILENAME_METADATA_FIELD
|
||||
newRecord.put(HoodieRecord.FILENAME_METADATA_FIELD_POS, fileName);
|
||||
newRecord.put(HoodieRecord.FILENAME_META_FIELD_POS, fileName);
|
||||
return newRecord;
|
||||
}
|
||||
|
||||
|
||||
@@ -42,8 +42,6 @@ public abstract class HoodieRecord<T> implements Serializable {
|
||||
public static final String OPERATION_METADATA_FIELD = "_hoodie_operation";
|
||||
public static final String HOODIE_IS_DELETED = "_hoodie_is_deleted";
|
||||
|
||||
public static int FILENAME_METADATA_FIELD_POS = 4;
|
||||
|
||||
public static final List<String> HOODIE_META_COLUMNS =
|
||||
CollectionUtils.createImmutableList(COMMIT_TIME_METADATA_FIELD, COMMIT_SEQNO_METADATA_FIELD,
|
||||
RECORD_KEY_METADATA_FIELD, PARTITION_PATH_METADATA_FIELD, FILENAME_METADATA_FIELD);
|
||||
@@ -59,6 +57,10 @@ public abstract class HoodieRecord<T> implements Serializable {
|
||||
IntStream.range(0, HOODIE_META_COLUMNS.size()).mapToObj(idx -> Pair.of(HOODIE_META_COLUMNS.get(idx), idx))
|
||||
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
|
||||
|
||||
public static int RECORD_KEY_META_FIELD_POS = HOODIE_META_COLUMNS_NAME_TO_POS.get(RECORD_KEY_METADATA_FIELD);
|
||||
public static int PARTITION_PATH_META_FIELD_POS = HOODIE_META_COLUMNS_NAME_TO_POS.get(PARTITION_PATH_METADATA_FIELD);
|
||||
public static int FILENAME_META_FIELD_POS = HOODIE_META_COLUMNS_NAME_TO_POS.get(FILENAME_METADATA_FIELD);
|
||||
|
||||
/**
|
||||
* Identifies the record across the table.
|
||||
*/
|
||||
|
||||
@@ -384,12 +384,14 @@ public class HoodieTableMetaClient implements Serializable {
|
||||
throw new HoodieException(HoodieTableConfig.POPULATE_META_FIELDS.key() + " already disabled for the table. Can't be re-enabled back");
|
||||
}
|
||||
|
||||
// Meta fields can be disabled only when {@code SimpleKeyGenerator} is used
|
||||
if (!getTableConfig().populateMetaFields()
|
||||
&& !properties.getProperty(HoodieTableConfig.KEY_GENERATOR_CLASS_NAME.key(), "org.apache.hudi.keygen.SimpleKeyGenerator")
|
||||
.equals("org.apache.hudi.keygen.SimpleKeyGenerator")) {
|
||||
throw new HoodieException("Only simple key generator is supported when meta fields are disabled. KeyGenerator used : "
|
||||
+ properties.getProperty(HoodieTableConfig.KEY_GENERATOR_CLASS_NAME.key()));
|
||||
// meta fields can be disabled only with SimpleKeyGenerator, NonPartitioned and ComplexKeyGen.
|
||||
if (!getTableConfig().populateMetaFields()) {
|
||||
String keyGenClass = properties.getProperty(HoodieTableConfig.KEY_GENERATOR_CLASS_NAME.key(), "org.apache.hudi.keygen.SimpleKeyGenerator");
|
||||
if (!keyGenClass.equals("org.apache.hudi.keygen.SimpleKeyGenerator") && !keyGenClass.equals("org.apache.hudi.keygen.NonpartitionedKeyGenerator")
|
||||
&& !keyGenClass.equals("org.apache.hudi.keygen.ComplexKeyGenerator")) {
|
||||
throw new HoodieException("Only simple, non partitioned and complex key generator is supported when meta fields are disabled. KeyGenerator used : "
|
||||
+ properties.getProperty(HoodieTableConfig.KEY_GENERATOR_CLASS_NAME.key()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user