1
0

[HUDI-3995] Making perf optimizations for bulk insert row writer path (#5462)

- Avoid using udf for key generator for SimpleKeyGen and NonPartitionedKeyGen.
- Fixed NonPartitioned Key generator to directly fetch record key from row rather than involving GenericRecord.
- Other minor fixes around using static values instead of looking up hashmap.
This commit is contained in:
Sivabalan Narayanan
2022-05-09 12:40:22 -04:00
committed by GitHub
parent 6b47ef6ed2
commit 6285a239a3
20 changed files with 217 additions and 187 deletions

View File

@@ -400,7 +400,7 @@ public class HoodieAvroUtils {
copyOldValueOrSetDefault(genericRecord, newRecord, f);
}
// do not preserve FILENAME_METADATA_FIELD
newRecord.put(HoodieRecord.FILENAME_METADATA_FIELD_POS, fileName);
newRecord.put(HoodieRecord.FILENAME_META_FIELD_POS, fileName);
if (!GenericData.get().validate(newSchema, newRecord)) {
throw new SchemaCompatibilityException(
"Unable to validate the rewritten record " + genericRecord + " against schema " + newSchema);
@@ -412,7 +412,7 @@ public class HoodieAvroUtils {
public static GenericRecord rewriteEvolutionRecordWithMetadata(GenericRecord genericRecord, Schema newSchema, String fileName) {
GenericRecord newRecord = HoodieAvroUtils.rewriteRecordWithNewSchema(genericRecord, newSchema, new HashMap<>());
// do not preserve FILENAME_METADATA_FIELD
newRecord.put(HoodieRecord.FILENAME_METADATA_FIELD_POS, fileName);
newRecord.put(HoodieRecord.FILENAME_META_FIELD_POS, fileName);
return newRecord;
}

View File

@@ -42,8 +42,6 @@ public abstract class HoodieRecord<T> implements Serializable {
public static final String OPERATION_METADATA_FIELD = "_hoodie_operation";
public static final String HOODIE_IS_DELETED = "_hoodie_is_deleted";
public static int FILENAME_METADATA_FIELD_POS = 4;
public static final List<String> HOODIE_META_COLUMNS =
CollectionUtils.createImmutableList(COMMIT_TIME_METADATA_FIELD, COMMIT_SEQNO_METADATA_FIELD,
RECORD_KEY_METADATA_FIELD, PARTITION_PATH_METADATA_FIELD, FILENAME_METADATA_FIELD);
@@ -59,6 +57,10 @@ public abstract class HoodieRecord<T> implements Serializable {
IntStream.range(0, HOODIE_META_COLUMNS.size()).mapToObj(idx -> Pair.of(HOODIE_META_COLUMNS.get(idx), idx))
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
public static int RECORD_KEY_META_FIELD_POS = HOODIE_META_COLUMNS_NAME_TO_POS.get(RECORD_KEY_METADATA_FIELD);
public static int PARTITION_PATH_META_FIELD_POS = HOODIE_META_COLUMNS_NAME_TO_POS.get(PARTITION_PATH_METADATA_FIELD);
public static int FILENAME_META_FIELD_POS = HOODIE_META_COLUMNS_NAME_TO_POS.get(FILENAME_METADATA_FIELD);
/**
* Identifies the record across the table.
*/

View File

@@ -384,12 +384,14 @@ public class HoodieTableMetaClient implements Serializable {
throw new HoodieException(HoodieTableConfig.POPULATE_META_FIELDS.key() + " already disabled for the table. Can't be re-enabled back");
}
// Meta fields can be disabled only when {@code SimpleKeyGenerator} is used
if (!getTableConfig().populateMetaFields()
&& !properties.getProperty(HoodieTableConfig.KEY_GENERATOR_CLASS_NAME.key(), "org.apache.hudi.keygen.SimpleKeyGenerator")
.equals("org.apache.hudi.keygen.SimpleKeyGenerator")) {
throw new HoodieException("Only simple key generator is supported when meta fields are disabled. KeyGenerator used : "
+ properties.getProperty(HoodieTableConfig.KEY_GENERATOR_CLASS_NAME.key()));
// meta fields can be disabled only with SimpleKeyGenerator, NonPartitioned and ComplexKeyGen.
if (!getTableConfig().populateMetaFields()) {
String keyGenClass = properties.getProperty(HoodieTableConfig.KEY_GENERATOR_CLASS_NAME.key(), "org.apache.hudi.keygen.SimpleKeyGenerator");
if (!keyGenClass.equals("org.apache.hudi.keygen.SimpleKeyGenerator") && !keyGenClass.equals("org.apache.hudi.keygen.NonpartitionedKeyGenerator")
&& !keyGenClass.equals("org.apache.hudi.keygen.ComplexKeyGenerator")) {
throw new HoodieException("Only simple, non partitioned and complex key generator is supported when meta fields are disabled. KeyGenerator used : "
+ properties.getProperty(HoodieTableConfig.KEY_GENERATOR_CLASS_NAME.key()));
}
}
}