[HUDI-1013] Adding Bulk Insert V2 implementation (#1834)
- Adding ability to use native Spark row writing for bulk_insert
- Controlled by `ENABLE_ROW_WRITER_OPT_KEY` datasource write option
- Introduced KeyGeneratorInterface in hudi-client, moved KeyGenerator back to hudi-spark
- Simplified the new API additions to just two new methods: getRecordKey(row), getPartitionPath(row)
- Fixed all built-in key generators with new APIs
- Made the field position map lazily created upon the first call to row-based APIs
- Implemented native row-based key generators for CustomKeyGenerator
- Fixed all the tests with these new APIs

Co-authored-by: Balaji Varadarajan <varadarb@uber.com>
Co-authored-by: Vinoth Chandar <vinoth@apache.org>
This commit is contained in:
committed by
GitHub
parent
8d04268264
commit
379cf0786f
@@ -18,12 +18,16 @@
|
||||
|
||||
package org.apache.hudi.common.model;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
import org.apache.hudi.common.util.CollectionUtils;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
|
||||
/**
|
||||
* A Single Record managed by Hoodie.
|
||||
@@ -40,6 +44,10 @@ public class HoodieRecord<T extends HoodieRecordPayload> implements Serializable
|
||||
CollectionUtils.createImmutableList(COMMIT_TIME_METADATA_FIELD, COMMIT_SEQNO_METADATA_FIELD,
|
||||
RECORD_KEY_METADATA_FIELD, PARTITION_PATH_METADATA_FIELD, FILENAME_METADATA_FIELD);
|
||||
|
||||
public static final Map<String, Integer> HOODIE_META_COLUMNS_NAME_TO_POS =
|
||||
IntStream.range(0, HOODIE_META_COLUMNS.size()).mapToObj(idx -> Pair.of(HOODIE_META_COLUMNS.get(idx), idx))
|
||||
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
|
||||
|
||||
/**
|
||||
* Identifies the record across the table.
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user