1
0

[HUDI-1013] Adding Bulk Insert V2 implementation (#1834)

- Adding ability to use native spark row writing for bulk_insert
 - Controlled by `ENABLE_ROW_WRITER_OPT_KEY` datasource write option
 - Introduced KeyGeneratorInterface in hudi-client, moved KeyGenerator back to hudi-spark
 - Simplified the new API additions to just two new methods: getRecordKey(row), getPartitionPath(row)
 - Fixed all built-in key generators with new APIs
 - Made the field position map lazily created upon the first call to row-based APIs
 - Implemented native row based key generators for CustomKeyGenerator
 - Fixed all the tests, with these new APIs

Co-authored-by: Balaji Varadarajan <varadarb@uber.com>
Co-authored-by: Vinoth Chandar <vinoth@apache.org>
This commit is contained in:
Sivabalan Narayanan
2020-08-13 03:33:39 -04:00
committed by GitHub
parent 8d04268264
commit 379cf0786f
62 changed files with 4682 additions and 485 deletions

View File

@@ -18,12 +18,16 @@
package org.apache.hudi.common.model;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.apache.hudi.common.util.CollectionUtils;
import org.apache.hudi.common.util.Option;
import java.io.Serializable;
import java.util.List;
import java.util.Objects;
import org.apache.hudi.common.util.collection.Pair;
/**
* A Single Record managed by Hoodie.
@@ -40,6 +44,10 @@ public class HoodieRecord<T extends HoodieRecordPayload> implements Serializable
CollectionUtils.createImmutableList(COMMIT_TIME_METADATA_FIELD, COMMIT_SEQNO_METADATA_FIELD,
RECORD_KEY_METADATA_FIELD, PARTITION_PATH_METADATA_FIELD, FILENAME_METADATA_FIELD);
/**
 * Lookup from each Hoodie metadata column name to its index within {@code HOODIE_META_COLUMNS}.
 *
 * <p>Built once at class-initialization time by walking the positions {@code 0..size-1} of
 * {@code HOODIE_META_COLUMNS} and pairing every column name with the position it was read from.
 */
public static final Map<String, Integer> HOODIE_META_COLUMNS_NAME_TO_POS =
    IntStream.range(0, HOODIE_META_COLUMNS.size())
        // Pair each column name with the position it occupies in the list.
        .mapToObj(pos -> Pair.of(HOODIE_META_COLUMNS.get(pos), pos))
        // Key: column name, value: position.
        .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
/**
* Identifies the record across the table.
*/