[HUDI-2439] Replace RDD with HoodieData in HoodieSparkTable and commit executors (#4856)
- Adopt HoodieData in the Spark action commit executors
- Make DeleteHelper, WriteHelper, and MergeHelper Spark-independent in hudi-client-common
- Make HoodieTable a raw type in the WriteClient APIs to decouple it from the client's generic types
This commit is contained in:
@@ -18,8 +18,6 @@
|
||||
|
||||
package org.apache.hudi.client;
|
||||
|
||||
import com.codahale.metrics.Timer;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hudi.client.common.HoodieJavaEngineContext;
|
||||
import org.apache.hudi.client.embedded.EmbeddedTimelineService;
|
||||
import org.apache.hudi.common.data.HoodieList;
|
||||
@@ -43,6 +41,9 @@ import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hudi.table.action.HoodieWriteMetadata;
|
||||
import org.apache.hudi.table.upgrade.JavaUpgradeDowngradeHelper;
|
||||
|
||||
import com.codahale.metrics.Timer;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
@@ -88,9 +89,9 @@ public class HoodieJavaWriteClient<T extends HoodieRecordPayload> extends
|
||||
}
|
||||
|
||||
@Override
|
||||
protected HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> createTable(HoodieWriteConfig config,
|
||||
Configuration hadoopConf,
|
||||
boolean refreshTimeline) {
|
||||
protected HoodieTable createTable(HoodieWriteConfig config,
|
||||
Configuration hadoopConf,
|
||||
boolean refreshTimeline) {
|
||||
return HoodieJavaTable.create(config, context);
|
||||
}
|
||||
|
||||
@@ -152,7 +153,7 @@ public class HoodieJavaWriteClient<T extends HoodieRecordPayload> extends
|
||||
@Override
|
||||
public List<WriteStatus> bulkInsert(List<HoodieRecord<T>> records,
|
||||
String instantTime,
|
||||
Option<BulkInsertPartitioner<List<HoodieRecord<T>>>> userDefinedBulkInsertPartitioner) {
|
||||
Option<BulkInsertPartitioner> userDefinedBulkInsertPartitioner) {
|
||||
throw new HoodieNotSupportedException("BulkInsert is not supported in HoodieJavaClient");
|
||||
}
|
||||
|
||||
@@ -166,7 +167,7 @@ public class HoodieJavaWriteClient<T extends HoodieRecordPayload> extends
|
||||
@Override
|
||||
public List<WriteStatus> bulkInsertPreppedRecords(List<HoodieRecord<T>> preppedRecords,
|
||||
String instantTime,
|
||||
Option<BulkInsertPartitioner<List<HoodieRecord<T>>>> bulkInsertPartitioner) {
|
||||
Option<BulkInsertPartitioner> bulkInsertPartitioner) {
|
||||
HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> table =
|
||||
initTable(WriteOperationType.BULK_INSERT_PREPPED, Option.ofNullable(instantTime));
|
||||
table.validateInsertSchema();
|
||||
@@ -188,7 +189,7 @@ public class HoodieJavaWriteClient<T extends HoodieRecordPayload> extends
|
||||
@Override
|
||||
protected List<WriteStatus> postWrite(HoodieWriteMetadata<List<WriteStatus>> result,
|
||||
String instantTime,
|
||||
HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> hoodieTable) {
|
||||
HoodieTable hoodieTable) {
|
||||
if (result.getIndexLookupDuration().isPresent()) {
|
||||
metrics.updateIndexMetrics(getOperationType().name(), result.getIndexUpdateDuration().get().toMillis());
|
||||
}
|
||||
@@ -215,7 +216,7 @@ public class HoodieJavaWriteClient<T extends HoodieRecordPayload> extends
|
||||
|
||||
@Override
|
||||
protected void completeCompaction(HoodieCommitMetadata metadata,
|
||||
HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> table,
|
||||
HoodieTable table,
|
||||
String compactionCommitTime) {
|
||||
throw new HoodieNotSupportedException("CompleteCompaction is not supported in HoodieJavaClient");
|
||||
}
|
||||
@@ -232,7 +233,7 @@ public class HoodieJavaWriteClient<T extends HoodieRecordPayload> extends
|
||||
}
|
||||
|
||||
@Override
|
||||
protected HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> doInitTable(HoodieTableMetaClient metaClient, Option<String> instantTime) {
|
||||
protected HoodieTable doInitTable(HoodieTableMetaClient metaClient, Option<String> instantTime) {
|
||||
// new JavaUpgradeDowngrade(metaClient, config, context).run(metaClient, HoodieTableVersion.current(), config, context, instantTime);
|
||||
|
||||
// Create a Hoodie table which encapsulated the commits and files visible
|
||||
|
||||
@@ -114,7 +114,7 @@ public class HoodieJavaCopyOnWriteTable<T extends HoodieRecordPayload>
|
||||
public HoodieWriteMetadata<List<WriteStatus>> bulkInsert(HoodieEngineContext context,
|
||||
String instantTime,
|
||||
List<HoodieRecord<T>> records,
|
||||
Option<BulkInsertPartitioner<List<HoodieRecord<T>>>> bulkInsertPartitioner) {
|
||||
Option<BulkInsertPartitioner> bulkInsertPartitioner) {
|
||||
return new JavaBulkInsertCommitActionExecutor((HoodieJavaEngineContext) context, config,
|
||||
this, instantTime, records, bulkInsertPartitioner).execute();
|
||||
}
|
||||
@@ -152,7 +152,7 @@ public class HoodieJavaCopyOnWriteTable<T extends HoodieRecordPayload>
|
||||
public HoodieWriteMetadata<List<WriteStatus>> bulkInsertPrepped(HoodieEngineContext context,
|
||||
String instantTime,
|
||||
List<HoodieRecord<T>> preppedRecords,
|
||||
Option<BulkInsertPartitioner<List<HoodieRecord<T>>>> bulkInsertPartitioner) {
|
||||
Option<BulkInsertPartitioner> bulkInsertPartitioner) {
|
||||
return new JavaBulkInsertPreppedCommitActionExecutor((HoodieJavaEngineContext) context, config,
|
||||
this, instantTime, preppedRecords, bulkInsertPartitioner).execute();
|
||||
}
|
||||
|
||||
@@ -61,7 +61,7 @@ public class HoodieJavaMergeOnReadTable<T extends HoodieRecordPayload> extends H
|
||||
public HoodieWriteMetadata<List<WriteStatus>> bulkInsertPrepped(HoodieEngineContext context,
|
||||
String instantTime,
|
||||
List<HoodieRecord<T>> preppedRecords,
|
||||
Option<BulkInsertPartitioner<List<HoodieRecord<T>>>> bulkInsertPartitioner) {
|
||||
Option<BulkInsertPartitioner> bulkInsertPartitioner) {
|
||||
return new JavaBulkInsertPreppedCommitActionExecutor((HoodieJavaEngineContext) context, config,
|
||||
this, instantTime, preppedRecords, bulkInsertPartitioner).execute();
|
||||
}
|
||||
|
||||
@@ -36,17 +36,17 @@ import java.util.Map;
|
||||
public class JavaBulkInsertCommitActionExecutor<T extends HoodieRecordPayload<T>> extends BaseJavaCommitActionExecutor<T> {
|
||||
|
||||
private final List<HoodieRecord<T>> inputRecords;
|
||||
private final Option<BulkInsertPartitioner<List<HoodieRecord<T>>>> bulkInsertPartitioner;
|
||||
private final Option<BulkInsertPartitioner> bulkInsertPartitioner;
|
||||
|
||||
public JavaBulkInsertCommitActionExecutor(HoodieJavaEngineContext context, HoodieWriteConfig config, HoodieTable table,
|
||||
String instantTime, List<HoodieRecord<T>> inputRecords,
|
||||
Option<BulkInsertPartitioner<List<HoodieRecord<T>>>> bulkInsertPartitioner) {
|
||||
Option<BulkInsertPartitioner> bulkInsertPartitioner) {
|
||||
this(context, config, table, instantTime, inputRecords, bulkInsertPartitioner, Option.empty());
|
||||
}
|
||||
|
||||
public JavaBulkInsertCommitActionExecutor(HoodieJavaEngineContext context, HoodieWriteConfig config, HoodieTable table,
|
||||
String instantTime, List<HoodieRecord<T>> inputRecords,
|
||||
Option<BulkInsertPartitioner<List<HoodieRecord<T>>>> bulkInsertPartitioner,
|
||||
Option<BulkInsertPartitioner> bulkInsertPartitioner,
|
||||
Option<Map<String, String>> extraMetadata) {
|
||||
super(context, config, table, instantTime, WriteOperationType.BULK_INSERT, extraMetadata);
|
||||
this.inputRecords = inputRecords;
|
||||
|
||||
@@ -65,7 +65,7 @@ public class JavaBulkInsertHelper<T extends HoodieRecordPayload, R> extends Base
|
||||
final HoodieWriteConfig config,
|
||||
final BaseCommitActionExecutor<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>, R> executor,
|
||||
final boolean performDedupe,
|
||||
final Option<BulkInsertPartitioner<List<HoodieRecord<T>>>> userDefinedBulkInsertPartitioner) {
|
||||
final Option<BulkInsertPartitioner> userDefinedBulkInsertPartitioner) {
|
||||
HoodieWriteMetadata result = new HoodieWriteMetadata();
|
||||
|
||||
// It's possible the transition to inflight could have already happened.
|
||||
@@ -89,7 +89,7 @@ public class JavaBulkInsertHelper<T extends HoodieRecordPayload, R> extends Base
|
||||
HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> table,
|
||||
HoodieWriteConfig config,
|
||||
boolean performDedupe,
|
||||
Option<BulkInsertPartitioner<List<HoodieRecord<T>>>> userDefinedBulkInsertPartitioner,
|
||||
Option<BulkInsertPartitioner> userDefinedBulkInsertPartitioner,
|
||||
boolean useWriterSchema,
|
||||
int parallelism,
|
||||
WriteHandleFactory writeHandleFactory) {
|
||||
@@ -106,6 +106,7 @@ public class JavaBulkInsertHelper<T extends HoodieRecordPayload, R> extends Base
|
||||
BulkInsertPartitioner partitioner = userDefinedBulkInsertPartitioner.isPresent()
|
||||
? userDefinedBulkInsertPartitioner.get()
|
||||
: JavaBulkInsertInternalPartitionerFactory.get(config.getBulkInsertSortMode());
|
||||
// only List is supported for Java partitioner, but it is not enforced by BulkInsertPartitioner API. To improve this, TODO HUDI-3463
|
||||
repartitionedRecords = (List<HoodieRecord<T>>) partitioner.repartitionRecords(dedupedRecords, parallelism);
|
||||
|
||||
FileIdPrefixProvider fileIdPrefixProvider = (FileIdPrefixProvider) ReflectionUtils.loadClass(
|
||||
|
||||
@@ -36,12 +36,12 @@ public class JavaBulkInsertPreppedCommitActionExecutor<T extends HoodieRecordPay
|
||||
extends BaseJavaCommitActionExecutor<T> {
|
||||
|
||||
private final List<HoodieRecord<T>> preppedInputRecord;
|
||||
private final Option<BulkInsertPartitioner<List<HoodieRecord<T>>>> userDefinedBulkInsertPartitioner;
|
||||
private final Option<BulkInsertPartitioner> userDefinedBulkInsertPartitioner;
|
||||
|
||||
public JavaBulkInsertPreppedCommitActionExecutor(HoodieJavaEngineContext context,
|
||||
HoodieWriteConfig config, HoodieTable table,
|
||||
String instantTime, List<HoodieRecord<T>> preppedInputRecord,
|
||||
Option<BulkInsertPartitioner<List<HoodieRecord<T>>>> userDefinedBulkInsertPartitioner) {
|
||||
Option<BulkInsertPartitioner> userDefinedBulkInsertPartitioner) {
|
||||
super(context, config, table, instantTime, WriteOperationType.BULK_INSERT);
|
||||
this.preppedInputRecord = preppedInputRecord;
|
||||
this.userDefinedBulkInsertPartitioner = userDefinedBulkInsertPartitioner;
|
||||
|
||||
Reference in New Issue
Block a user