
[HUDI-2439] Replace RDD with HoodieData in HoodieSparkTable and commit executors (#4856)

- Adopt HoodieData in Spark action commit executors (see the sketch below)
- Make DeleteHelper, WriteHelper, and MergeHelper in hudi-client-common Spark-independent
- Use a raw HoodieTable type in WriteClient APIs to decouple them from the client's generic types
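
For context on the first bullet: HoodieData is Hudi's engine-agnostic wrapper around a record collection, backed by a plain Java List on the Flink/Java engines (note the HoodieList import in the diff below) and by an RDD on Spark, which is what lets commit logic move into hudi-client-common. Below is a minimal sketch of that pattern; the names EngineData and ListData are illustrative, not Hudi's actual API.

    import java.util.List;
    import java.util.function.Function;
    import java.util.stream.Collectors;

    // Engine-agnostic handle over a distributed or in-memory collection.
    abstract class EngineData<T> {
      public abstract <O> EngineData<O> map(Function<T, O> fn);
      public abstract List<T> collectAsList();
    }

    // In-memory implementation, analogous to HoodieList on the Flink engine;
    // a Spark implementation would delegate the same calls to an RDD.
    final class ListData<T> extends EngineData<T> {
      private final List<T> data;

      ListData(List<T> data) {
        this.data = data;
      }

      @Override
      public <O> EngineData<O> map(Function<T, O> fn) {
        return new ListData<>(data.stream().map(fn).collect(Collectors.toList()));
      }

      @Override
      public List<T> collectAsList() {
        return data;
      }
    }

Written once against EngineData, a commit executor runs unchanged over either backing collection.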
Raymond Xu
2022-03-17 19:17:56 +08:00
committed by GitHub
parent bf191f8d46
commit 7446ff95a7
69 changed files with 723 additions and 769 deletions

HoodieFlinkWriteClient.java

@@ -18,8 +18,6 @@
 package org.apache.hudi.client;
 
-import com.codahale.metrics.Timer;
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hudi.async.AsyncCleanerService;
 import org.apache.hudi.client.common.HoodieFlinkEngineContext;
 import org.apache.hudi.common.data.HoodieList;
@@ -55,6 +53,7 @@ import org.apache.hudi.io.HoodieWriteHandle;
 import org.apache.hudi.io.MiniBatchHandle;
 import org.apache.hudi.metadata.FlinkHoodieBackedTableMetadataWriter;
 import org.apache.hudi.metadata.HoodieBackedTableMetadataWriter;
+import org.apache.hudi.metadata.HoodieTableMetadataWriter;
 import org.apache.hudi.table.BulkInsertPartitioner;
 import org.apache.hudi.table.HoodieFlinkTable;
 import org.apache.hudi.table.HoodieTable;
@@ -64,6 +63,9 @@ import org.apache.hudi.table.marker.WriteMarkersFactory;
 import org.apache.hudi.table.upgrade.FlinkUpgradeDowngradeHelper;
 import org.apache.hudi.table.upgrade.UpgradeDowngrade;
 import org.apache.hudi.util.FlinkClientUtil;
+import com.codahale.metrics.Timer;
+import org.apache.hadoop.conf.Configuration;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -111,8 +113,8 @@ public class HoodieFlinkWriteClient<T extends HoodieRecordPayload> extends
   }
 
   @Override
-  protected HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> createTable(HoodieWriteConfig config, Configuration hadoopConf,
-                                                                                                  boolean refreshTimeline) {
+  protected HoodieTable createTable(HoodieWriteConfig config, Configuration hadoopConf,
+                                    boolean refreshTimeline) {
     return HoodieFlinkTable.create(config, (HoodieFlinkEngineContext) context);
   }
@@ -226,12 +228,12 @@ public class HoodieFlinkWriteClient<T extends HoodieRecordPayload> extends
   }
 
   @Override
-  public List<WriteStatus> bulkInsert(List<HoodieRecord<T>> records, String instantTime, Option<BulkInsertPartitioner<List<HoodieRecord<T>>>> userDefinedBulkInsertPartitioner) {
+  public List<WriteStatus> bulkInsert(List<HoodieRecord<T>> records, String instantTime, Option<BulkInsertPartitioner> userDefinedBulkInsertPartitioner) {
     throw new HoodieNotSupportedException("BulkInsert operation is not supported yet");
   }
 
   @Override
-  public List<WriteStatus> bulkInsertPreppedRecords(List<HoodieRecord<T>> preppedRecords, String instantTime, Option<BulkInsertPartitioner<List<HoodieRecord<T>>>> bulkInsertPartitioner) {
+  public List<WriteStatus> bulkInsertPreppedRecords(List<HoodieRecord<T>> preppedRecords, String instantTime, Option<BulkInsertPartitioner> bulkInsertPartitioner) {
     throw new HoodieNotSupportedException("BulkInsertPrepped operation is not supported yet");
   }
@@ -304,7 +306,7 @@ public class HoodieFlinkWriteClient<T extends HoodieRecordPayload> extends
   @Override
   protected List<WriteStatus> postWrite(HoodieWriteMetadata<List<WriteStatus>> result,
                                         String instantTime,
-                                        HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> hoodieTable) {
+                                        HoodieTable hoodieTable) {
     if (result.getIndexLookupDuration().isPresent()) {
       metrics.updateIndexMetrics(getOperationType().name(), result.getIndexUpdateDuration().get().toMillis());
     }
@@ -324,7 +326,7 @@ public class HoodieFlinkWriteClient<T extends HoodieRecordPayload> extends
    * @param extraMetadata Additional Metadata passed by user
    */
   @Override
-  protected void postCommit(HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> table,
+  protected void postCommit(HoodieTable table,
                             HoodieCommitMetadata metadata,
                             String instantTime,
                             Option<Map<String, String>> extraMetadata) {
@@ -351,7 +353,7 @@ public class HoodieFlinkWriteClient<T extends HoodieRecordPayload> extends
   @Override
   public void completeCompaction(
       HoodieCommitMetadata metadata,
-      HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> table,
+      HoodieTable table,
       String compactionCommitTime) {
     this.context.setJobStatus(this.getClass().getSimpleName(), "Collect compaction write status and commit compaction");
     List<HoodieWriteStat> writeStats = metadata.getWriteStats();
@@ -363,7 +365,7 @@ public class HoodieFlinkWriteClient<T extends HoodieRecordPayload> extends
       // Do not do any conflict resolution here as we do with regular writes. We take the lock here to ensure all writes to metadata table happens within a
       // single lock (single writer). Because more than one write to metadata table will result in conflicts since all of them updates the same partition.
       table.getMetadataWriter(compactionInstant.getTimestamp()).ifPresent(
-          w -> w.update(metadata, compactionInstant.getTimestamp(), table.isTableServiceAction(compactionInstant.getAction())));
+          w -> ((HoodieTableMetadataWriter) w).update(metadata, compactionInstant.getTimestamp(), table.isTableServiceAction(compactionInstant.getAction())));
       LOG.info("Committing Compaction {} finished with result {}.", compactionCommitTime, metadata);
       CompactHelpers.getInstance().completeInflightCompaction(table, compactionCommitTime, metadata);
     } finally {
@@ -396,7 +398,7 @@ public class HoodieFlinkWriteClient<T extends HoodieRecordPayload> extends
   }
 
   @Override
-  protected HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> doInitTable(HoodieTableMetaClient metaClient, Option<String> instantTime) {
+  protected HoodieTable doInitTable(HoodieTableMetaClient metaClient, Option<String> instantTime) {
     // Create a Hoodie table which encapsulated the commits and files visible
     return getHoodieTable();
   }
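
The hunks above all make the same move: the fully parameterized HoodieTable<T, List<HoodieRecord<T>>, List<HoodieKey>, List<WriteStatus>> becomes a raw HoodieTable, so overrides in the write client no longer have to spell out engine-specific collection types. A hypothetical sketch of why the raw supertype decouples the two; AbstractTable, AbstractWriteClient, and FlinkTable are illustrative names, not Hudi classes.

    import java.util.List;

    // I: input record collection type, O: write status collection type.
    abstract class AbstractTable<I, O> {
    }

    // Each engine pins the type parameters to its own collections.
    class FlinkTable<T> extends AbstractTable<List<T>, List<String>> {
    }

    abstract class AbstractWriteClient {
      // Raw return type: subclasses can return any engine's table without
      // the base class re-declaring that engine's type arguments.
      @SuppressWarnings("rawtypes")
      protected abstract AbstractTable createTable();
    }

    class FlinkWriteClient<T> extends AbstractWriteClient {
      @Override
      @SuppressWarnings("rawtypes")
      protected AbstractTable createTable() {
        return new FlinkTable<T>();
      }
    }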

HoodieFlinkCopyOnWriteTable.java

@@ -68,6 +68,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import javax.annotation.Nonnull;
 import java.io.IOException;
+import java.util.Collections;
 import java.util.Iterator;
@@ -231,7 +232,7 @@ public class HoodieFlinkCopyOnWriteTable<T extends HoodieRecordPayload>
   public HoodieWriteMetadata<List<WriteStatus>> bulkInsert(HoodieEngineContext context,
                                                            String instantTime,
                                                            List<HoodieRecord<T>> records,
-                                                           Option<BulkInsertPartitioner<List<HoodieRecord<T>>>> bulkInsertPartitioner) {
+                                                           Option<BulkInsertPartitioner> bulkInsertPartitioner) {
     throw new HoodieNotSupportedException("BulkInsert is not supported yet");
   }
@@ -264,7 +265,7 @@ public class HoodieFlinkCopyOnWriteTable<T extends HoodieRecordPayload>
   public HoodieWriteMetadata<List<WriteStatus>> bulkInsertPrepped(HoodieEngineContext context,
                                                                   String instantTime,
                                                                   List<HoodieRecord<T>> preppedRecords,
-                                                                  Option<BulkInsertPartitioner<List<HoodieRecord<T>>>> bulkInsertPartitioner) {
+                                                                  Option<BulkInsertPartitioner> bulkInsertPartitioner) {
     throw new HoodieNotSupportedException("BulkInsertPrepped is not supported yet");
   }
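
One visible cost of the raw types shows up in the completeCompaction hunk in the first file: with the metadata writer option no longer parameterized, the lambda parameter arrives effectively untyped and must be cast back to HoodieTableMetadataWriter before use. A hypothetical illustration of the same effect using a raw java.util.Optional (not Hudi's Option):

    import java.util.Optional;

    public class RawOptionalDemo {
      @SuppressWarnings({"rawtypes", "unchecked"})
      public static void main(String[] args) {
        Optional raw = Optional.of("metadata-writer"); // element type erased to Object
        // The consumer sees an Object, so a cast is required before use,
        // mirroring the ((HoodieTableMetadataWriter) w) cast in the diff above.
        raw.ifPresent(w -> System.out.println(((String) w).length()));
      }
    }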