1
0

[HUDI-2009] Fixing extra commit metadata in row writer path (#3075)

This commit is contained in:
Sivabalan Narayanan
2021-07-08 03:07:27 -04:00
committed by GitHub
parent 1d3cd06572
commit 8c0dbaa9b3
12 changed files with 180 additions and 29 deletions

View File

@@ -55,6 +55,7 @@ import org.apache.spark.sql.Row;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -132,6 +133,18 @@ public class DataSourceUtils {
}
}
/**
 * Extracts extra commit-metadata entries from the supplied write properties.
 *
 * <p>If {@code COMMIT_METADATA_KEYPREFIX_OPT_KEY} is configured, every property whose key
 * starts with that configured prefix value is copied into the returned map; otherwise an
 * empty map is returned.
 *
 * @param properties write options/properties supplied by the caller
 * @return map of matching extra-metadata entries; possibly empty, never {@code null}
 */
public static Map<String, String> getExtraMetadata(Map<String, String> properties) {
  Map<String, String> extraMetadataMap = new HashMap<>();
  if (properties.containsKey(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX_OPT_KEY().key())) {
    // The prefix value is loop-invariant; look it up once instead of on every entry.
    String keyPrefix = properties.get(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX_OPT_KEY().key());
    for (Map.Entry<String, String> entry : properties.entrySet()) {
      if (entry.getKey().startsWith(keyPrefix)) {
        extraMetadataMap.put(entry.getKey(), entry.getValue());
      }
    }
  }
  return extraMetadataMap;
}
/**
* Create a payload class via reflection, do not ordering/precombine value.
*/

View File

@@ -38,8 +38,8 @@ import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.StructType;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Helper class for HoodieDataSourceInternalWriter used by Spark datasource v2.
@@ -54,14 +54,17 @@ public class DataSourceInternalWriterHelper {
private final SparkRDDWriteClient writeClient;
private final HoodieTable hoodieTable;
private final WriteOperationType operationType;
private Map<String, String> extraMetadata;
public DataSourceInternalWriterHelper(String instantTime, HoodieWriteConfig writeConfig, StructType structType,
SparkSession sparkSession, Configuration configuration) {
SparkSession sparkSession, Configuration configuration, Map<String, String> extraMetadata) {
this.instantTime = instantTime;
this.operationType = WriteOperationType.BULK_INSERT;
this.extraMetadata = extraMetadata;
this.writeClient = new SparkRDDWriteClient<>(new HoodieSparkEngineContext(new JavaSparkContext(sparkSession.sparkContext())), writeConfig);
writeClient.setOperationType(operationType);
writeClient.startCommitWithTime(instantTime);
this.metaClient = HoodieTableMetaClient.builder().setConf(configuration).setBasePath(writeConfig.getBasePath()).build();
this.hoodieTable = HoodieSparkTable.create(writeConfig, new HoodieSparkEngineContext(new JavaSparkContext(sparkSession.sparkContext())), metaClient);
}
@@ -76,7 +79,7 @@ public class DataSourceInternalWriterHelper {
public void commit(List<HoodieWriteStat> writeStatList) {
try {
writeClient.commitStats(instantTime, writeStatList, Option.of(new HashMap<>()),
writeClient.commitStats(instantTime, writeStatList, Option.of(extraMetadata),
CommitUtils.getCommitActionType(operationType, metaClient.getTableType()));
} catch (Exception ioe) {
throw new HoodieException(ioe.getMessage(), ioe);