[HUDI-2009] Fixing extra commit metadata in row writer path (#3075)
This commit is contained in:
committed by
GitHub
parent
1d3cd06572
commit
8c0dbaa9b3
@@ -67,6 +67,6 @@ public class DefaultSource extends BaseDefaultSource implements DataSourceV2,
|
||||
options.get(HoodieInternalConfig.BULKINSERT_ARE_PARTITIONER_RECORDS_SORTED).isPresent()
|
||||
? options.get(HoodieInternalConfig.BULKINSERT_ARE_PARTITIONER_RECORDS_SORTED).get() : null);
|
||||
return Optional.of(new HoodieDataSourceInternalWriter(instantTime, config, schema, getSparkSession(),
|
||||
getConfiguration(), arePartitionRecordsSorted));
|
||||
getConfiguration(), options, arePartitionRecordsSorted));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
|
||||
package org.apache.hudi.internal;
|
||||
|
||||
import org.apache.hudi.DataSourceUtils;
|
||||
import org.apache.hudi.client.HoodieInternalWriteStatus;
|
||||
import org.apache.hudi.common.model.HoodieWriteStat;
|
||||
import org.apache.hudi.common.model.WriteOperationType;
|
||||
@@ -26,13 +27,16 @@ import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.apache.spark.sql.catalyst.InternalRow;
|
||||
import org.apache.spark.sql.sources.v2.DataSourceOptions;
|
||||
import org.apache.spark.sql.sources.v2.writer.DataSourceWriter;
|
||||
import org.apache.spark.sql.sources.v2.writer.DataWriterFactory;
|
||||
import org.apache.spark.sql.sources.v2.writer.WriterCommitMessage;
|
||||
import org.apache.spark.sql.types.StructType;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
@@ -46,15 +50,18 @@ public class HoodieDataSourceInternalWriter implements DataSourceWriter {
|
||||
private final StructType structType;
|
||||
private final DataSourceInternalWriterHelper dataSourceInternalWriterHelper;
|
||||
private final Boolean arePartitionRecordsSorted;
|
||||
private Map<String, String> extraMetadataMap = new HashMap<>();
|
||||
|
||||
public HoodieDataSourceInternalWriter(String instantTime, HoodieWriteConfig writeConfig, StructType structType,
|
||||
SparkSession sparkSession, Configuration configuration, boolean arePartitionRecordsSorted) {
|
||||
SparkSession sparkSession, Configuration configuration, DataSourceOptions dataSourceOptions,
|
||||
boolean arePartitionRecordsSorted) {
|
||||
this.instantTime = instantTime;
|
||||
this.writeConfig = writeConfig;
|
||||
this.structType = structType;
|
||||
this.arePartitionRecordsSorted = arePartitionRecordsSorted;
|
||||
this.extraMetadataMap = DataSourceUtils.getExtraMetadata(dataSourceOptions.asMap());
|
||||
this.dataSourceInternalWriterHelper = new DataSourceInternalWriterHelper(instantTime, writeConfig, structType,
|
||||
sparkSession, configuration);
|
||||
sparkSession, configuration, extraMetadataMap);
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -89,4 +96,5 @@ public class HoodieDataSourceInternalWriter implements DataSourceWriter {
|
||||
public void abort(WriterCommitMessage[] messages) {
|
||||
dataSourceInternalWriterHelper.abort();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
package org.apache.hudi.internal;
|
||||
|
||||
import org.apache.hudi.client.HoodieInternalWriteStatus;
|
||||
|
||||
import org.apache.spark.sql.sources.v2.writer.WriterCommitMessage;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
Reference in New Issue
Block a user