[HUDI-3701] Flink bulk_insert support bucket hash index (#5118)
This commit is contained in:
@@ -38,11 +38,15 @@ public class BucketIdentifier {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public static int getBucketId(HoodieKey hoodieKey, String indexKeyFields, int numBuckets) {
|
public static int getBucketId(HoodieKey hoodieKey, String indexKeyFields, int numBuckets) {
|
||||||
|
return getBucketId(hoodieKey.getRecordKey(), indexKeyFields, numBuckets);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static int getBucketId(String recordKey, String indexKeyFields, int numBuckets) {
|
||||||
List<String> hashKeyFields;
|
List<String> hashKeyFields;
|
||||||
if (!hoodieKey.getRecordKey().contains(":")) {
|
if (!recordKey.contains(":")) {
|
||||||
hashKeyFields = Collections.singletonList(hoodieKey.getRecordKey());
|
hashKeyFields = Collections.singletonList(recordKey);
|
||||||
} else {
|
} else {
|
||||||
Map<String, String> recordKeyPairs = Arrays.stream(hoodieKey.getRecordKey().split(","))
|
Map<String, String> recordKeyPairs = Arrays.stream(recordKey.split(","))
|
||||||
.map(p -> p.split(":"))
|
.map(p -> p.split(":"))
|
||||||
.collect(Collectors.toMap(p -> p[0], p -> p[1]));
|
.collect(Collectors.toMap(p -> p[0], p -> p[1]));
|
||||||
hashKeyFields = Arrays.stream(indexKeyFields.split(","))
|
hashKeyFields = Arrays.stream(indexKeyFields.split(","))
|
||||||
|
|||||||
@@ -0,0 +1,73 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hudi.sink.bucket;
|
||||||
|
|
||||||
|
import org.apache.hudi.config.HoodieWriteConfig;
|
||||||
|
import org.apache.hudi.configuration.FlinkOptions;
|
||||||
|
import org.apache.hudi.index.bucket.BucketIdentifier;
|
||||||
|
import org.apache.hudi.io.storage.row.HoodieRowDataCreateHandle;
|
||||||
|
import org.apache.hudi.sink.bulk.BulkInsertWriterHelper;
|
||||||
|
import org.apache.hudi.table.HoodieTable;
|
||||||
|
|
||||||
|
import org.apache.flink.configuration.Configuration;
|
||||||
|
import org.apache.flink.table.data.RowData;
|
||||||
|
import org.apache.flink.table.types.logical.RowType;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper class for bucket index bulk insert used by Flink.
|
||||||
|
*/
|
||||||
|
public class BucketBulkInsertWriterHelper extends BulkInsertWriterHelper {
|
||||||
|
private static final Logger LOG = LoggerFactory.getLogger(BucketBulkInsertWriterHelper.class);
|
||||||
|
|
||||||
|
private final int bucketNum;
|
||||||
|
private final String indexKeyFields;
|
||||||
|
|
||||||
|
public BucketBulkInsertWriterHelper(Configuration conf, HoodieTable<?, ?, ?, ?> hoodieTable, HoodieWriteConfig writeConfig,
|
||||||
|
String instantTime, int taskPartitionId, long taskId, long taskEpochId, RowType rowType) {
|
||||||
|
super(conf, hoodieTable, writeConfig, instantTime, taskPartitionId, taskId, taskEpochId, rowType);
|
||||||
|
this.bucketNum = conf.getInteger(FlinkOptions.BUCKET_INDEX_NUM_BUCKETS);
|
||||||
|
this.indexKeyFields = conf.getString(FlinkOptions.INDEX_KEY_FIELD);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void write(RowData record) throws IOException {
|
||||||
|
try {
|
||||||
|
String recordKey = keyGen.getRecordKey(record);
|
||||||
|
String partitionPath = keyGen.getPartitionPath(record);
|
||||||
|
final int bucketNum = BucketIdentifier.getBucketId(recordKey, indexKeyFields, this.bucketNum);
|
||||||
|
String fileId = BucketIdentifier.newBucketFileIdPrefix(bucketNum);
|
||||||
|
getRowCreateHandle(partitionPath, fileId).write(recordKey, partitionPath, record);
|
||||||
|
} catch (Throwable throwable) {
|
||||||
|
LOG.error("Global error thrown while trying to write records in HoodieRowDataCreateHandle", throwable);
|
||||||
|
throw throwable;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private HoodieRowDataCreateHandle getRowCreateHandle(String partitionPath, String fileId) {
|
||||||
|
if (!handles.containsKey(fileId)) { // if there is no handle corresponding to the fileId
|
||||||
|
HoodieRowDataCreateHandle rowCreateHandle = new HoodieRowDataCreateHandle(hoodieTable, writeConfig, partitionPath, fileId,
|
||||||
|
instantTime, taskPartitionId, taskId, taskEpochId, rowType);
|
||||||
|
handles.put(fileId, rowCreateHandle);
|
||||||
|
}
|
||||||
|
return handles.get(fileId);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -16,7 +16,7 @@
|
|||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.apache.hudi.sink;
|
package org.apache.hudi.sink.bucket;
|
||||||
|
|
||||||
import org.apache.hudi.common.model.FileSlice;
|
import org.apache.hudi.common.model.FileSlice;
|
||||||
import org.apache.hudi.common.model.HoodieKey;
|
import org.apache.hudi.common.model.HoodieKey;
|
||||||
@@ -26,6 +26,7 @@ import org.apache.hudi.common.table.timeline.HoodieInstant;
|
|||||||
import org.apache.hudi.common.util.Option;
|
import org.apache.hudi.common.util.Option;
|
||||||
import org.apache.hudi.configuration.FlinkOptions;
|
import org.apache.hudi.configuration.FlinkOptions;
|
||||||
import org.apache.hudi.index.bucket.BucketIdentifier;
|
import org.apache.hudi.index.bucket.BucketIdentifier;
|
||||||
|
import org.apache.hudi.sink.StreamWriteFunction;
|
||||||
import org.apache.hudi.table.HoodieFlinkTable;
|
import org.apache.hudi.table.HoodieFlinkTable;
|
||||||
|
|
||||||
import org.apache.flink.configuration.Configuration;
|
import org.apache.flink.configuration.Configuration;
|
||||||
@@ -16,7 +16,7 @@
|
|||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.apache.hudi.sink;
|
package org.apache.hudi.sink.bucket;
|
||||||
|
|
||||||
import org.apache.hudi.sink.common.AbstractWriteOperator;
|
import org.apache.hudi.sink.common.AbstractWriteOperator;
|
||||||
import org.apache.hudi.sink.common.WriteOperatorFactory;
|
import org.apache.hudi.sink.common.WriteOperatorFactory;
|
||||||
@@ -167,7 +167,7 @@ public class BulkInsertWriteFunction<I>
|
|||||||
|
|
||||||
private void initWriterHelper() {
|
private void initWriterHelper() {
|
||||||
String instant = instantToWrite();
|
String instant = instantToWrite();
|
||||||
this.writerHelper = new BulkInsertWriterHelper(this.config, this.writeClient.getHoodieTable(), this.writeClient.getConfig(),
|
this.writerHelper = WriterHelpers.getWriterHelper(this.config, this.writeClient.getHoodieTable(), this.writeClient.getConfig(),
|
||||||
instant, this.taskID, getRuntimeContext().getNumberOfParallelSubtasks(), getRuntimeContext().getAttemptNumber(),
|
instant, this.taskID, getRuntimeContext().getNumberOfParallelSubtasks(), getRuntimeContext().getAttemptNumber(),
|
||||||
this.rowType);
|
this.rowType);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -50,21 +50,21 @@ public class BulkInsertWriterHelper {
|
|||||||
|
|
||||||
private static final Logger LOG = LogManager.getLogger(BulkInsertWriterHelper.class);
|
private static final Logger LOG = LogManager.getLogger(BulkInsertWriterHelper.class);
|
||||||
|
|
||||||
private final String instantTime;
|
protected final String instantTime;
|
||||||
private final int taskPartitionId;
|
protected final int taskPartitionId;
|
||||||
private final long taskId;
|
protected final long taskId;
|
||||||
private final long taskEpochId;
|
protected final long taskEpochId;
|
||||||
private final HoodieTable hoodieTable;
|
protected final HoodieTable hoodieTable;
|
||||||
private final HoodieWriteConfig writeConfig;
|
protected final HoodieWriteConfig writeConfig;
|
||||||
private final RowType rowType;
|
protected final RowType rowType;
|
||||||
private final Boolean arePartitionRecordsSorted;
|
private final Boolean arePartitionRecordsSorted;
|
||||||
private final List<HoodieInternalWriteStatus> writeStatusList = new ArrayList<>();
|
private final List<HoodieInternalWriteStatus> writeStatusList = new ArrayList<>();
|
||||||
private HoodieRowDataCreateHandle handle;
|
private HoodieRowDataCreateHandle handle;
|
||||||
private String lastKnownPartitionPath = null;
|
private String lastKnownPartitionPath = null;
|
||||||
private final String fileIdPrefix;
|
private final String fileIdPrefix;
|
||||||
private int numFilesWritten = 0;
|
private int numFilesWritten = 0;
|
||||||
private final Map<String, HoodieRowDataCreateHandle> handles = new HashMap<>();
|
protected final Map<String, HoodieRowDataCreateHandle> handles = new HashMap<>();
|
||||||
private final RowDataKeyGen keyGen;
|
protected final RowDataKeyGen keyGen;
|
||||||
|
|
||||||
public BulkInsertWriterHelper(Configuration conf, HoodieTable hoodieTable, HoodieWriteConfig writeConfig,
|
public BulkInsertWriterHelper(Configuration conf, HoodieTable hoodieTable, HoodieWriteConfig writeConfig,
|
||||||
String instantTime, int taskPartitionId, long taskId, long taskEpochId, RowType rowType) {
|
String instantTime, int taskPartitionId, long taskId, long taskEpochId, RowType rowType) {
|
||||||
|
|||||||
@@ -0,0 +1,39 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hudi.sink.bulk;
|
||||||
|
|
||||||
|
import org.apache.hudi.config.HoodieWriteConfig;
|
||||||
|
import org.apache.hudi.configuration.OptionsResolver;
|
||||||
|
import org.apache.hudi.sink.bucket.BucketBulkInsertWriterHelper;
|
||||||
|
import org.apache.hudi.table.HoodieTable;
|
||||||
|
|
||||||
|
import org.apache.flink.configuration.Configuration;
|
||||||
|
import org.apache.flink.table.types.logical.RowType;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Factory clazz to generate bulk insert writer helpers.
|
||||||
|
*/
|
||||||
|
public class WriterHelpers {
|
||||||
|
public static BulkInsertWriterHelper getWriterHelper(Configuration conf, HoodieTable<?, ?, ?, ?> hoodieTable, HoodieWriteConfig writeConfig,
|
||||||
|
String instantTime, int taskPartitionId, long taskId, long taskEpochId, RowType rowType) {
|
||||||
|
return OptionsResolver.isBucketIndexType(conf)
|
||||||
|
? new BucketBulkInsertWriterHelper(conf, hoodieTable, writeConfig, instantTime, taskPartitionId, taskId, taskEpochId, rowType)
|
||||||
|
: new BulkInsertWriterHelper(conf, hoodieTable, writeConfig, instantTime, taskPartitionId, taskId, taskEpochId, rowType);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -18,7 +18,6 @@
|
|||||||
|
|
||||||
package org.apache.hudi.sink.partitioner;
|
package org.apache.hudi.sink.partitioner;
|
||||||
|
|
||||||
import org.apache.hudi.common.model.HoodieKey;
|
|
||||||
import org.apache.hudi.index.bucket.BucketIdentifier;
|
import org.apache.hudi.index.bucket.BucketIdentifier;
|
||||||
|
|
||||||
import org.apache.flink.api.common.functions.Partitioner;
|
import org.apache.flink.api.common.functions.Partitioner;
|
||||||
@@ -29,7 +28,7 @@ import org.apache.flink.api.common.functions.Partitioner;
|
|||||||
*
|
*
|
||||||
* @param <T> The type of obj to hash
|
* @param <T> The type of obj to hash
|
||||||
*/
|
*/
|
||||||
public class BucketIndexPartitioner<T extends HoodieKey> implements Partitioner<T> {
|
public class BucketIndexPartitioner<T extends String> implements Partitioner<T> {
|
||||||
|
|
||||||
private final int bucketNum;
|
private final int bucketNum;
|
||||||
private final String indexKeyFields;
|
private final String indexKeyFields;
|
||||||
@@ -40,7 +39,7 @@ public class BucketIndexPartitioner<T extends HoodieKey> implements Partitioner<
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int partition(HoodieKey key, int numPartitions) {
|
public int partition(String key, int numPartitions) {
|
||||||
int curBucket = BucketIdentifier.getBucketId(key, indexKeyFields, bucketNum);
|
int curBucket = BucketIdentifier.getBucketId(key, indexKeyFields, bucketNum);
|
||||||
return BucketIdentifier.mod(curBucket, numPartitions);
|
return BucketIdentifier.mod(curBucket, numPartitions);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -18,16 +18,15 @@
|
|||||||
|
|
||||||
package org.apache.hudi.sink.utils;
|
package org.apache.hudi.sink.utils;
|
||||||
|
|
||||||
import org.apache.hudi.common.model.HoodieKey;
|
|
||||||
import org.apache.hudi.common.model.HoodieRecord;
|
import org.apache.hudi.common.model.HoodieRecord;
|
||||||
import org.apache.hudi.configuration.FlinkOptions;
|
import org.apache.hudi.configuration.FlinkOptions;
|
||||||
import org.apache.hudi.configuration.OptionsResolver;
|
import org.apache.hudi.configuration.OptionsResolver;
|
||||||
import org.apache.hudi.sink.BucketStreamWriteOperator;
|
|
||||||
import org.apache.hudi.sink.CleanFunction;
|
import org.apache.hudi.sink.CleanFunction;
|
||||||
import org.apache.hudi.sink.StreamWriteOperator;
|
import org.apache.hudi.sink.StreamWriteOperator;
|
||||||
import org.apache.hudi.sink.append.AppendWriteOperator;
|
import org.apache.hudi.sink.append.AppendWriteOperator;
|
||||||
import org.apache.hudi.sink.bootstrap.BootstrapOperator;
|
import org.apache.hudi.sink.bootstrap.BootstrapOperator;
|
||||||
import org.apache.hudi.sink.bootstrap.batch.BatchBootstrapOperator;
|
import org.apache.hudi.sink.bootstrap.batch.BatchBootstrapOperator;
|
||||||
|
import org.apache.hudi.sink.bucket.BucketStreamWriteOperator;
|
||||||
import org.apache.hudi.sink.bulk.BulkInsertWriteOperator;
|
import org.apache.hudi.sink.bulk.BulkInsertWriteOperator;
|
||||||
import org.apache.hudi.sink.bulk.RowDataKeyGen;
|
import org.apache.hudi.sink.bulk.RowDataKeyGen;
|
||||||
import org.apache.hudi.sink.bulk.sort.SortOperatorGen;
|
import org.apache.hudi.sink.bulk.sort.SortOperatorGen;
|
||||||
@@ -88,6 +87,18 @@ public class Pipelines {
|
|||||||
*/
|
*/
|
||||||
public static DataStreamSink<Object> bulkInsert(Configuration conf, RowType rowType, DataStream<RowData> dataStream) {
|
public static DataStreamSink<Object> bulkInsert(Configuration conf, RowType rowType, DataStream<RowData> dataStream) {
|
||||||
WriteOperatorFactory<RowData> operatorFactory = BulkInsertWriteOperator.getFactory(conf, rowType);
|
WriteOperatorFactory<RowData> operatorFactory = BulkInsertWriteOperator.getFactory(conf, rowType);
|
||||||
|
if (OptionsResolver.isBucketIndexType(conf)) {
|
||||||
|
int bucketNum = conf.getInteger(FlinkOptions.BUCKET_INDEX_NUM_BUCKETS);
|
||||||
|
String indexKeyFields = conf.getString(FlinkOptions.INDEX_KEY_FIELD);
|
||||||
|
BucketIndexPartitioner<String> partitioner = new BucketIndexPartitioner<>(bucketNum, indexKeyFields);
|
||||||
|
RowDataKeyGen rowDataKeyGen = RowDataKeyGen.instance(conf, rowType);
|
||||||
|
return dataStream.partitionCustom(partitioner, rowDataKeyGen::getRecordKey)
|
||||||
|
.transform("bucket_bulk_insert", TypeInformation.of(Object.class), operatorFactory)
|
||||||
|
.uid("uid_bucket_bulk_insert" + conf.getString(FlinkOptions.TABLE_NAME))
|
||||||
|
.setParallelism(conf.getInteger(FlinkOptions.WRITE_TASKS))
|
||||||
|
.addSink(DummySink.INSTANCE)
|
||||||
|
.name("dummy");
|
||||||
|
}
|
||||||
|
|
||||||
final String[] partitionFields = FilePathUtils.extractPartitionKeys(conf);
|
final String[] partitionFields = FilePathUtils.extractPartitionKeys(conf);
|
||||||
if (partitionFields.length > 0) {
|
if (partitionFields.length > 0) {
|
||||||
@@ -278,8 +289,8 @@ public class Pipelines {
|
|||||||
WriteOperatorFactory<HoodieRecord> operatorFactory = BucketStreamWriteOperator.getFactory(conf);
|
WriteOperatorFactory<HoodieRecord> operatorFactory = BucketStreamWriteOperator.getFactory(conf);
|
||||||
int bucketNum = conf.getInteger(FlinkOptions.BUCKET_INDEX_NUM_BUCKETS);
|
int bucketNum = conf.getInteger(FlinkOptions.BUCKET_INDEX_NUM_BUCKETS);
|
||||||
String indexKeyFields = conf.getString(FlinkOptions.INDEX_KEY_FIELD);
|
String indexKeyFields = conf.getString(FlinkOptions.INDEX_KEY_FIELD);
|
||||||
BucketIndexPartitioner<HoodieKey> partitioner = new BucketIndexPartitioner<>(bucketNum, indexKeyFields);
|
BucketIndexPartitioner<String> partitioner = new BucketIndexPartitioner<>(bucketNum, indexKeyFields);
|
||||||
return dataStream.partitionCustom(partitioner, HoodieRecord::getKey)
|
return dataStream.partitionCustom(partitioner, HoodieRecord::getRecordKey)
|
||||||
.transform("bucket_write", TypeInformation.of(Object.class), operatorFactory)
|
.transform("bucket_write", TypeInformation.of(Object.class), operatorFactory)
|
||||||
.uid("uid_bucket_write" + conf.getString(FlinkOptions.TABLE_NAME))
|
.uid("uid_bucket_write" + conf.getString(FlinkOptions.TABLE_NAME))
|
||||||
.setParallelism(conf.getInteger(FlinkOptions.WRITE_TASKS));
|
.setParallelism(conf.getInteger(FlinkOptions.WRITE_TASKS));
|
||||||
|
|||||||
@@ -18,8 +18,6 @@
|
|||||||
|
|
||||||
package org.apache.hudi.streamer;
|
package org.apache.hudi.streamer;
|
||||||
|
|
||||||
import org.apache.flink.runtime.state.StateBackend;
|
|
||||||
import org.apache.flink.runtime.state.hashmap.HashMapStateBackend;
|
|
||||||
import org.apache.hudi.client.utils.OperationConverter;
|
import org.apache.hudi.client.utils.OperationConverter;
|
||||||
import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload;
|
import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload;
|
||||||
import org.apache.hudi.common.model.WriteOperationType;
|
import org.apache.hudi.common.model.WriteOperationType;
|
||||||
@@ -32,6 +30,8 @@ import org.apache.hudi.util.StreamerUtil;
|
|||||||
|
|
||||||
import com.beust.jcommander.Parameter;
|
import com.beust.jcommander.Parameter;
|
||||||
import org.apache.flink.configuration.Configuration;
|
import org.apache.flink.configuration.Configuration;
|
||||||
|
import org.apache.flink.runtime.state.StateBackend;
|
||||||
|
import org.apache.flink.runtime.state.hashmap.HashMapStateBackend;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
|||||||
@@ -18,12 +18,13 @@
|
|||||||
|
|
||||||
package org.apache.hudi.util;
|
package org.apache.hudi.util;
|
||||||
|
|
||||||
|
import org.apache.hudi.exception.HoodieException;
|
||||||
|
|
||||||
import com.beust.jcommander.IStringConverter;
|
import com.beust.jcommander.IStringConverter;
|
||||||
import com.beust.jcommander.ParameterException;
|
import com.beust.jcommander.ParameterException;
|
||||||
import org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend;
|
import org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend;
|
||||||
import org.apache.flink.runtime.state.StateBackend;
|
import org.apache.flink.runtime.state.StateBackend;
|
||||||
import org.apache.flink.runtime.state.hashmap.HashMapStateBackend;
|
import org.apache.flink.runtime.state.hashmap.HashMapStateBackend;
|
||||||
import org.apache.hudi.exception.HoodieException;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Converter that converts a string into Flink StateBackend.
|
* Converter that converts a string into Flink StateBackend.
|
||||||
|
|||||||
@@ -906,8 +906,8 @@ public class ITTestHoodieDataSource extends AbstractTestBase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@ParameterizedTest
|
@ParameterizedTest
|
||||||
@ValueSource(booleans = {true, false})
|
@MethodSource("indexAndPartitioningParams")
|
||||||
void testBulkInsert(boolean hiveStylePartitioning) {
|
void testBulkInsert(String indexType, boolean hiveStylePartitioning) {
|
||||||
TableEnvironment tableEnv = batchTableEnv;
|
TableEnvironment tableEnv = batchTableEnv;
|
||||||
// csv source
|
// csv source
|
||||||
String csvSourceDDL = TestConfigurations.getCsvSourceDDL("csv_source", "test_source_5.data");
|
String csvSourceDDL = TestConfigurations.getCsvSourceDDL("csv_source", "test_source_5.data");
|
||||||
@@ -917,6 +917,7 @@ public class ITTestHoodieDataSource extends AbstractTestBase {
|
|||||||
.option(FlinkOptions.PATH, tempFile.getAbsolutePath())
|
.option(FlinkOptions.PATH, tempFile.getAbsolutePath())
|
||||||
.option(FlinkOptions.OPERATION, "bulk_insert")
|
.option(FlinkOptions.OPERATION, "bulk_insert")
|
||||||
.option(FlinkOptions.WRITE_BULK_INSERT_SHUFFLE_BY_PARTITION, true)
|
.option(FlinkOptions.WRITE_BULK_INSERT_SHUFFLE_BY_PARTITION, true)
|
||||||
|
.option(FlinkOptions.INDEX_TYPE, indexType)
|
||||||
.option(FlinkOptions.HIVE_STYLE_PARTITIONING, hiveStylePartitioning)
|
.option(FlinkOptions.HIVE_STYLE_PARTITIONING, hiveStylePartitioning)
|
||||||
.end();
|
.end();
|
||||||
tableEnv.executeSql(hoodieTableDDL);
|
tableEnv.executeSql(hoodieTableDDL);
|
||||||
@@ -1262,6 +1263,19 @@ public class ITTestHoodieDataSource extends AbstractTestBase {
|
|||||||
return Stream.of(data).map(Arguments::of);
|
return Stream.of(data).map(Arguments::of);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return test params => (index type, hive style partitioning).
|
||||||
|
*/
|
||||||
|
private static Stream<Arguments> indexAndPartitioningParams() {
|
||||||
|
Object[][] data =
|
||||||
|
new Object[][] {
|
||||||
|
{"FLINK_STATE", false},
|
||||||
|
{"FLINK_STATE", true},
|
||||||
|
{"BUCKET", false},
|
||||||
|
{"BUCKET", true}};
|
||||||
|
return Stream.of(data).map(Arguments::of);
|
||||||
|
}
|
||||||
|
|
||||||
private void execInsertSql(TableEnvironment tEnv, String insert) {
|
private void execInsertSql(TableEnvironment tEnv, String insert) {
|
||||||
TableResult tableResult = tEnv.executeSql(insert);
|
TableResult tableResult = tEnv.executeSql(insert);
|
||||||
// wait to finish
|
// wait to finish
|
||||||
|
|||||||
Reference in New Issue
Block a user