1
0

[HUDI-2746] Do not bootstrap for flink insert overwrite (#3980)

This commit is contained in:
Danny Chan
2021-11-12 12:17:58 +08:00
committed by GitHub
parent 6b93ccca9b
commit bc511edc85
5 changed files with 66 additions and 8 deletions

View File

@@ -98,15 +98,42 @@ public class Pipelines {
.name("dummy");
}
/**
 * Builds the bootstrap pipeline for a streaming job.
 *
 * <p>Shorthand for the six-argument {@code bootstrap} overload with both the
 * {@code bounded} and the {@code overwrite} switches turned off.
 *
 * @param conf The configuration
 * @param rowType The row type
 * @param defaultParallelism The default parallelism
 * @param dataStream The data stream
 * @return Whatever the six-argument overload returns for these arguments
 */
public static DataStream<HoodieRecord> bootstrap(
    Configuration conf,
    RowType rowType,
    int defaultParallelism,
    DataStream<RowData> dataStream) {
  // Name the two flags so the delegation reads without positional guesswork.
  final boolean bounded = false;
  final boolean overwrite = false;
  return bootstrap(conf, rowType, defaultParallelism, dataStream, bounded, overwrite);
}
/**
 * Constructs bootstrap pipeline, choosing the variant from the given flags.
 *
 * <p>With {@code overwrite} set, neither bootstrap variant is used and the result comes
 * straight from {@code rowDataToHoodieRecord}. Otherwise a bounded source goes through
 * {@code boundedBootstrap} unless {@code FlinkOptions.INDEX_GLOBAL_ENABLED} is switched on,
 * and every remaining combination goes through {@code streamBootstrap}.
 *
 * @param conf The configuration
 * @param rowType The row type
 * @param defaultParallelism The default parallelism
 * @param dataStream The data stream
 * @param bounded Whether the source is bounded
 * @param overwrite Whether it is insert overwrite
 * @return The record stream produced by the selected pipeline
 */
public static DataStream<HoodieRecord> bootstrap(
Configuration conf,
RowType rowType,
int defaultParallelism,
DataStream<RowData> dataStream,
// NOTE(review): the next four lines look like the pre-change signature tail and body kept by
// the diff view (they dispatch on `bounded` alone) — confirm before treating this span as
// compilable source.
boolean bounded) {
return bounded
? boundedBootstrap(conf, rowType, defaultParallelism, dataStream)
: streamBootstrap(conf, rowType, defaultParallelism, dataStream);
boolean bounded,
boolean overwrite) {
// The global-index switch is only consulted by the bounded branch below.
final boolean globalIndex = conf.getBoolean(FlinkOptions.INDEX_GLOBAL_ENABLED);
if (overwrite) {
// Insert overwrite: neither bootstrap variant is used.
return rowDataToHoodieRecord(conf, rowType, dataStream);
} else if (bounded && !globalIndex) {
return boundedBootstrap(conf, rowType, defaultParallelism, dataStream);
} else {
// Unbounded input, or bounded input with the global index enabled.
return streamBootstrap(conf, rowType, defaultParallelism, dataStream);
}
}
private static DataStream<HoodieRecord> streamBootstrap(

View File

@@ -96,7 +96,7 @@ public class HoodieFlinkStreamer {
}
}
DataStream<HoodieRecord> hoodieRecordDataStream = Pipelines.bootstrap(conf, rowType, parallelism, dataStream, false);
DataStream<HoodieRecord> hoodieRecordDataStream = Pipelines.bootstrap(conf, rowType, parallelism, dataStream);
DataStream<Object> pipeline = Pipelines.hoodieStreamWrite(conf, parallelism, hoodieRecordDataStream);
if (StreamerUtil.needsAsyncCompaction(conf)) {
Pipelines.compact(conf, pipeline);

View File

@@ -86,7 +86,8 @@ public class HoodieTableSink implements DynamicTableSink, SupportsPartitioning,
DataStream<Object> pipeline;
// bootstrap
final DataStream<HoodieRecord> hoodieRecordDataStream = Pipelines.bootstrap(conf, rowType, parallelism, dataStream, context.isBounded());
final DataStream<HoodieRecord> hoodieRecordDataStream =
Pipelines.bootstrap(conf, rowType, parallelism, dataStream, context.isBounded(), overwrite);
// write pipeline
pipeline = Pipelines.hoodieStreamWrite(conf, parallelism, hoodieRecordDataStream);
// compaction