[MINOR] Some cosmetic changes for Flink (#3503)
This commit is contained in:
@@ -148,7 +148,7 @@ public class BootstrapFunction<I, O extends HoodieRecord>
|
||||
}
|
||||
|
||||
/**
|
||||
* Load all the indices of give partition path into the backup state.
|
||||
* Loads all the indices of the given partition path into the backup state.
|
||||
*
|
||||
* @param partitionPath The partition path
|
||||
*/
|
||||
|
||||
@@ -29,7 +29,15 @@ import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* The function to load specify partition index from existing hoodieTable.
|
||||
* The function to load the index from an existing Hoodie table.
|
||||
*
|
||||
* <p>This function should only be used for bounded source.
|
||||
*
|
||||
* <p>When a record comes in, the function first checks whether the partition path of the record is already loaded,
|
||||
* if the partition is not loaded yet, loads the entire partition and sends the index records to downstream operators
|
||||
* before it sends the input record; if the partition is loaded already, sends the input record directly.
|
||||
*
|
||||
* <p>The input records should be shuffled by the partition path to avoid repeated loading.
|
||||
*/
|
||||
public class BatchBootstrapFunction<I, O extends HoodieRecord>
|
||||
extends BootstrapFunction<I, O> {
|
||||
@@ -61,5 +69,4 @@ public class BatchBootstrapFunction<I, O extends HoodieRecord>
|
||||
// send the trigger record
|
||||
out.collect((O) value);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -85,7 +85,22 @@ public class Pipelines {
|
||||
.name("clean_commits");
|
||||
}
|
||||
|
||||
public static DataStream<HoodieRecord> bootstrap(Configuration conf, RowType rowType, int defaultParallelism, DataStream<RowData> dataStream) {
|
||||
public static DataStream<HoodieRecord> bootstrap(
|
||||
Configuration conf,
|
||||
RowType rowType,
|
||||
int defaultParallelism,
|
||||
DataStream<RowData> dataStream,
|
||||
boolean bounded) {
|
||||
return bounded
|
||||
? boundedBootstrap(conf, rowType, defaultParallelism, dataStream)
|
||||
: streamBootstrap(conf, rowType, defaultParallelism, dataStream);
|
||||
}
|
||||
|
||||
private static DataStream<HoodieRecord> streamBootstrap(
|
||||
Configuration conf,
|
||||
RowType rowType,
|
||||
int defaultParallelism,
|
||||
DataStream<RowData> dataStream) {
|
||||
DataStream<HoodieRecord> dataStream1 = rowDataToHoodieRecord(conf, rowType, dataStream);
|
||||
|
||||
if (conf.getBoolean(FlinkOptions.INDEX_BOOTSTRAP_ENABLED)) {
|
||||
@@ -101,8 +116,11 @@ public class Pipelines {
|
||||
return dataStream1;
|
||||
}
|
||||
|
||||
public static DataStream<HoodieRecord> batchBootstrap(Configuration conf, RowType rowType, int defaultParallelism, DataStream<RowData> dataStream) {
|
||||
// shuffle and sort by partition keys
|
||||
private static DataStream<HoodieRecord> boundedBootstrap(
|
||||
Configuration conf,
|
||||
RowType rowType,
|
||||
int defaultParallelism,
|
||||
DataStream<RowData> dataStream) {
|
||||
final String[] partitionFields = FilePathUtils.extractPartitionKeys(conf);
|
||||
if (partitionFields.length > 0) {
|
||||
RowDataKeyGen rowDataKeyGen = RowDataKeyGen.instance(conf, rowType);
|
||||
|
||||
@@ -77,9 +77,9 @@ public class HoodieTableSink implements DynamicTableSink, SupportsPartitioning,
|
||||
|
||||
// default parallelism
|
||||
int parallelism = dataStream.getExecutionConfig().getParallelism();
|
||||
final DataStream<HoodieRecord> dataStream1 = context.isBounded()
|
||||
? Pipelines.batchBootstrap(conf, rowType, parallelism, dataStream)
|
||||
: Pipelines.bootstrap(conf, rowType, parallelism, dataStream);
|
||||
|
||||
// bootstrap
|
||||
final DataStream<HoodieRecord> dataStream1 = Pipelines.bootstrap(conf, rowType, parallelism, dataStream, context.isBounded());
|
||||
|
||||
// write pipeline
|
||||
DataStream<Object> pipeline = Pipelines.hoodieStreamWrite(conf, parallelism, dataStream1);
|
||||
|
||||
Reference in New Issue
Block a user