[HUDI-2215] Add rateLimiter when Flink writes to hudi. (#3338)
Co-authored-by: wangminchao <wangminchao@asinking.com>
This commit is contained in:
@@ -312,6 +312,12 @@ public class FlinkOptions extends HoodieConfig {
|
|||||||
.withDescription("Maximum memory in MB for a write task, when the threshold hits,\n"
|
.withDescription("Maximum memory in MB for a write task, when the threshold hits,\n"
|
||||||
+ "it flushes the max size data bucket to avoid OOM, default 1GB");
|
+ "it flushes the max size data bucket to avoid OOM, default 1GB");
|
||||||
|
|
||||||
|
public static final ConfigOption<Long> WRITE_RATE_LIMIT = ConfigOptions
|
||||||
|
.key("write.rate.limit")
|
||||||
|
.longType()
|
||||||
|
.defaultValue(0L) // default no limit
|
||||||
|
.withDescription("Write record rate limit per second to prevent traffic jitter and improve stability, default 0 (no limit)");
|
||||||
|
|
||||||
public static final ConfigOption<Double> WRITE_BATCH_SIZE = ConfigOptions
|
public static final ConfigOption<Double> WRITE_BATCH_SIZE = ConfigOptions
|
||||||
.key("write.batch.size")
|
.key("write.batch.size")
|
||||||
.doubleType()
|
.doubleType()
|
||||||
|
|||||||
@@ -43,7 +43,7 @@ import static org.apache.hudi.util.StreamerUtil.flinkConf2TypedProperties;
|
|||||||
/**
|
/**
|
||||||
* Function that transforms RowData to HoodieRecord.
|
* Function that transforms RowData to HoodieRecord.
|
||||||
*/
|
*/
|
||||||
public class RowDataToHoodieFunction<I extends RowData, O extends HoodieRecord<?>>
|
public class RowDataToHoodieFunction<I extends RowData, O extends HoodieRecord>
|
||||||
extends RichMapFunction<I, O> {
|
extends RichMapFunction<I, O> {
|
||||||
/**
|
/**
|
||||||
* Row type of the input.
|
* Row type of the input.
|
||||||
|
|||||||
@@ -0,0 +1,61 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hudi.sink.transform;
|
||||||
|
|
||||||
|
import org.apache.hudi.common.model.HoodieRecord;
|
||||||
|
import org.apache.hudi.configuration.FlinkOptions;
|
||||||
|
|
||||||
|
import org.apache.flink.configuration.Configuration;
|
||||||
|
import org.apache.flink.shaded.guava18.com.google.common.util.concurrent.RateLimiter;
|
||||||
|
import org.apache.flink.table.data.RowData;
|
||||||
|
import org.apache.flink.table.types.logical.RowType;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Function that transforms RowData to a HoodieRecord with RateLimit.
|
||||||
|
*/
|
||||||
|
public class RowDataToHoodieFunctionWithRateLimit<I extends RowData, O extends HoodieRecord>
|
||||||
|
extends RowDataToHoodieFunction<I, O> {
|
||||||
|
/**
|
||||||
|
* Total rate limit per second for this job.
|
||||||
|
*/
|
||||||
|
private final double totalLimit;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Rate limit per second for per task.
|
||||||
|
*/
|
||||||
|
private transient RateLimiter rateLimiter;
|
||||||
|
|
||||||
|
public RowDataToHoodieFunctionWithRateLimit(RowType rowType, Configuration config) {
|
||||||
|
super(rowType, config);
|
||||||
|
this.totalLimit = config.getLong(FlinkOptions.WRITE_RATE_LIMIT);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void open(Configuration parameters) throws Exception {
|
||||||
|
super.open(parameters);
|
||||||
|
this.rateLimiter =
|
||||||
|
RateLimiter.create(totalLimit / getRuntimeContext().getNumberOfParallelSubtasks());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public O map(I i) throws Exception {
|
||||||
|
rateLimiter.acquire();
|
||||||
|
return super.map(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,45 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hudi.sink.transform;
|
||||||
|
|
||||||
|
import org.apache.hudi.common.model.HoodieRecord;
|
||||||
|
import org.apache.hudi.configuration.FlinkOptions;
|
||||||
|
|
||||||
|
import org.apache.flink.configuration.Configuration;
|
||||||
|
import org.apache.flink.table.data.RowData;
|
||||||
|
import org.apache.flink.table.types.logical.RowType;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Utilities for {@link RowDataToHoodieFunction}.
|
||||||
|
*/
|
||||||
|
public abstract class RowDataToHoodieFunctions {
|
||||||
|
private RowDataToHoodieFunctions() {}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a {@link RowDataToHoodieFunction} instance based on the given configuration.
|
||||||
|
*/
|
||||||
|
@SuppressWarnings("rawtypes")
|
||||||
|
public static RowDataToHoodieFunction<RowData, HoodieRecord> create(RowType rowType, Configuration conf) {
|
||||||
|
if (conf.getLong(FlinkOptions.WRITE_RATE_LIMIT) > 0) {
|
||||||
|
return new RowDataToHoodieFunctionWithRateLimit<>(rowType, conf);
|
||||||
|
} else {
|
||||||
|
return new RowDataToHoodieFunction<>(rowType, conf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -31,7 +31,7 @@ import org.apache.hudi.sink.compact.CompactionPlanEvent;
|
|||||||
import org.apache.hudi.sink.compact.CompactionPlanOperator;
|
import org.apache.hudi.sink.compact.CompactionPlanOperator;
|
||||||
import org.apache.hudi.sink.partitioner.BucketAssignFunction;
|
import org.apache.hudi.sink.partitioner.BucketAssignFunction;
|
||||||
import org.apache.hudi.sink.partitioner.BucketAssignOperator;
|
import org.apache.hudi.sink.partitioner.BucketAssignOperator;
|
||||||
import org.apache.hudi.sink.transform.RowDataToHoodieFunction;
|
import org.apache.hudi.sink.transform.RowDataToHoodieFunctions;
|
||||||
import org.apache.hudi.sink.transform.Transformer;
|
import org.apache.hudi.sink.transform.Transformer;
|
||||||
import org.apache.hudi.util.AvroSchemaConverter;
|
import org.apache.hudi.util.AvroSchemaConverter;
|
||||||
import org.apache.hudi.util.StreamerUtil;
|
import org.apache.hudi.util.StreamerUtil;
|
||||||
@@ -112,7 +112,8 @@ public class HoodieFlinkStreamer {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
DataStream<HoodieRecord> dataStream2 = dataStream.map(new RowDataToHoodieFunction<>(rowType, conf), TypeInformation.of(HoodieRecord.class));
|
DataStream<HoodieRecord> dataStream2 = dataStream
|
||||||
|
.map(RowDataToHoodieFunctions.create(rowType, conf), TypeInformation.of(HoodieRecord.class));
|
||||||
|
|
||||||
if (conf.getBoolean(FlinkOptions.INDEX_BOOTSTRAP_ENABLED)) {
|
if (conf.getBoolean(FlinkOptions.INDEX_BOOTSTRAP_ENABLED)) {
|
||||||
dataStream2 = dataStream2.rebalance()
|
dataStream2 = dataStream2.rebalance()
|
||||||
|
|||||||
@@ -32,7 +32,7 @@ import org.apache.hudi.sink.compact.CompactionPlanEvent;
|
|||||||
import org.apache.hudi.sink.compact.CompactionPlanOperator;
|
import org.apache.hudi.sink.compact.CompactionPlanOperator;
|
||||||
import org.apache.hudi.sink.partitioner.BucketAssignFunction;
|
import org.apache.hudi.sink.partitioner.BucketAssignFunction;
|
||||||
import org.apache.hudi.sink.partitioner.BucketAssignOperator;
|
import org.apache.hudi.sink.partitioner.BucketAssignOperator;
|
||||||
import org.apache.hudi.sink.transform.RowDataToHoodieFunction;
|
import org.apache.hudi.sink.transform.RowDataToHoodieFunctions;
|
||||||
import org.apache.hudi.util.StreamerUtil;
|
import org.apache.hudi.util.StreamerUtil;
|
||||||
|
|
||||||
import org.apache.flink.annotation.VisibleForTesting;
|
import org.apache.flink.annotation.VisibleForTesting;
|
||||||
@@ -105,7 +105,7 @@ public class HoodieTableSink implements DynamicTableSink, SupportsPartitioning,
|
|||||||
StreamWriteOperatorFactory<HoodieRecord> operatorFactory = new StreamWriteOperatorFactory<>(conf);
|
StreamWriteOperatorFactory<HoodieRecord> operatorFactory = new StreamWriteOperatorFactory<>(conf);
|
||||||
|
|
||||||
DataStream<HoodieRecord> dataStream1 = dataStream
|
DataStream<HoodieRecord> dataStream1 = dataStream
|
||||||
.map(new RowDataToHoodieFunction<>(rowType, conf), TypeInformation.of(HoodieRecord.class));
|
.map(RowDataToHoodieFunctions.create(rowType, conf), TypeInformation.of(HoodieRecord.class));
|
||||||
|
|
||||||
// bootstrap index
|
// bootstrap index
|
||||||
// TODO: This is a very time-consuming operation, will optimization
|
// TODO: This is a very time-consuming operation, will optimization
|
||||||
|
|||||||
Reference in New Issue
Block a user