[HUDI-2135] Add compaction schedule option for flink (#3226)
This commit is contained in:
@@ -329,6 +329,12 @@ public class FlinkOptions {
|
|||||||
// Compaction Options
|
// Compaction Options
|
||||||
// ------------------------------------------------------------------------
|
// ------------------------------------------------------------------------
|
||||||
|
|
||||||
|
public static final ConfigOption<Boolean> COMPACTION_SCHEDULE_ENABLED = ConfigOptions
|
||||||
|
.key("compaction.schedule.enabled")
|
||||||
|
.booleanType()
|
||||||
|
.defaultValue(true) // default true for MOR write
|
||||||
|
.withDescription("Schedule the compaction plan, enabled by default for MOR");
|
||||||
|
|
||||||
public static final ConfigOption<Boolean> COMPACTION_ASYNC_ENABLED = ConfigOptions
|
public static final ConfigOption<Boolean> COMPACTION_ASYNC_ENABLED = ConfigOptions
|
||||||
.key("compaction.async.enabled")
|
.key("compaction.async.enabled")
|
||||||
.booleanType()
|
.booleanType()
|
||||||
|
|||||||
@@ -108,9 +108,9 @@ public class StreamWriteOperatorCoordinator
|
|||||||
private final int parallelism;
|
private final int parallelism;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Whether to schedule asynchronous compaction task on finished checkpoints.
|
* Whether to schedule compaction plan on finished checkpoints.
|
||||||
*/
|
*/
|
||||||
private final boolean asyncCompaction;
|
private final boolean scheduleCompaction;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A single-thread executor to handle all the asynchronous jobs of the coordinator.
|
* A single-thread executor to handle all the asynchronous jobs of the coordinator.
|
||||||
@@ -144,7 +144,7 @@ public class StreamWriteOperatorCoordinator
|
|||||||
this.conf = conf;
|
this.conf = conf;
|
||||||
this.context = context;
|
this.context = context;
|
||||||
this.parallelism = context.currentParallelism();
|
this.parallelism = context.currentParallelism();
|
||||||
this.asyncCompaction = StreamerUtil.needsAsyncCompaction(conf);
|
this.scheduleCompaction = StreamerUtil.needsScheduleCompaction(conf);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -205,7 +205,7 @@ public class StreamWriteOperatorCoordinator
|
|||||||
final boolean committed = commitInstant(this.instant);
|
final boolean committed = commitInstant(this.instant);
|
||||||
if (committed) {
|
if (committed) {
|
||||||
// if async compaction is on, schedule the compaction
|
// if compaction scheduling is on, schedule the compaction plan
|
||||||
if (asyncCompaction) {
|
if (scheduleCompaction) {
|
||||||
writeClient.scheduleCompaction(Option.empty());
|
writeClient.scheduleCompaction(Option.empty());
|
||||||
}
|
}
|
||||||
// start new instant.
|
// start new instant.
|
||||||
|
|||||||
@@ -89,12 +89,25 @@ public class FlinkCompactionConfig extends Configuration {
|
|||||||
@Parameter(names = {"--compaction-tasks"}, description = "Parallelism of tasks that do actual compaction, default is -1", required = false)
|
@Parameter(names = {"--compaction-tasks"}, description = "Parallelism of tasks that do actual compaction, default is -1", required = false)
|
||||||
public Integer compactionTasks = -1;
|
public Integer compactionTasks = -1;
|
||||||
|
|
||||||
|
@Parameter(names = {"--schedule", "-sc"}, description = "Not recommended. Schedule the compaction plan in this job.\n"
|
||||||
|
+ "There is a risk of losing data when scheduling compaction outside the writer job.\n"
|
||||||
|
+ "Scheduling compaction in the writer job and only let this job do the compaction execution is recommended.\n"
|
||||||
|
+ "Default is false", required = false)
|
||||||
|
public Boolean schedule = false;
|
||||||
|
|
||||||
|
public static final String SEQ_FIFO = "FIFO";
|
||||||
|
public static final String SEQ_LIFO = "LIFO";
|
||||||
|
@Parameter(names = {"--seq"}, description = "Compaction plan execution sequence, two options are supported:\n"
|
||||||
|
+ "1). FIFO: execute the oldest plan first;\n"
|
||||||
|
+ "2). LIFO: execute the latest plan first, by default LIFO", required = false)
|
||||||
|
public String compactionSeq = SEQ_LIFO;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Transforms a {@code HoodieFlinkCompaction.config} into {@code Configuration}.
|
* Transforms a {@code HoodieFlinkCompaction.config} into {@code Configuration}.
|
||||||
* The latter is more suitable for the table APIs. It reads all the properties
|
* The latter is more suitable for the table APIs. It reads all the properties
|
||||||
* in the properties file (set by `--props` option) and cmd line options
|
* in the properties file (set by `--props` option) and cmd line options
|
||||||
* (set by `--hoodie-conf` option).
|
* (set by `--hoodie-conf` option).
|
||||||
* */
|
*/
|
||||||
public static org.apache.flink.configuration.Configuration toFlinkConfig(FlinkCompactionConfig config) {
|
public static org.apache.flink.configuration.Configuration toFlinkConfig(FlinkCompactionConfig config) {
|
||||||
org.apache.flink.configuration.Configuration conf = new Configuration();
|
org.apache.flink.configuration.Configuration conf = new Configuration();
|
||||||
|
|
||||||
@@ -111,6 +124,7 @@ public class FlinkCompactionConfig extends Configuration {
|
|||||||
conf.setBoolean(FlinkOptions.CLEAN_ASYNC_ENABLED, config.cleanAsyncEnable);
|
conf.setBoolean(FlinkOptions.CLEAN_ASYNC_ENABLED, config.cleanAsyncEnable);
|
||||||
// use synchronous compaction always
|
// use synchronous compaction always
|
||||||
conf.setBoolean(FlinkOptions.COMPACTION_ASYNC_ENABLED, false);
|
conf.setBoolean(FlinkOptions.COMPACTION_ASYNC_ENABLED, false);
|
||||||
|
conf.setBoolean(FlinkOptions.COMPACTION_SCHEDULE_ENABLED, config.schedule);
|
||||||
|
|
||||||
return conf;
|
return conf;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -75,6 +75,7 @@ public class HoodieFlinkCompactor {
|
|||||||
|
|
||||||
// judge whether have operation
|
// judge whether have operation
|
||||||
// to compute the compaction instant time and do compaction.
|
// to compute the compaction instant time and do compaction.
|
||||||
|
if (cfg.schedule) {
|
||||||
String compactionInstantTime = CompactionUtil.getCompactionInstantTime(metaClient);
|
String compactionInstantTime = CompactionUtil.getCompactionInstantTime(metaClient);
|
||||||
boolean scheduled = writeClient.scheduleCompactionAtInstant(compactionInstantTime, Option.empty());
|
boolean scheduled = writeClient.scheduleCompactionAtInstant(compactionInstantTime, Option.empty());
|
||||||
if (!scheduled) {
|
if (!scheduled) {
|
||||||
@@ -82,8 +83,21 @@ public class HoodieFlinkCompactor {
|
|||||||
LOG.info("No compaction plan for this job ");
|
LOG.info("No compaction plan for this job ");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
table.getMetaClient().reloadActiveTimeline();
|
table.getMetaClient().reloadActiveTimeline();
|
||||||
|
|
||||||
|
// fetch the instant based on the configured execution sequence
|
||||||
|
HoodieTimeline timeline = table.getActiveTimeline().filterPendingCompactionTimeline()
|
||||||
|
.filter(instant -> instant.getState() == HoodieInstant.State.REQUESTED);
|
||||||
|
Option<HoodieInstant> requested = CompactionUtil.isLIFO(cfg.compactionSeq) ? timeline.lastInstant() : timeline.firstInstant();
|
||||||
|
if (!requested.isPresent()) {
|
||||||
|
// do nothing.
|
||||||
|
LOG.info("No compaction plan scheduled, turns on the compaction plan schedule with --schedule option");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
String compactionInstantTime = requested.get().getTimestamp();
|
||||||
// generate compaction plan
|
// generate compaction plan
|
||||||
// should support configurable commit metadata
|
// should support configurable commit metadata
|
||||||
HoodieCompactionPlan compactionPlan = CompactionUtils.getCompactionPlan(
|
HoodieCompactionPlan compactionPlan = CompactionUtils.getCompactionPlan(
|
||||||
@@ -92,7 +106,7 @@ public class HoodieFlinkCompactor {
|
|||||||
if (compactionPlan == null || (compactionPlan.getOperations() == null)
|
if (compactionPlan == null || (compactionPlan.getOperations() == null)
|
||||||
|| (compactionPlan.getOperations().isEmpty())) {
|
|| (compactionPlan.getOperations().isEmpty())) {
|
||||||
// No compaction plan, do nothing and return.
|
// No compaction plan, do nothing and return.
|
||||||
LOG.info("No compaction plan for this job and instant " + compactionInstantTime);
|
LOG.info("No compaction plan for instant " + compactionInstantTime);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -26,6 +26,7 @@ import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
|||||||
import org.apache.hudi.common.util.Option;
|
import org.apache.hudi.common.util.Option;
|
||||||
import org.apache.hudi.configuration.FlinkOptions;
|
import org.apache.hudi.configuration.FlinkOptions;
|
||||||
import org.apache.hudi.exception.HoodieIOException;
|
import org.apache.hudi.exception.HoodieIOException;
|
||||||
|
import org.apache.hudi.sink.compact.FlinkCompactionConfig;
|
||||||
import org.apache.hudi.table.HoodieFlinkTable;
|
import org.apache.hudi.table.HoodieFlinkTable;
|
||||||
|
|
||||||
import org.apache.avro.Schema;
|
import org.apache.avro.Schema;
|
||||||
@@ -35,6 +36,7 @@ import org.slf4j.Logger;
|
|||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Locale;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Utilities for flink hudi compaction.
|
* Utilities for flink hudi compaction.
|
||||||
@@ -106,4 +108,11 @@ public class CompactionUtil {
|
|||||||
table.getMetaClient().reloadActiveTimeline();
|
table.getMetaClient().reloadActiveTimeline();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns whether the execution sequence is LIFO.
|
||||||
|
*/
|
||||||
|
public static boolean isLIFO(String seq) {
|
||||||
|
return seq.toUpperCase(Locale.ROOT).equals(FlinkCompactionConfig.SEQ_LIFO);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -246,6 +246,18 @@ public class StreamerUtil {
|
|||||||
&& conf.getBoolean(FlinkOptions.COMPACTION_ASYNC_ENABLED);
|
&& conf.getBoolean(FlinkOptions.COMPACTION_ASYNC_ENABLED);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns whether needs to schedule the compaction plan.
|
||||||
|
*
|
||||||
|
* @param conf The flink configuration.
|
||||||
|
*/
|
||||||
|
public static boolean needsScheduleCompaction(Configuration conf) {
|
||||||
|
return conf.getString(FlinkOptions.TABLE_TYPE)
|
||||||
|
.toUpperCase(Locale.ROOT)
|
||||||
|
.equals(FlinkOptions.TABLE_TYPE_MERGE_ON_READ)
|
||||||
|
&& conf.getBoolean(FlinkOptions.COMPACTION_SCHEDULE_ENABLED);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates the meta client.
|
* Creates the meta client.
|
||||||
*/
|
*/
|
||||||
|
|||||||
Reference in New Issue
Block a user