1
0

[HUDI-2135] Add compaction schedule option for flink (#3226)

This commit is contained in:
Danny Chan
2021-07-06 14:11:20 +08:00
committed by GitHub
parent a4dcbb5c5a
commit 1d6978cde4
6 changed files with 70 additions and 15 deletions

View File

@@ -329,6 +329,12 @@ public class FlinkOptions {
// Compaction Options // Compaction Options
// ------------------------------------------------------------------------ // ------------------------------------------------------------------------
/**
 * Boolean option, keyed by {@code compaction.schedule.enabled}, that controls whether
 * the compaction plan is scheduled. The default value is {@code true}.
 */
public static final ConfigOption<Boolean> COMPACTION_SCHEDULE_ENABLED = ConfigOptions
.key("compaction.schedule.enabled")
.booleanType()
.defaultValue(true) // default true for MOR write
.withDescription("Schedule the compaction plan, enabled by default for MOR");
public static final ConfigOption<Boolean> COMPACTION_ASYNC_ENABLED = ConfigOptions public static final ConfigOption<Boolean> COMPACTION_ASYNC_ENABLED = ConfigOptions
.key("compaction.async.enabled") .key("compaction.async.enabled")
.booleanType() .booleanType()

View File

@@ -108,9 +108,9 @@ public class StreamWriteOperatorCoordinator
private final int parallelism; private final int parallelism;
/** /**
* Whether to schedule asynchronous compaction task on finished checkpoints. * Whether to schedule compaction plan on finished checkpoints.
*/ */
private final boolean asyncCompaction; private final boolean scheduleCompaction;
/** /**
* A single-thread executor to handle all the asynchronous jobs of the coordinator. * A single-thread executor to handle all the asynchronous jobs of the coordinator.
@@ -144,7 +144,7 @@ public class StreamWriteOperatorCoordinator
this.conf = conf; this.conf = conf;
this.context = context; this.context = context;
this.parallelism = context.currentParallelism(); this.parallelism = context.currentParallelism();
this.asyncCompaction = StreamerUtil.needsAsyncCompaction(conf); this.scheduleCompaction = StreamerUtil.needsScheduleCompaction(conf);
} }
@Override @Override
@@ -205,7 +205,7 @@ public class StreamWriteOperatorCoordinator
final boolean committed = commitInstant(this.instant); final boolean committed = commitInstant(this.instant);
if (committed) { if (committed) {
// if async compaction is on, schedule the compaction // if compaction scheduling is on, schedule the compaction plan
if (asyncCompaction) { if (scheduleCompaction) {
writeClient.scheduleCompaction(Option.empty()); writeClient.scheduleCompaction(Option.empty());
} }
// start new instant. // start new instant.

View File

@@ -89,12 +89,25 @@ public class FlinkCompactionConfig extends Configuration {
@Parameter(names = {"--compaction-tasks"}, description = "Parallelism of tasks that do actual compaction, default is -1", required = false) @Parameter(names = {"--compaction-tasks"}, description = "Parallelism of tasks that do actual compaction, default is -1", required = false)
public Integer compactionTasks = -1; public Integer compactionTasks = -1;
/**
 * Command line flag ({@code --schedule} / {@code -sc}) that asks this job to schedule
 * the compaction plan itself. Defaults to {@code false}; per the option description,
 * scheduling outside the writer job is not recommended.
 */
@Parameter(names = {"--schedule", "-sc"}, description = "Not recommended. Schedule the compaction plan in this job.\n"
+ "There is a risk of losing data when scheduling compaction outside the writer job.\n"
+ "Scheduling compaction in the writer job and only let this job do the compaction execution is recommended.\n"
+ "Default is false", required = false)
public Boolean schedule = false;
// Supported values of the --seq option.
public static final String SEQ_FIFO = "FIFO";
public static final String SEQ_LIFO = "LIFO";
/**
 * Command line option ({@code --seq}) selecting which pending compaction plan is
 * executed: the oldest one (FIFO) or the latest one (LIFO). Defaults to LIFO.
 */
// NOTE(review): the value is not validated here; confirm how callers treat values
// other than FIFO/LIFO.
@Parameter(names = {"--seq"}, description = "Compaction plan execution sequence, two options are supported:\n"
+ "1). FIFO: execute the oldest plan first;\n"
+ "2). LIFO: execute the latest plan first, by default LIFO", required = false)
public String compactionSeq = SEQ_LIFO;
/** /**
* Transforms a {@code HoodieFlinkCompaction.config} into {@code Configuration}. * Transforms a {@code HoodieFlinkCompaction.config} into {@code Configuration}.
* The latter is more suitable for the table APIs. It reads all the properties * The latter is more suitable for the table APIs. It reads all the properties
* in the properties file (set by `--props` option) and cmd line options * in the properties file (set by `--props` option) and cmd line options
* (set by `--hoodie-conf` option). * (set by `--hoodie-conf` option).
* */ */
public static org.apache.flink.configuration.Configuration toFlinkConfig(FlinkCompactionConfig config) { public static org.apache.flink.configuration.Configuration toFlinkConfig(FlinkCompactionConfig config) {
org.apache.flink.configuration.Configuration conf = new Configuration(); org.apache.flink.configuration.Configuration conf = new Configuration();
@@ -111,6 +124,7 @@ public class FlinkCompactionConfig extends Configuration {
conf.setBoolean(FlinkOptions.CLEAN_ASYNC_ENABLED, config.cleanAsyncEnable); conf.setBoolean(FlinkOptions.CLEAN_ASYNC_ENABLED, config.cleanAsyncEnable);
// use synchronous compaction always // use synchronous compaction always
conf.setBoolean(FlinkOptions.COMPACTION_ASYNC_ENABLED, false); conf.setBoolean(FlinkOptions.COMPACTION_ASYNC_ENABLED, false);
conf.setBoolean(FlinkOptions.COMPACTION_SCHEDULE_ENABLED, config.schedule);
return conf; return conf;
} }

View File

@@ -75,6 +75,7 @@ public class HoodieFlinkCompactor {
// judge whether have operation // judge whether have operation
// to compute the compaction instant time and do compaction. // to compute the compaction instant time and do compaction.
if (cfg.schedule) {
String compactionInstantTime = CompactionUtil.getCompactionInstantTime(metaClient); String compactionInstantTime = CompactionUtil.getCompactionInstantTime(metaClient);
boolean scheduled = writeClient.scheduleCompactionAtInstant(compactionInstantTime, Option.empty()); boolean scheduled = writeClient.scheduleCompactionAtInstant(compactionInstantTime, Option.empty());
if (!scheduled) { if (!scheduled) {
@@ -82,8 +83,21 @@ public class HoodieFlinkCompactor {
LOG.info("No compaction plan for this job "); LOG.info("No compaction plan for this job ");
return; return;
} }
}
table.getMetaClient().reloadActiveTimeline(); table.getMetaClient().reloadActiveTimeline();
// fetch the instant based on the configured execution sequence
HoodieTimeline timeline = table.getActiveTimeline().filterPendingCompactionTimeline()
.filter(instant -> instant.getState() == HoodieInstant.State.REQUESTED);
Option<HoodieInstant> requested = CompactionUtil.isLIFO(cfg.compactionSeq) ? timeline.lastInstant() : timeline.firstInstant();
if (!requested.isPresent()) {
// do nothing.
LOG.info("No compaction plan scheduled, turns on the compaction plan schedule with --schedule option");
return;
}
String compactionInstantTime = requested.get().getTimestamp();
// generate compaction plan // generate compaction plan
// should support configurable commit metadata // should support configurable commit metadata
HoodieCompactionPlan compactionPlan = CompactionUtils.getCompactionPlan( HoodieCompactionPlan compactionPlan = CompactionUtils.getCompactionPlan(
@@ -92,7 +106,7 @@ public class HoodieFlinkCompactor {
if (compactionPlan == null || (compactionPlan.getOperations() == null) if (compactionPlan == null || (compactionPlan.getOperations() == null)
|| (compactionPlan.getOperations().isEmpty())) { || (compactionPlan.getOperations().isEmpty())) {
// No compaction plan, do nothing and return. // No compaction plan, do nothing and return.
LOG.info("No compaction plan for this job and instant " + compactionInstantTime); LOG.info("No compaction plan for instant " + compactionInstantTime);
return; return;
} }

View File

@@ -26,6 +26,7 @@ import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.Option;
import org.apache.hudi.configuration.FlinkOptions; import org.apache.hudi.configuration.FlinkOptions;
import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.sink.compact.FlinkCompactionConfig;
import org.apache.hudi.table.HoodieFlinkTable; import org.apache.hudi.table.HoodieFlinkTable;
import org.apache.avro.Schema; import org.apache.avro.Schema;
@@ -35,6 +36,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import java.io.IOException; import java.io.IOException;
import java.util.Locale;
/** /**
* Utilities for flink hudi compaction. * Utilities for flink hudi compaction.
@@ -106,4 +108,11 @@ public class CompactionUtil {
table.getMetaClient().reloadActiveTimeline(); table.getMetaClient().reloadActiveTimeline();
}); });
} }
/**
 * Returns whether the given compaction plan execution sequence is LIFO.
 *
 * <p>The comparison ignores case by upper-casing the input with {@link Locale#ROOT}
 * before matching it against {@code FlinkCompactionConfig.SEQ_LIFO}. Every other
 * value, including {@code null}, is reported as not LIFO.
 *
 * @param seq the configured execution sequence, may be {@code null}
 * @return {@code true} only if {@code seq} is non-null and equals LIFO ignoring case
 */
public static boolean isLIFO(String seq) {
  // Guard against an unset sequence instead of failing with a NullPointerException.
  return seq != null
      && seq.toUpperCase(Locale.ROOT).equals(FlinkCompactionConfig.SEQ_LIFO);
}
} }

View File

@@ -246,6 +246,18 @@ public class StreamerUtil {
&& conf.getBoolean(FlinkOptions.COMPACTION_ASYNC_ENABLED); && conf.getBoolean(FlinkOptions.COMPACTION_ASYNC_ENABLED);
} }
/**
 * Tells whether the compaction plan should be scheduled for the given configuration.
 *
 * <p>Scheduling is needed only when the configured table type is MERGE_ON_READ
 * (compared case-insensitively) and the schedule option is switched on.
 *
 * @param conf The flink configuration.
 * @return {@code true} if the compaction plan needs to be scheduled
 */
public static boolean needsScheduleCompaction(Configuration conf) {
  final String tableType = conf.getString(FlinkOptions.TABLE_TYPE).toUpperCase(Locale.ROOT);
  if (!tableType.equals(FlinkOptions.TABLE_TYPE_MERGE_ON_READ)) {
    // not a MOR table: the schedule option is irrelevant and is not read
    return false;
  }
  return conf.getBoolean(FlinkOptions.COMPACTION_SCHEDULE_ENABLED);
}
/** /**
* Creates the meta client. * Creates the meta client.
*/ */