[HUDI-2685] Support scheduling online compaction plan when there are no commit data (#3928)
Co-authored-by: yuzhaojing <yuzhaojing@bytedance.com>
This commit is contained in:
@@ -84,4 +84,12 @@ public class OptionsResolver {
|
||||
final String preCombineField = conf.getString(FlinkOptions.PRECOMBINE_FIELD);
|
||||
return preCombineField.equals(FlinkOptions.NO_PRE_COMBINE) ? null : preCombineField;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether the compaction strategy is based on elapsed delta time.
|
||||
*/
|
||||
public static boolean isDeltaTimeCompaction(Configuration conf) {
|
||||
final String strategy = conf.getString(FlinkOptions.COMPACTION_TRIGGER_STRATEGY).toLowerCase(Locale.ROOT);
|
||||
return FlinkOptions.TIME_ELAPSED.equals(strategy) || FlinkOptions.NUM_OR_TIME.equals(strategy);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -29,11 +29,13 @@ import org.apache.hudi.common.util.CommitUtils;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.common.util.ValidationUtils;
|
||||
import org.apache.hudi.configuration.FlinkOptions;
|
||||
import org.apache.hudi.configuration.OptionsResolver;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
import org.apache.hudi.sink.event.CommitAckEvent;
|
||||
import org.apache.hudi.sink.event.WriteMetadataEvent;
|
||||
import org.apache.hudi.sink.utils.HiveSyncContext;
|
||||
import org.apache.hudi.sink.utils.NonThrownExecutor;
|
||||
import org.apache.hudi.util.CompactionUtil;
|
||||
import org.apache.hudi.util.StreamerUtil;
|
||||
|
||||
import org.apache.flink.annotation.VisibleForTesting;
|
||||
@@ -221,11 +223,13 @@ public class StreamWriteOperatorCoordinator
|
||||
// the stream write task snapshot and flush the data buffer synchronously in sequence,
|
||||
// so a successful checkpoint subsumes the old one (follows the checkpoint subsuming contract)
|
||||
final boolean committed = commitInstant(this.instant, checkpointId);
|
||||
|
||||
if (tableState.scheduleCompaction) {
|
||||
// if async compaction is on, schedule the compaction
|
||||
CompactionUtil.scheduleCompaction(metaClient, writeClient, tableState.isDeltaTimeCompaction, committed);
|
||||
}
|
||||
|
||||
if (committed) {
|
||||
if (tableState.scheduleCompaction) {
|
||||
// if async compaction is on, schedule the compaction
|
||||
writeClient.scheduleCompaction(Option.empty());
|
||||
}
|
||||
// start new instant.
|
||||
startInstant();
|
||||
// sync Hive if it is enabled
|
||||
@@ -557,6 +561,7 @@ public class StreamWriteOperatorCoordinator
|
||||
final boolean scheduleCompaction;
|
||||
final boolean syncHive;
|
||||
final boolean syncMetadata;
|
||||
final boolean isDeltaTimeCompaction;
|
||||
|
||||
private TableState(Configuration conf) {
|
||||
this.operationType = WriteOperationType.fromValue(conf.getString(FlinkOptions.OPERATION));
|
||||
@@ -566,6 +571,7 @@ public class StreamWriteOperatorCoordinator
|
||||
this.scheduleCompaction = StreamerUtil.needsScheduleCompaction(conf);
|
||||
this.syncHive = conf.getBoolean(FlinkOptions.HIVE_SYNC_ENABLED);
|
||||
this.syncMetadata = conf.getBoolean(FlinkOptions.METADATA_ENABLED);
|
||||
this.isDeltaTimeCompaction = OptionsResolver.isDeltaTimeCompaction(conf);
|
||||
}
|
||||
|
||||
public static TableState create(Configuration conf) {
|
||||
|
||||
@@ -75,14 +75,16 @@ public class HoodieFlinkCompactor {
|
||||
// judge whether have operation
|
||||
// to compute the compaction instant time and do compaction.
|
||||
if (cfg.schedule) {
|
||||
String compactionInstantTime = CompactionUtil.getCompactionInstantTime(metaClient);
|
||||
boolean scheduled = writeClient.scheduleCompactionAtInstant(compactionInstantTime, Option.empty());
|
||||
if (!scheduled) {
|
||||
// do nothing.
|
||||
LOG.info("No compaction plan for this job ");
|
||||
return;
|
||||
Option<String> compactionInstantTimeOption = CompactionUtil.getCompactionInstantTime(metaClient);
|
||||
if (compactionInstantTimeOption.isPresent()) {
|
||||
boolean scheduled = writeClient.scheduleCompactionAtInstant(compactionInstantTimeOption.get(), Option.empty());
|
||||
if (!scheduled) {
|
||||
// do nothing.
|
||||
LOG.info("No compaction plan for this job ");
|
||||
return;
|
||||
}
|
||||
table.getMetaClient().reloadActiveTimeline();
|
||||
}
|
||||
table.getMetaClient().reloadActiveTimeline();
|
||||
}
|
||||
|
||||
// fetch the instant based on the configured execution sequence
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
|
||||
package org.apache.hudi.util;
|
||||
|
||||
import org.apache.hudi.client.HoodieFlinkWriteClient;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.TableSchemaResolver;
|
||||
@@ -46,10 +47,36 @@ public class CompactionUtil {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(CompactionUtil.class);
|
||||
|
||||
/**
|
||||
* Schedules a new compaction instant.
|
||||
*
|
||||
* @param metaClient The metadata client
|
||||
* @param writeClient The write client
|
||||
* @param deltaTimeCompaction Whether the compaction is trigger by elapsed delta time
|
||||
* @param committed Whether the last instant was committed successfully
|
||||
*/
|
||||
public static void scheduleCompaction(
|
||||
HoodieTableMetaClient metaClient,
|
||||
HoodieFlinkWriteClient<?> writeClient,
|
||||
boolean deltaTimeCompaction,
|
||||
boolean committed) {
|
||||
if (committed) {
|
||||
writeClient.scheduleCompaction(Option.empty());
|
||||
} else if (deltaTimeCompaction) {
|
||||
// if there are no new commits and the compaction trigger strategy is based on elapsed delta time,
|
||||
// schedules the compaction anyway.
|
||||
metaClient.reloadActiveTimeline();
|
||||
Option<String> compactionInstantTime = CompactionUtil.getCompactionInstantTime(metaClient);
|
||||
if (compactionInstantTime.isPresent()) {
|
||||
writeClient.scheduleCompactionAtInstant(compactionInstantTime.get(), Option.empty());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the compaction instant time.
|
||||
*/
|
||||
public static String getCompactionInstantTime(HoodieTableMetaClient metaClient) {
|
||||
public static Option<String> getCompactionInstantTime(HoodieTableMetaClient metaClient) {
|
||||
Option<HoodieInstant> firstPendingInstant = metaClient.getCommitsTimeline()
|
||||
.filterPendingExcludingCompaction().firstInstant();
|
||||
Option<HoodieInstant> lastCompleteInstant = metaClient.getActiveTimeline().getWriteTimeline()
|
||||
@@ -59,8 +86,11 @@ public class CompactionUtil {
|
||||
String lastCompleteTimestamp = lastCompleteInstant.get().getTimestamp();
|
||||
// Committed and pending compaction instants should have strictly lower timestamps
|
||||
return StreamerUtil.medianInstantTime(firstPendingTimestamp, lastCompleteTimestamp);
|
||||
} else if (!lastCompleteInstant.isPresent()) {
|
||||
LOG.info("No instants to schedule the compaction plan");
|
||||
return Option.empty();
|
||||
} else {
|
||||
return HoodieActiveTimeline.createNewInstantTime();
|
||||
return Option.of(HoodieActiveTimeline.createNewInstantTime());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -407,14 +407,14 @@ public class StreamerUtil {
|
||||
/**
|
||||
* Returns the median instant time between the two given instant times.
|
||||
*/
|
||||
public static String medianInstantTime(String highVal, String lowVal) {
|
||||
public static Option<String> medianInstantTime(String highVal, String lowVal) {
|
||||
try {
|
||||
long high = HoodieActiveTimeline.parseInstantTime(highVal).getTime();
|
||||
long low = HoodieActiveTimeline.parseInstantTime(lowVal).getTime();
|
||||
ValidationUtils.checkArgument(high > low,
|
||||
"Instant [" + highVal + "] should have newer timestamp than instant [" + lowVal + "]");
|
||||
long median = low + (high - low) / 2;
|
||||
return HoodieActiveTimeline.formatInstantTime(new Date(median));
|
||||
return low >= median ? Option.empty() : Option.of(HoodieActiveTimeline.formatInstantTime(new Date(median)));
|
||||
} catch (ParseException e) {
|
||||
throw new HoodieException("Get median instant time with interval [" + lowVal + ", " + highVal + "] error", e);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user