[HUDI-2737] Use earliest instant by default for async compaction and clustering jobs (#3991)
Address review comments. Fix test failures.

When --instant-time is not supplied for an execute run, HoodieCompactor and HoodieClusteringJob now fall back to the earliest REQUESTED compaction / replacecommit instant on the active timeline, via a new HoodieTimeline#firstInstant(action, state) API.

Co-authored-by: Sagar Sumit <sagarsumit09@gmail.com>
@@ -274,6 +274,12 @@ public class HoodieDefaultTimeline implements HoodieTimeline {
     return Option.fromJavaOptional(instants.stream().findFirst());
   }
 
+  @Override
+  public Option<HoodieInstant> firstInstant(String action, State state) {
+    return Option.fromJavaOptional(instants.stream()
+        .filter(s -> action.equals(s.getAction()) && state.equals(s.getState())).findFirst());
+  }
+
   @Override
   public Option<HoodieInstant> nthInstant(int n) {
     if (empty() || n >= countInstants()) {
@@ -209,6 +209,13 @@ public interface HoodieTimeline extends Serializable {
    */
  Option<HoodieInstant> firstInstant();

+  /**
+   * @param action Instant action String.
+   * @param state Instant State.
+   * @return first instant of a specific action and state if available
+   */
+  Option<HoodieInstant> firstInstant(String action, State state);
+
  /**
   * @return nth completed instant from the first completed instant
   */
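A minimal usage sketch of the new overload (the helper name below is hypothetical and not part of this patch; metaClient is assumed to be a HoodieTableMetaClient built with the active timeline loaded):

    // Resolve the earliest clustering plan that is scheduled (REQUESTED) but not yet executed.
    static Option<String> earliestRequestedClusteringInstant(HoodieTableMetaClient metaClient) {
      Option<HoodieInstant> first = metaClient.getActiveTimeline()
          .firstInstant(HoodieTimeline.REPLACE_COMMIT_ACTION, HoodieInstant.State.REQUESTED);
      return first.isPresent() ? Option.of(first.get().getTimestamp()) : Option.empty();
    }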
@@ -26,6 +26,7 @@ import org.apache.hudi.common.testutils.MockHoodieTimeline;
 import org.apache.hudi.common.util.CollectionUtils;
 import org.apache.hudi.common.util.Option;
 import org.apache.hudi.exception.HoodieException;
+
 import org.apache.hadoop.fs.Path;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
@@ -176,6 +177,15 @@ public class TestHoodieActiveTimeline extends HoodieCommonTestHarness {
     assertFalse(timeline.empty());
     assertFalse(timeline.getCommitTimeline().filterPendingExcludingCompaction().empty());
     assertEquals(12, timeline.countInstants());
+    assertEquals("01", timeline.firstInstant(
+        HoodieTimeline.COMMIT_ACTION, State.COMPLETED).get().getTimestamp());
+    assertEquals("21", timeline.firstInstant(
+        HoodieTimeline.COMMIT_ACTION, State.INFLIGHT).get().getTimestamp());
+    assertFalse(timeline.firstInstant(
+        HoodieTimeline.COMMIT_ACTION, State.REQUESTED).isPresent());
+    assertFalse(timeline.firstInstant(
+        HoodieTimeline.REPLACE_COMMIT_ACTION, State.COMPLETED).isPresent());
+
     HoodieTimeline activeCommitTimeline = timeline.getCommitTimeline().filterCompletedInstants();
     assertEquals(10, activeCommitTimeline.countInstants());
 
@@ -18,11 +18,6 @@
 
 package org.apache.hudi.utilities;
 
-import com.beust.jcommander.JCommander;
-import com.beust.jcommander.Parameter;
-import org.apache.avro.Schema;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
 import org.apache.hudi.client.SparkRDDWriteClient;
 import org.apache.hudi.common.config.TypedProperties;
 import org.apache.hudi.common.fs.FSUtils;
@@ -36,10 +31,15 @@ import org.apache.hudi.common.table.timeline.HoodieInstant;
 import org.apache.hudi.common.table.timeline.HoodieTimeline;
 import org.apache.hudi.common.util.Option;
 import org.apache.hudi.common.util.StringUtils;
+import org.apache.hudi.exception.HoodieClusteringException;
 import org.apache.hudi.exception.HoodieException;
-
 import org.apache.hudi.table.HoodieSparkTable;
 
+import com.beust.jcommander.JCommander;
+import com.beust.jcommander.Parameter;
+import org.apache.avro.Schema;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
 import org.apache.spark.api.java.JavaSparkContext;
@@ -53,14 +53,15 @@ import java.util.stream.Collectors;
 
 public class HoodieClusteringJob {
 
+  public static final String EXECUTE = "execute";
+  public static final String SCHEDULE = "schedule";
+  public static final String SCHEDULE_AND_EXECUTE = "scheduleandexecute";
   private static final Logger LOG = LogManager.getLogger(HoodieClusteringJob.class);
   private final Config cfg;
   private transient FileSystem fs;
   private TypedProperties props;
   private final JavaSparkContext jsc;
-  public static final String EXECUTE = "execute";
-  public static final String SCHEDULE = "schedule";
-  public static final String SCHEDULE_AND_EXECUTE = "scheduleandexecute";
+  private final HoodieTableMetaClient metaClient;
 
   public HoodieClusteringJob(JavaSparkContext jsc, Config cfg) {
     this.cfg = cfg;
@@ -68,6 +69,7 @@ public class HoodieClusteringJob {
     this.props = cfg.propsFilePath == null
         ? UtilHelpers.buildProperties(cfg.configs)
         : readConfigFromFileSystem(jsc, cfg);
+    this.metaClient = UtilHelpers.createMetaClient(jsc, cfg.basePath, true);
   }
 
   private TypedProperties readConfigFromFileSystem(JavaSparkContext jsc, Config cfg) {
@@ -83,7 +85,9 @@ public class HoodieClusteringJob {
     public String basePath = null;
     @Parameter(names = {"--table-name", "-tn"}, description = "Table name", required = true)
     public String tableName = null;
-    @Parameter(names = {"--instant-time", "-it"}, description = "Clustering Instant time, only need when set --mode execute. "
+    @Parameter(names = {"--instant-time", "-it"}, description = "Clustering Instant time, only used when set --mode execute. "
+        + "If the instant time is not provided with --mode execute, "
+        + "the earliest scheduled clustering instant time is used by default. "
         + "When set \"--mode scheduleAndExecute\" this instant-time will be ignored.", required = false)
     public String clusteringInstantTime = null;
     @Parameter(names = {"--parallelism", "-pl"}, description = "Parallelism for hoodie insert", required = false)
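With required = false and the new fallback, an execute-mode run no longer has to name an instant. A programmatic sketch of that flow, mirroring how the tests drive the job (the base path and table name are placeholder values, and direct instantiation of Config is assumed to behave like the tests' buildHoodieClusteringUtilConfig helper):

    HoodieClusteringJob.Config config = new HoodieClusteringJob.Config();
    config.basePath = "/tmp/hudi/async_clustering_table";   // placeholder
    config.tableName = "async_clustering_table";            // placeholder
    config.runningMode = HoodieClusteringJob.EXECUTE;
    // clusteringInstantTime stays null: the job picks the earliest scheduled
    // (REQUESTED replacecommit) instant from the active timeline, or fails with
    // HoodieClusteringException if nothing has been scheduled.
    HoodieClusteringJob clusteringJob = new HoodieClusteringJob(jsc, config);
    int exitCode = clusteringJob.cluster(config.retry);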
@@ -153,10 +157,6 @@ public class HoodieClusteringJob {
     if (StringUtils.isNullOrEmpty(cfg.runningMode)) {
       cfg.runningMode = cfg.runSchedule ? SCHEDULE : EXECUTE;
     }
-
-    if (cfg.runningMode.equalsIgnoreCase(EXECUTE) && cfg.clusteringInstantTime == null) {
-      throw new RuntimeException("--instant-time couldn't be null when executing clustering plan.");
-    }
   }
 
   public int cluster(int retry) {
@@ -192,7 +192,6 @@ public class HoodieClusteringJob {
   }
 
   private String getSchemaFromLatestInstant() throws Exception {
-    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(jsc.hadoopConfiguration()).setBasePath(cfg.basePath).setLoadActiveTimelineOnLoad(true).build();
     TableSchemaResolver schemaUtil = new TableSchemaResolver(metaClient);
     if (metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants().countInstants() == 0) {
       throw new HoodieException("Cannot run clustering without any completed commits");
@@ -204,6 +203,20 @@ public class HoodieClusteringJob {
   private int doCluster(JavaSparkContext jsc) throws Exception {
     String schemaStr = getSchemaFromLatestInstant();
     try (SparkRDDWriteClient<HoodieRecordPayload> client = UtilHelpers.createHoodieClient(jsc, cfg.basePath, schemaStr, cfg.parallelism, Option.empty(), props)) {
+      if (StringUtils.isNullOrEmpty(cfg.clusteringInstantTime)) {
+        // Instant time is not specified
+        // Find the earliest scheduled clustering instant for execution
+        Option<HoodieInstant> firstClusteringInstant =
+            metaClient.getActiveTimeline().firstInstant(
+                HoodieTimeline.REPLACE_COMMIT_ACTION, HoodieInstant.State.REQUESTED);
+        if (firstClusteringInstant.isPresent()) {
+          cfg.clusteringInstantTime = firstClusteringInstant.get().getTimestamp();
+          LOG.info("Found the earliest scheduled clustering instant which will be executed: "
+              + cfg.clusteringInstantTime);
+        } else {
+          throw new HoodieClusteringException("There is no scheduled clustering in the table.");
+        }
+      }
       Option<HoodieCommitMetadata> commitMetadata = client.cluster(cfg.clusteringInstantTime, true).getCommitMetadata();
 
       return handleErrors(commitMetadata.get(), cfg.clusteringInstantTime);
@@ -277,5 +290,4 @@ public class HoodieClusteringJob {
       LOG.error(String.format("Import failed with %d errors.", errorsCount));
       return -1;
     }
-
   }
@@ -21,9 +21,15 @@ package org.apache.hudi.utilities;
 import org.apache.hudi.client.SparkRDDWriteClient;
 import org.apache.hudi.client.WriteStatus;
 import org.apache.hudi.common.config.TypedProperties;
+import org.apache.hudi.common.fs.ConsistencyGuardConfig;
 import org.apache.hudi.common.fs.FSUtils;
 import org.apache.hudi.common.model.HoodieRecordPayload;
+import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hudi.common.table.timeline.HoodieInstant;
+import org.apache.hudi.common.table.timeline.HoodieTimeline;
 import org.apache.hudi.common.util.Option;
+import org.apache.hudi.common.util.StringUtils;
+import org.apache.hudi.exception.HoodieCompactionException;
 
 import com.beust.jcommander.JCommander;
 import com.beust.jcommander.Parameter;
@@ -41,6 +47,7 @@ import java.util.List;
 public class HoodieCompactor {
 
   private static final Logger LOG = LogManager.getLogger(HoodieCompactor.class);
+  private static ConsistencyGuardConfig consistencyGuardConfig = ConsistencyGuardConfig.newBuilder().build();
   private final Config cfg;
   private transient FileSystem fs;
   private TypedProperties props;
@@ -67,7 +74,7 @@ public class HoodieCompactor {
     public String basePath = null;
     @Parameter(names = {"--table-name", "-tn"}, description = "Table name", required = true)
     public String tableName = null;
-    @Parameter(names = {"--instant-time", "-it"}, description = "Compaction Instant time", required = true)
+    @Parameter(names = {"--instant-time", "-it"}, description = "Compaction Instant time", required = false)
     public String compactionInstantTime = null;
     @Parameter(names = {"--parallelism", "-pl"}, description = "Parallelism for hoodie insert", required = true)
     public int parallelism = 1;
@@ -134,6 +141,21 @@ public class HoodieCompactor {
     String schemaStr = UtilHelpers.parseSchema(fs, cfg.schemaFile);
     SparkRDDWriteClient<HoodieRecordPayload> client =
         UtilHelpers.createHoodieClient(jsc, cfg.basePath, schemaStr, cfg.parallelism, Option.empty(), props);
+    // If no compaction instant is provided by --instant-time, find the earliest scheduled compaction
+    // instant from the active timeline
+    if (StringUtils.isNullOrEmpty(cfg.compactionInstantTime)) {
+      HoodieTableMetaClient metaClient = UtilHelpers.createMetaClient(jsc, cfg.basePath, true);
+      Option<HoodieInstant> firstCompactionInstant =
+          metaClient.getActiveTimeline().firstInstant(
+              HoodieTimeline.COMPACTION_ACTION, HoodieInstant.State.REQUESTED);
+      if (firstCompactionInstant.isPresent()) {
+        cfg.compactionInstantTime = firstCompactionInstant.get().getTimestamp();
+        LOG.info("Found the earliest scheduled compaction instant which will be executed: "
+            + cfg.compactionInstantTime);
+      } else {
+        throw new HoodieCompactionException("There is no scheduled compaction in the table.");
+      }
+    }
     JavaRDD<WriteStatus> writeResponse = client.compact(cfg.compactionInstantTime);
     return UtilHelpers.handleErrors(jsc, cfg.compactionInstantTime, writeResponse);
   }
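Both jobs now share the same fallback shape. Condensed into one hypothetical helper for reference (the method name and its exception choice are illustrative only; every call it makes appears in the patch above):

    // If the operator supplied an instant time, keep it; otherwise take the earliest
    // REQUESTED instant of the given action (COMPACTION_ACTION for compaction,
    // REPLACE_COMMIT_ACTION for clustering) from the active timeline.
    static String resolveInstantTime(JavaSparkContext jsc, String basePath,
                                     String suppliedInstantTime, String action) {
      if (!StringUtils.isNullOrEmpty(suppliedInstantTime)) {
        return suppliedInstantTime;
      }
      HoodieTableMetaClient metaClient = UtilHelpers.createMetaClient(jsc, basePath, true);
      Option<HoodieInstant> earliest = metaClient.getActiveTimeline()
          .firstInstant(action, HoodieInstant.State.REQUESTED);
      if (!earliest.isPresent()) {
        throw new IllegalStateException("No scheduled " + action + " instant found in the table.");
      }
      return earliest.get().getTimestamp();
    }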
@@ -142,6 +164,10 @@ public class HoodieCompactor {
     // Get schema.
     SparkRDDWriteClient client =
         UtilHelpers.createHoodieClient(jsc, cfg.basePath, "", cfg.parallelism, Option.of(cfg.strategyClassName), props);
+    if (cfg.compactionInstantTime == null) {
+      throw new IllegalArgumentException("No instant time is provided for scheduling compaction. "
+          + "Please specify the compaction instant time by using --instant-time.");
+    }
     client.scheduleCompactionAtInstant(cfg.compactionInstantTime, Option.empty());
     return 0;
   }
@@ -469,6 +469,15 @@ public class UtilHelpers {
     };
   }
 
+  public static HoodieTableMetaClient createMetaClient(
+      JavaSparkContext jsc, String basePath, boolean shouldLoadActiveTimelineOnLoad) {
+    return HoodieTableMetaClient.builder()
+        .setConf(jsc.hadoopConfiguration())
+        .setBasePath(basePath)
+        .setLoadActiveTimelineOnLoad(shouldLoadActiveTimelineOnLoad)
+        .build();
+  }
+
   @FunctionalInterface
   public interface CheckedSupplier<T> {
     T get() throws Throwable;
@@ -157,11 +157,13 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
     }
   }
 
+  @Override
   @BeforeEach
   public void setup() throws Exception {
     super.setup();
   }
 
+  @Override
   @AfterEach
   public void teardown() throws Exception {
     super.teardown();
@@ -869,18 +871,20 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
     return config;
   }
 
-  @Test
-  public void testHoodieAsyncClusteringJob() throws Exception {
+  @ParameterizedTest
+  @ValueSource(booleans = {true, false})
+  public void testHoodieAsyncClusteringJob(boolean shouldPassInClusteringInstantTime) throws Exception {
     String tableBasePath = dfsBasePath + "/asyncClustering";
 
     HoodieDeltaStreamer ds = initialHoodieDeltaStreamer(tableBasePath, 3000, "true");
-    HoodieClusteringJob scheduleClusteringJob = initialHoodieClusteringJob(tableBasePath, null, true, null);
 
     deltaStreamerTestRunner(ds, (r) -> {
       TestHelpers.assertAtLeastNCommits(2, tableBasePath, dfs);
 
       Option<String> scheduleClusteringInstantTime = Option.empty();
       try {
+        HoodieClusteringJob scheduleClusteringJob =
+            initialHoodieClusteringJob(tableBasePath, null, true, null);
         scheduleClusteringInstantTime = scheduleClusteringJob.doSchedule();
       } catch (Exception e) {
         LOG.warn("Schedule clustering failed", e);
@@ -889,7 +893,7 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
       if (scheduleClusteringInstantTime.isPresent()) {
         LOG.info("Schedule clustering success, now cluster with instant time " + scheduleClusteringInstantTime.get());
         HoodieClusteringJob.Config clusterClusteringConfig = buildHoodieClusteringUtilConfig(tableBasePath,
-            scheduleClusteringInstantTime.get(), false);
+            shouldPassInClusteringInstantTime ? scheduleClusteringInstantTime.get() : null, false);
         HoodieClusteringJob clusterClusteringJob = new HoodieClusteringJob(jsc, clusterClusteringConfig);
         clusterClusteringJob.cluster(clusterClusteringConfig.retry);
         LOG.info("Cluster success");
@@ -988,7 +992,7 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
   }
 
   @ParameterizedTest
-  @ValueSource(strings = {"schedule", "execute", "scheduleAndExecute"})
+  @ValueSource(strings = {"execute", "schedule", "scheduleAndExecute"})
   public void testHoodieAsyncClusteringJobWithScheduleAndExecute(String runningMode) throws Exception {
     String tableBasePath = dfsBasePath + "/asyncClustering2";
     HoodieDeltaStreamer ds = initialHoodieDeltaStreamer(tableBasePath, 3000, "false");
@@ -1003,8 +1007,10 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
           LOG.info("Cluster success");
         } else {
           LOG.warn("Import failed");
+          if (!runningMode.toLowerCase().equals(HoodieClusteringJob.EXECUTE)) {
             return false;
           }
+        }
       } catch (Exception e) {
         LOG.warn("ScheduleAndExecute clustering failed", e);
         exception = e;
@@ -1023,8 +1029,7 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
           return true;
         }
         case HoodieClusteringJob.EXECUTE: {
-          assertNotNull(exception);
-          assertEquals(exception.getMessage(), "--instant-time couldn't be null when executing clustering plan.");
+          TestHelpers.assertNoReplaceCommits(0, tableBasePath, dfs);
           return true;
         }
         default: