[HUDI-575] Spark Streaming with async compaction support (#1752)
This commit is contained in:
committed by
GitHub
parent
61e027fadd
commit
7a2429f5ba
@@ -151,6 +151,7 @@ public class HoodieJavaApp {
|
||||
.option(DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY(),
|
||||
nonPartitionedTable ? NonpartitionedKeyGenerator.class.getCanonicalName()
|
||||
: SimpleKeyGenerator.class.getCanonicalName())
|
||||
.option(DataSourceWriteOptions.ASYNC_COMPACT_ENABLE_KEY(), "false")
|
||||
// This will remove any existing data at path below, and create a
|
||||
.mode(SaveMode.Overwrite);
|
||||
|
||||
@@ -177,6 +178,7 @@ public class HoodieJavaApp {
|
||||
nonPartitionedTable ? NonpartitionedKeyGenerator.class.getCanonicalName()
|
||||
: SimpleKeyGenerator.class.getCanonicalName()) // Add Key Extractor
|
||||
.option(HoodieCompactionConfig.INLINE_COMPACT_NUM_DELTA_COMMITS_PROP, "1")
|
||||
.option(DataSourceWriteOptions.ASYNC_COMPACT_ENABLE_KEY(), "false")
|
||||
.option(HoodieWriteConfig.TABLE_NAME, tableName).mode(SaveMode.Append);
|
||||
|
||||
updateHiveSyncConfig(writer);
|
||||
@@ -202,6 +204,7 @@ public class HoodieJavaApp {
|
||||
nonPartitionedTable ? NonpartitionedKeyGenerator.class.getCanonicalName()
|
||||
: SimpleKeyGenerator.class.getCanonicalName()) // Add Key Extractor
|
||||
.option(HoodieCompactionConfig.INLINE_COMPACT_NUM_DELTA_COMMITS_PROP, "1")
|
||||
.option(DataSourceWriteOptions.ASYNC_COMPACT_ENABLE_KEY(), "false")
|
||||
.option(HoodieWriteConfig.TABLE_NAME, tableName).mode(SaveMode.Append);
|
||||
|
||||
updateHiveSyncConfig(writer);
|
||||
|
||||
@@ -16,12 +16,18 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.hudi.DataSourceReadOptions;
|
||||
import org.apache.hudi.DataSourceWriteOptions;
|
||||
import org.apache.hudi.HoodieDataSourceHelpers;
|
||||
import org.apache.hudi.common.model.HoodieTableType;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
|
||||
import org.apache.hudi.common.util.ValidationUtils;
|
||||
import org.apache.hudi.config.HoodieCompactionConfig;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.TableNotFoundException;
|
||||
import org.apache.hudi.hive.MultiPartKeysValueExtractor;
|
||||
|
||||
import com.beust.jcommander.JCommander;
|
||||
@@ -43,6 +49,7 @@ import java.util.List;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.Future;
|
||||
import org.apache.spark.sql.streaming.StreamingQuery;
|
||||
|
||||
import static org.apache.hudi.common.testutils.RawTripTestPayload.recordsToStrings;
|
||||
|
||||
@@ -52,14 +59,14 @@ import static org.apache.hudi.common.testutils.RawTripTestPayload.recordsToStrin
|
||||
public class HoodieJavaStreamingApp {
|
||||
|
||||
@Parameter(names = {"--table-path", "-p"}, description = "path for Hoodie sample table")
|
||||
private String tablePath = "file:///tmp/hoodie/streaming/sample-table";
|
||||
private String tablePath = "/tmp/hoodie/streaming/sample-table";
|
||||
|
||||
@Parameter(names = {"--streaming-source-path", "-ssp"}, description = "path for streaming source file folder")
|
||||
private String streamingSourcePath = "file:///tmp/hoodie/streaming/source";
|
||||
private String streamingSourcePath = "/tmp/hoodie/streaming/source";
|
||||
|
||||
@Parameter(names = {"--streaming-checkpointing-path", "-scp"},
|
||||
description = "path for streaming checking pointing folder")
|
||||
private String streamingCheckpointingPath = "file:///tmp/hoodie/streaming/checkpoint";
|
||||
private String streamingCheckpointingPath = "/tmp/hoodie/streaming/checkpoint";
|
||||
|
||||
@Parameter(names = {"--streaming-duration-in-ms", "-sdm"},
|
||||
description = "time in millisecond for the streaming duration")
|
||||
@@ -106,7 +113,15 @@ public class HoodieJavaStreamingApp {
|
||||
cmd.usage();
|
||||
System.exit(1);
|
||||
}
|
||||
cli.run();
|
||||
int errStatus = 0;
|
||||
try {
|
||||
cli.run();
|
||||
} catch (Exception ex) {
|
||||
LOG.error("Got error running app ", ex);
|
||||
errStatus = -1;
|
||||
} finally {
|
||||
System.exit(errStatus);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -132,38 +147,118 @@ public class HoodieJavaStreamingApp {
|
||||
List<String> records1 = recordsToStrings(dataGen.generateInserts("001", 100));
|
||||
Dataset<Row> inputDF1 = spark.read().json(jssc.parallelize(records1, 2));
|
||||
|
||||
List<String> records2 = recordsToStrings(dataGen.generateUpdates("002", 100));
|
||||
|
||||
List<String> records2 = recordsToStrings(dataGen.generateUpdatesForAllRecords("002"));
|
||||
Dataset<Row> inputDF2 = spark.read().json(jssc.parallelize(records2, 2));
|
||||
|
||||
// setup the input for streaming
|
||||
Dataset<Row> streamingInput = spark.readStream().schema(inputDF1.schema()).json(streamingSourcePath);
|
||||
|
||||
String ckptPath = streamingCheckpointingPath + "/stream1";
|
||||
String srcPath = streamingSourcePath + "/stream1";
|
||||
fs.mkdirs(new Path(ckptPath));
|
||||
fs.mkdirs(new Path(srcPath));
|
||||
|
||||
// setup the input for streaming
|
||||
Dataset<Row> streamingInput = spark.readStream().schema(inputDF1.schema()).json(srcPath + "/*");
|
||||
|
||||
// start streaming and showing
|
||||
ExecutorService executor = Executors.newFixedThreadPool(2);
|
||||
int numInitialCommits = 0;
|
||||
|
||||
// thread for spark strucutured streaming
|
||||
Future<Void> streamFuture = executor.submit(() -> {
|
||||
LOG.info("===== Streaming Starting =====");
|
||||
stream(streamingInput);
|
||||
LOG.info("===== Streaming Ends =====");
|
||||
return null;
|
||||
});
|
||||
try {
|
||||
Future<Void> streamFuture = executor.submit(() -> {
|
||||
LOG.info("===== Streaming Starting =====");
|
||||
stream(streamingInput, DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL(), ckptPath);
|
||||
LOG.info("===== Streaming Ends =====");
|
||||
return null;
|
||||
});
|
||||
|
||||
// thread for adding data to the streaming source and showing results over time
|
||||
Future<Void> showFuture = executor.submit(() -> {
|
||||
LOG.info("===== Showing Starting =====");
|
||||
show(spark, fs, inputDF1, inputDF2);
|
||||
LOG.info("===== Showing Ends =====");
|
||||
return null;
|
||||
});
|
||||
// thread for adding data to the streaming source and showing results over time
|
||||
Future<Integer> showFuture = executor.submit(() -> {
|
||||
LOG.info("===== Showing Starting =====");
|
||||
int numCommits = addInputAndValidateIngestion(spark, fs, srcPath,0, 100, inputDF1, inputDF2, true);
|
||||
LOG.info("===== Showing Ends =====");
|
||||
return numCommits;
|
||||
});
|
||||
|
||||
// let the threads run
|
||||
streamFuture.get();
|
||||
showFuture.get();
|
||||
// let the threads run
|
||||
streamFuture.get();
|
||||
numInitialCommits = showFuture.get();
|
||||
} finally {
|
||||
executor.shutdownNow();
|
||||
}
|
||||
|
||||
executor.shutdown();
|
||||
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jssc.hadoopConfiguration(), tablePath);
|
||||
if (tableType.equals(HoodieTableType.MERGE_ON_READ.name())) {
|
||||
// Ensure we have successfully completed one compaction commit
|
||||
ValidationUtils.checkArgument(metaClient.getActiveTimeline().getCommitTimeline().getInstants().count() == 1);
|
||||
} else {
|
||||
ValidationUtils.checkArgument(metaClient.getActiveTimeline().getCommitTimeline().getInstants().count() >= 1);
|
||||
}
|
||||
|
||||
// Deletes Stream
|
||||
// Need to restart application to ensure spark does not assume there are multiple streams active.
|
||||
spark.close();
|
||||
SparkSession newSpark = SparkSession.builder().appName("Hoodie Spark Streaming APP")
|
||||
.config("spark.serializer", "org.apache.spark.serializer.KryoSerializer").master("local[1]").getOrCreate();
|
||||
jssc = new JavaSparkContext(newSpark.sparkContext());
|
||||
String ckptPath2 = streamingCheckpointingPath + "/stream2";
|
||||
String srcPath2 = srcPath + "/stream2";
|
||||
fs.mkdirs(new Path(ckptPath2));
|
||||
fs.mkdirs(new Path(srcPath2));
|
||||
Dataset<Row> delStreamingInput = newSpark.readStream().schema(inputDF1.schema()).json(srcPath2 + "/*");
|
||||
List<String> deletes = recordsToStrings(dataGen.generateUniqueUpdates("002", 20));
|
||||
Dataset<Row> inputDF3 = newSpark.read().json(jssc.parallelize(deletes, 2));
|
||||
executor = Executors.newFixedThreadPool(2);
|
||||
|
||||
// thread for spark strucutured streaming
|
||||
try {
|
||||
Future<Void> streamFuture = executor.submit(() -> {
|
||||
LOG.info("===== Streaming Starting =====");
|
||||
stream(delStreamingInput, DataSourceWriteOptions.DELETE_OPERATION_OPT_VAL(), ckptPath2);
|
||||
LOG.info("===== Streaming Ends =====");
|
||||
return null;
|
||||
});
|
||||
|
||||
final int numCommits = numInitialCommits;
|
||||
// thread for adding data to the streaming source and showing results over time
|
||||
Future<Void> showFuture = executor.submit(() -> {
|
||||
LOG.info("===== Showing Starting =====");
|
||||
addInputAndValidateIngestion(newSpark, fs, srcPath2, numCommits, 80, inputDF3, null, false);
|
||||
LOG.info("===== Showing Ends =====");
|
||||
return null;
|
||||
});
|
||||
|
||||
// let the threads run
|
||||
streamFuture.get();
|
||||
showFuture.get();
|
||||
} finally {
|
||||
executor.shutdown();
|
||||
}
|
||||
}
|
||||
|
||||
private void waitTillNCommits(FileSystem fs, int numCommits, int timeoutSecs, int sleepSecsAfterEachRun)
|
||||
throws InterruptedException {
|
||||
long beginTime = System.currentTimeMillis();
|
||||
long currTime = beginTime;
|
||||
long timeoutMsecs = timeoutSecs * 1000;
|
||||
|
||||
while ((currTime - beginTime) < timeoutMsecs) {
|
||||
try {
|
||||
HoodieTimeline timeline = HoodieDataSourceHelpers.allCompletedCommitsCompactions(fs, tablePath);
|
||||
LOG.info("Timeline :" + timeline.getInstants().collect(Collectors.toList()));
|
||||
if (timeline.countInstants() >= numCommits) {
|
||||
return;
|
||||
}
|
||||
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs.getConf(), tablePath, true);
|
||||
System.out.println("Instants :" + metaClient.getActiveTimeline().getInstants().collect(Collectors.toList()));
|
||||
} catch (TableNotFoundException te) {
|
||||
LOG.info("Got table not found exception. Retrying");
|
||||
} finally {
|
||||
Thread.sleep(sleepSecsAfterEachRun * 1000);
|
||||
currTime = System.currentTimeMillis();
|
||||
}
|
||||
}
|
||||
throw new IllegalStateException("Timedout waiting for " + numCommits + " commits to appear in " + tablePath);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -175,23 +270,40 @@ public class HoodieJavaStreamingApp {
|
||||
* @param inputDF2
|
||||
* @throws Exception
|
||||
*/
|
||||
public void show(SparkSession spark, FileSystem fs, Dataset<Row> inputDF1, Dataset<Row> inputDF2) throws Exception {
|
||||
inputDF1.write().mode(SaveMode.Append).json(streamingSourcePath);
|
||||
public int addInputAndValidateIngestion(SparkSession spark, FileSystem fs, String srcPath,
|
||||
int initialCommits, int expRecords,
|
||||
Dataset<Row> inputDF1, Dataset<Row> inputDF2, boolean instantTimeValidation) throws Exception {
|
||||
inputDF1.write().mode(SaveMode.Append).json(srcPath);
|
||||
|
||||
int numExpCommits = initialCommits + 1;
|
||||
// wait for spark streaming to process one microbatch
|
||||
Thread.sleep(3000);
|
||||
waitTillNCommits(fs, numExpCommits, 180, 3);
|
||||
String commitInstantTime1 = HoodieDataSourceHelpers.latestCommit(fs, tablePath);
|
||||
LOG.info("First commit at instant time :" + commitInstantTime1);
|
||||
|
||||
inputDF2.write().mode(SaveMode.Append).json(streamingSourcePath);
|
||||
// wait for spark streaming to process one microbatch
|
||||
Thread.sleep(3000);
|
||||
String commitInstantTime2 = HoodieDataSourceHelpers.latestCommit(fs, tablePath);
|
||||
LOG.info("Second commit at instant time :" + commitInstantTime2);
|
||||
String commitInstantTime2 = commitInstantTime1;
|
||||
if (null != inputDF2) {
|
||||
numExpCommits += 1;
|
||||
inputDF2.write().mode(SaveMode.Append).json(srcPath);
|
||||
// wait for spark streaming to process one microbatch
|
||||
Thread.sleep(3000);
|
||||
waitTillNCommits(fs, numExpCommits, 180, 3);
|
||||
commitInstantTime2 = HoodieDataSourceHelpers.latestCommit(fs, tablePath);
|
||||
LOG.info("Second commit at instant time :" + commitInstantTime2);
|
||||
}
|
||||
|
||||
if (tableType.equals(HoodieTableType.MERGE_ON_READ.name())) {
|
||||
numExpCommits += 1;
|
||||
// Wait for compaction to also finish and track latest timestamp as commit timestamp
|
||||
waitTillNCommits(fs, numExpCommits, 180, 3);
|
||||
commitInstantTime2 = HoodieDataSourceHelpers.latestCommit(fs, tablePath);
|
||||
LOG.info("Compaction commit at instant time :" + commitInstantTime2);
|
||||
}
|
||||
|
||||
/**
|
||||
* Read & do some queries
|
||||
*/
|
||||
Dataset<Row> hoodieROViewDF = spark.read().format("org.apache.hudi")
|
||||
Dataset<Row> hoodieROViewDF = spark.read().format("hudi")
|
||||
// pass any path glob, can include hoodie & non-hoodie
|
||||
// datasets
|
||||
.load(tablePath + "/*/*/*/*");
|
||||
@@ -200,11 +312,24 @@ public class HoodieJavaStreamingApp {
|
||||
// all trips whose fare amount was greater than 2.
|
||||
spark.sql("select fare.amount, begin_lon, begin_lat, timestamp from hoodie_ro where fare.amount > 2.0").show();
|
||||
|
||||
if (instantTimeValidation) {
|
||||
System.out.println("Showing all records. Latest Instant Time =" + commitInstantTime2);
|
||||
spark.sql("select * from hoodie_ro").show(200, false);
|
||||
long numRecordsAtInstant2 =
|
||||
spark.sql("select * from hoodie_ro where _hoodie_commit_time = " + commitInstantTime2).count();
|
||||
ValidationUtils.checkArgument(numRecordsAtInstant2 == expRecords,
|
||||
"Expecting " + expRecords + " records, Got " + numRecordsAtInstant2);
|
||||
}
|
||||
|
||||
long numRecords = spark.sql("select * from hoodie_ro").count();
|
||||
ValidationUtils.checkArgument(numRecords == expRecords,
|
||||
"Expecting " + expRecords + " records, Got " + numRecords);
|
||||
|
||||
if (tableType.equals(HoodieTableType.COPY_ON_WRITE.name())) {
|
||||
/**
|
||||
* Consume incrementally, only changes in commit 2 above. Currently only supported for COPY_ON_WRITE TABLE
|
||||
*/
|
||||
Dataset<Row> hoodieIncViewDF = spark.read().format("org.apache.hudi")
|
||||
Dataset<Row> hoodieIncViewDF = spark.read().format("hudi")
|
||||
.option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY(), DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL())
|
||||
// Only changes in write 2 above
|
||||
.option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY(), commitInstantTime1)
|
||||
@@ -214,6 +339,7 @@ public class HoodieJavaStreamingApp {
|
||||
LOG.info("You will only see records from : " + commitInstantTime2);
|
||||
hoodieIncViewDF.groupBy(hoodieIncViewDF.col("_hoodie_commit_time")).count().show();
|
||||
}
|
||||
return numExpCommits;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -222,19 +348,23 @@ public class HoodieJavaStreamingApp {
|
||||
* @param streamingInput
|
||||
* @throws Exception
|
||||
*/
|
||||
public void stream(Dataset<Row> streamingInput) throws Exception {
|
||||
public void stream(Dataset<Row> streamingInput, String operationType, String checkpointLocation) throws Exception {
|
||||
|
||||
DataStreamWriter<Row> writer = streamingInput.writeStream().format("org.apache.hudi")
|
||||
.option("hoodie.insert.shuffle.parallelism", "2").option("hoodie.upsert.shuffle.parallelism", "2")
|
||||
.option(DataSourceWriteOptions.OPERATION_OPT_KEY(), operationType)
|
||||
.option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY(), tableType)
|
||||
.option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key")
|
||||
.option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "partition")
|
||||
.option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY(), "timestamp")
|
||||
.option(HoodieWriteConfig.TABLE_NAME, tableName).option("checkpointLocation", streamingCheckpointingPath)
|
||||
.option(HoodieCompactionConfig.INLINE_COMPACT_NUM_DELTA_COMMITS_PROP, "1")
|
||||
.option(DataSourceWriteOptions.ASYNC_COMPACT_ENABLE_KEY(), "true")
|
||||
.option(HoodieWriteConfig.TABLE_NAME, tableName).option("checkpointLocation", checkpointLocation)
|
||||
.outputMode(OutputMode.Append());
|
||||
|
||||
updateHiveSyncConfig(writer);
|
||||
writer.trigger(new ProcessingTime(500)).start(tablePath).awaitTermination(streamingDurationInMs);
|
||||
StreamingQuery query = writer.trigger(new ProcessingTime(500)).start(tablePath);
|
||||
query.awaitTermination(streamingDurationInMs);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -50,7 +50,8 @@ class HoodieSparkSqlWriterSuite extends FunSuite with Matchers {
|
||||
try {
|
||||
val sqlContext = session.sqlContext
|
||||
val options = Map("path" -> "hoodie/test/path", HoodieWriteConfig.TABLE_NAME -> "hoodie_test_tbl")
|
||||
val e = intercept[HoodieException](HoodieSparkSqlWriter.write(sqlContext, SaveMode.ErrorIfExists, options, session.emptyDataFrame))
|
||||
val e = intercept[HoodieException](HoodieSparkSqlWriter.write(sqlContext, SaveMode.ErrorIfExists, options,
|
||||
session.emptyDataFrame))
|
||||
assert(e.getMessage.contains("spark.serializer"))
|
||||
} finally {
|
||||
session.stop()
|
||||
|
||||
@@ -17,12 +17,16 @@
|
||||
|
||||
package org.apache.hudi.functional
|
||||
|
||||
|
||||
import org.apache.hadoop.fs.{FileSystem, Path}
|
||||
import org.apache.hudi.common.fs.FSUtils
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient
|
||||
import org.apache.hudi.common.testutils.HoodieTestDataGenerator
|
||||
import org.apache.hudi.common.testutils.RawTripTestPayload.recordsToStrings
|
||||
import org.apache.hudi.config.HoodieWriteConfig
|
||||
import org.apache.hudi.exception.TableNotFoundException
|
||||
import org.apache.hudi.{DataSourceReadOptions, DataSourceWriteOptions, HoodieDataSourceHelpers}
|
||||
import org.apache.log4j.LogManager
|
||||
import org.apache.spark.sql._
|
||||
import org.apache.spark.sql.functions.col
|
||||
import org.apache.spark.sql.streaming.{OutputMode, ProcessingTime}
|
||||
@@ -39,6 +43,7 @@ import scala.concurrent.{Await, Future}
|
||||
* Basic tests on the spark datasource
|
||||
*/
|
||||
class TestDataSource {
|
||||
private val log = LogManager.getLogger(getClass)
|
||||
|
||||
var spark: SparkSession = null
|
||||
var dataGen: HoodieTestDataGenerator = null
|
||||
@@ -214,7 +219,7 @@ class TestDataSource {
|
||||
assertEquals(hoodieIncViewDF2.count(), insert2NewKeyCnt)
|
||||
}
|
||||
|
||||
//@Test (TODO: re-enable after fixing noisyness)
|
||||
@Test
|
||||
def testStructuredStreaming(): Unit = {
|
||||
fs.delete(new Path(basePath), true)
|
||||
val sourcePath = basePath + "/source"
|
||||
@@ -254,7 +259,7 @@ class TestDataSource {
|
||||
val f2 = Future {
|
||||
inputDF1.write.mode(SaveMode.Append).json(sourcePath)
|
||||
// wait for spark streaming to process one microbatch
|
||||
Thread.sleep(3000)
|
||||
val currNumCommits = waitTillAtleastNCommits(fs, destPath, 1, 120, 5);
|
||||
assertTrue(HoodieDataSourceHelpers.hasNewCommits(fs, destPath, "000"))
|
||||
val commitInstantTime1: String = HoodieDataSourceHelpers.latestCommit(fs, destPath)
|
||||
// Read RO View
|
||||
@@ -264,9 +269,8 @@ class TestDataSource {
|
||||
|
||||
inputDF2.write.mode(SaveMode.Append).json(sourcePath)
|
||||
// wait for spark streaming to process one microbatch
|
||||
Thread.sleep(10000)
|
||||
waitTillAtleastNCommits(fs, destPath, currNumCommits + 1, 120, 5);
|
||||
val commitInstantTime2: String = HoodieDataSourceHelpers.latestCommit(fs, destPath)
|
||||
|
||||
assertEquals(2, HoodieDataSourceHelpers.listCommitsSince(fs, destPath, "000").size())
|
||||
// Read RO View
|
||||
val hoodieROViewDF2 = spark.read.format("org.apache.hudi")
|
||||
@@ -299,8 +303,35 @@ class TestDataSource {
|
||||
assertEquals(1, countsPerCommit.length)
|
||||
assertEquals(commitInstantTime2, countsPerCommit(0).get(0))
|
||||
}
|
||||
|
||||
Await.result(Future.sequence(Seq(f1, f2)), Duration.Inf)
|
||||
}
|
||||
|
||||
@throws[InterruptedException]
|
||||
private def waitTillAtleastNCommits(fs: FileSystem, tablePath: String,
|
||||
numCommits: Int, timeoutSecs: Int, sleepSecsAfterEachRun: Int): Int = {
|
||||
val beginTime = System.currentTimeMillis
|
||||
var currTime = beginTime
|
||||
val timeoutMsecs = timeoutSecs * 1000
|
||||
var numInstants = 0
|
||||
var success: Boolean = false
|
||||
while ({!success && (currTime - beginTime) < timeoutMsecs}) try {
|
||||
val timeline = HoodieDataSourceHelpers.allCompletedCommitsCompactions(fs, tablePath)
|
||||
log.info("Timeline :" + timeline.getInstants.toArray)
|
||||
if (timeline.countInstants >= numCommits) {
|
||||
numInstants = timeline.countInstants
|
||||
success = true
|
||||
}
|
||||
val metaClient = new HoodieTableMetaClient(fs.getConf, tablePath, true)
|
||||
} catch {
|
||||
case te: TableNotFoundException =>
|
||||
log.info("Got table not found exception. Retrying")
|
||||
} finally {
|
||||
Thread.sleep(sleepSecsAfterEachRun * 1000)
|
||||
currTime = System.currentTimeMillis
|
||||
}
|
||||
if (!success) {
|
||||
throw new IllegalStateException("Timed-out waiting for " + numCommits + " commits to appear in " + tablePath)
|
||||
}
|
||||
numInstants
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user