
Fix Integration test flakiness in HoodieJavaStreamingApp (#1967)

Authored by Balaji Varadarajan on 2020-08-14 01:42:15 -07:00; committed by GitHub
parent 9bde6d616c
commit b8f4a30efd
5 changed files with 37 additions and 21 deletions

HoodieDataSourceHelpers.java

@@ -74,7 +74,7 @@ public class HoodieDataSourceHelpers {
     if (metaClient.getTableType().equals(HoodieTableType.MERGE_ON_READ)) {
       return metaClient.getActiveTimeline().getTimelineOfActions(
           CollectionUtils.createSet(HoodieActiveTimeline.COMMIT_ACTION,
-              HoodieActiveTimeline.DELTA_COMMIT_ACTION));
+              HoodieActiveTimeline.DELTA_COMMIT_ACTION)).filterCompletedInstants();
     } else {
       return metaClient.getCommitTimeline().filterCompletedInstants();
     }
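
Before this change, the MERGE_ON_READ branch returned the raw active timeline, so inflight delta commits were counted alongside completed ones, and a test polling for commit counts could wake up on a commit that had not yet finished. Below is a minimal Java sketch of such a polling check built on the fixed helper, assuming the hunk above sits inside HoodieDataSourceHelpers.allCompletedCommitsCompactions (the enclosing method name is not visible in the diff):

// Hypothetical polling helper built on the fixed behavior: once
// filterCompletedInstants() is applied, countInstants() only sees durable
// commits, so this loop can no longer exit early on an inflight delta commit.
import org.apache.hadoop.fs.FileSystem;
import org.apache.hudi.HoodieDataSourceHelpers;

public class CommitWaiter {
  static int waitForCommits(FileSystem fs, String basePath, int expected, long timeoutMs)
      throws InterruptedException {
    long deadline = System.currentTimeMillis() + timeoutMs;
    while (System.currentTimeMillis() < deadline) {
      int completed = HoodieDataSourceHelpers
          .allCompletedCommitsCompactions(fs, basePath)
          .countInstants();
      if (completed >= expected) {
        return completed;
      }
      Thread.sleep(500); // poll interval; tune per test
    }
    throw new IllegalStateException("Timed out waiting for " + expected + " commits");
  }
}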

HoodieJavaApp.java

@@ -16,6 +16,7 @@
  * limitations under the License.
  */

+import org.apache.hadoop.fs.Path;
 import org.apache.hudi.DataSourceReadOptions;
 import org.apache.hudi.DataSourceWriteOptions;
 import org.apache.hudi.HoodieDataSourceHelpers;
@@ -120,6 +121,9 @@ public class HoodieJavaApp {
       dataGen = new HoodieTestDataGenerator();
     }

+    // Explicitly clear up the hoodie table path if it exists.
+    fs.delete(new Path(tablePath), true);
+
     /**
      * Commit with only inserts
      */
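
The added delete makes repeated local runs idempotent: any table left behind by a previous invocation is removed before the first commit. A minimal sketch of the same idiom in isolation follows; the tablePath value is hypothetical:

// Minimal sketch of the cleanup idiom added above; tablePath is hypothetical.
// FileSystem.delete(path, true) deletes recursively and simply returns false
// when the path does not exist, so no existence check is required.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CleanStart {
  public static void main(String[] args) throws Exception {
    String tablePath = "/tmp/hoodie/sample-table"; // hypothetical test path
    FileSystem fs = FileSystem.get(new Configuration());
    fs.delete(new Path(tablePath), true); // true => recursive, as in the diff
  }
}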

HoodieJavaStreamingApp.java

@@ -273,7 +273,9 @@ public class HoodieJavaStreamingApp {
   public int addInputAndValidateIngestion(SparkSession spark, FileSystem fs, String srcPath,
       int initialCommits, int expRecords,
       Dataset<Row> inputDF1, Dataset<Row> inputDF2, boolean instantTimeValidation) throws Exception {
-    inputDF1.write().mode(SaveMode.Append).json(srcPath);
+    // Ensure we always write only one file. This is very important to ensure a single batch is reliably read
+    // atomically by one iteration of spark streaming.
+    inputDF1.coalesce(1).write().mode(SaveMode.Append).json(srcPath);

     int numExpCommits = initialCommits + 1;
     // wait for spark streaming to process one microbatch
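
Spark's file-based streaming source discovers new files per micro-batch, so a multi-file append could be split across two micro-batches and yield more commits than the test expects; coalescing to one partition guarantees exactly one JSON file per append. A standalone sketch of that pattern, with illustrative class and method names:

// Illustrative names only: collapses the dataset to one partition so exactly
// one JSON part file lands in srcPath, and one streaming micro-batch
// therefore reads the whole appended batch atomically.
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;

public final class SingleFileAppend {
  static void appendAsOneFile(Dataset<Row> inputDF, String srcPath) {
    inputDF.coalesce(1)
        .write()
        .mode(SaveMode.Append)
        .json(srcPath);
  }
}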

TestStructuredStreaming.scala

@@ -102,7 +102,7 @@ class TestStructuredStreaming extends HoodieClientTestBase {
     }
     val f2 = Future {
-      inputDF1.write.mode(SaveMode.Append).json(sourcePath)
+      inputDF1.coalesce(1).write.mode(SaveMode.Append).json(sourcePath)
       // wait for spark streaming to process one microbatch
       val currNumCommits = waitTillAtleastNCommits(fs, destPath, 1, 120, 5)
       assertTrue(HoodieDataSourceHelpers.hasNewCommits(fs, destPath, "000"))
@@ -112,7 +112,7 @@ class TestStructuredStreaming extends HoodieClientTestBase {
         .load(destPath + "/*/*/*/*")
       assert(hoodieROViewDF1.count() == 100)
-      inputDF2.write.mode(SaveMode.Append).json(sourcePath)
+      inputDF2.coalesce(1).write.mode(SaveMode.Append).json(sourcePath)
       // wait for spark streaming to process one microbatch
       waitTillAtleastNCommits(fs, destPath, currNumCommits + 1, 120, 5)
       val commitInstantTime2 = HoodieDataSourceHelpers.latestCommit(fs, destPath)
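
The Scala test applies the same single-file append to both input batches, then gates its assertions on completed commits. A hedged Java rendering of those visibility checks, reusing the hasNewCommits and latestCommit helpers that appear in the diff (the fs and destPath variables are assumed to come from the test harness):

// fs and destPath are assumed to come from the test harness; "000" sorts
// before any real instant time, so hasNewCommits(fs, destPath, "000") means
// "at least one completed commit exists".
import org.apache.hadoop.fs.FileSystem;
import org.apache.hudi.HoodieDataSourceHelpers;

public class CommitChecks {
  static String assertProgress(FileSystem fs, String destPath) {
    if (!HoodieDataSourceHelpers.hasNewCommits(fs, destPath, "000")) {
      throw new AssertionError("expected at least one completed commit");
    }
    // With the HoodieDataSourceHelpers fix above, latestCommit can no longer
    // report an inflight delta commit on a MERGE_ON_READ table.
    return HoodieDataSourceHelpers.latestCommit(fs, destPath);
  }
}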