1
0

[HUDI-810] Migrate ClientTestHarness to JUnit 5 (#1553)

This commit is contained in:
Raymond Xu
2020-04-28 08:38:16 -07:00
committed by GitHub
parent 6de9f5d9e5
commit 06dae30297
36 changed files with 1232 additions and 1243 deletions

View File

@@ -35,16 +35,16 @@ import org.apache.hudi.index.HoodieIndex;
import org.apache.hudi.table.HoodieTable;
import org.apache.spark.api.java.JavaRDD;
import org.junit.Test;
import org.junit.jupiter.api.Test;
import java.io.File;
import java.util.List;
import java.util.stream.Collectors;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
/**
* Test Cases for rollback of snapshots and commits.
@@ -105,12 +105,12 @@ public class TestClientRollback extends TestHoodieClientBase {
List<HoodieBaseFile> dataFiles = partitionPaths.stream().flatMap(s -> {
return view1.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("003"));
}).collect(Collectors.toList());
assertEquals("The data files for commit 003 should be present", 3, dataFiles.size());
assertEquals(3, dataFiles.size(), "The data files for commit 003 should be present");
dataFiles = partitionPaths.stream().flatMap(s -> {
return view1.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("002"));
}).collect(Collectors.toList());
assertEquals("The data files for commit 002 should be present", 3, dataFiles.size());
assertEquals(3, dataFiles.size(), "The data files for commit 002 should be present");
/**
* Write 4 (updates)
@@ -128,15 +128,12 @@ public class TestClientRollback extends TestHoodieClientBase {
final BaseFileOnlyView view2 = table.getBaseFileOnlyView();
dataFiles = partitionPaths.stream().flatMap(s -> view2.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("004"))).collect(Collectors.toList());
assertEquals("The data files for commit 004 should be present", 3, dataFiles.size());
assertEquals(3, dataFiles.size(), "The data files for commit 004 should be present");
// rolling back to a non existent savepoint must not succeed
try {
assertThrows(HoodieRollbackException.class, () -> {
client.restoreToSavepoint("001");
fail("Rolling back to non-existent savepoint should not be allowed");
} catch (HoodieRollbackException e) {
// this is good
}
}, "Rolling back to non-existent savepoint should not be allowed");
// rollback to savepoint 002
HoodieInstant savepoint = table.getCompletedSavepointTimeline().getInstants().findFirst().get();
@@ -146,13 +143,13 @@ public class TestClientRollback extends TestHoodieClientBase {
table = HoodieTable.create(metaClient, getConfig(), jsc);
final BaseFileOnlyView view3 = table.getBaseFileOnlyView();
dataFiles = partitionPaths.stream().flatMap(s -> view3.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("002"))).collect(Collectors.toList());
assertEquals("The data files for commit 002 be available", 3, dataFiles.size());
assertEquals(3, dataFiles.size(), "The data files for commit 002 be available");
dataFiles = partitionPaths.stream().flatMap(s -> view3.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("003"))).collect(Collectors.toList());
assertEquals("The data files for commit 003 should be rolled back", 0, dataFiles.size());
assertEquals(0, dataFiles.size(), "The data files for commit 003 should be rolled back");
dataFiles = partitionPaths.stream().flatMap(s -> view3.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("004"))).collect(Collectors.toList());
assertEquals("The data files for commit 004 should be rolled back", 0, dataFiles.size());
assertEquals(0, dataFiles.size(), "The data files for commit 004 should be rolled back");
}
}
@@ -195,12 +192,9 @@ public class TestClientRollback extends TestHoodieClientBase {
try (HoodieWriteClient client = getHoodieWriteClient(config, false);) {
// Rollback commit 1 (this should fail, since commit2 is still around)
try {
assertThrows(HoodieRollbackException.class, () -> {
client.rollback(commitTime1);
fail("Should have thrown an exception ");
} catch (HoodieRollbackException hrbe) {
// should get here
}
}, "Should have thrown an exception ");
// Rollback commit3
client.rollback(commitTime3);

View File

@@ -36,10 +36,9 @@ import org.apache.hudi.table.action.compact.OperationResult;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.util.HashSet;
@@ -52,6 +51,9 @@ import java.util.stream.Stream;
import static org.apache.hudi.client.CompactionAdminClient.getRenamingActionsToAlignWithCompactionOperation;
import static org.apache.hudi.client.CompactionAdminClient.renameLogFile;
import static org.apache.hudi.common.model.HoodieTableType.MERGE_ON_READ;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
public class TestCompactionAdminClient extends TestHoodieClientBase {
@@ -60,7 +62,7 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
private HoodieTableMetaClient metaClient;
private CompactionAdminClient client;
@Before
@BeforeEach
public void setUp() throws Exception {
initPath();
initSparkContexts();
@@ -68,7 +70,7 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
client = new CompactionAdminClient(jsc, basePath);
}
@After
@AfterEach
public void tearDown() {
client.close();
metaClient = null;
@@ -137,7 +139,7 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
metaClient = new HoodieTableMetaClient(metaClient.getHadoopConf(), basePath, true);
List<ValidationOpResult> result = client.validateCompactionPlan(metaClient, compactionInstant, 1);
if (expNumRepairs > 0) {
Assert.assertTrue("Expect some failures in validation", result.stream().anyMatch(r -> !r.isSuccess()));
assertTrue(result.stream().anyMatch(r -> !r.isSuccess()), "Expect some failures in validation");
}
// Now repair
List<Pair<HoodieLogFile, HoodieLogFile>> undoFiles =
@@ -155,18 +157,18 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
Map<String, String> expRenameFiles = renameFiles.stream()
.collect(Collectors.toMap(p -> p.getLeft().getPath().toString(), x -> x.getRight().getPath().toString()));
if (expNumRepairs > 0) {
Assert.assertFalse("Rename Files must be non-empty", renameFiles.isEmpty());
assertFalse(renameFiles.isEmpty(), "Rename Files must be non-empty");
} else {
Assert.assertTrue("Rename Files must be empty", renameFiles.isEmpty());
assertTrue(renameFiles.isEmpty(), "Rename Files must be empty");
}
expRenameFiles.forEach((key, value) -> LOG.info("Key :" + key + " renamed to " + value + " rolled back to "
+ renameFilesFromUndo.get(key)));
Assert.assertEquals("Undo must completely rollback renames", expRenameFiles, renameFilesFromUndo);
assertEquals(expRenameFiles, renameFilesFromUndo, "Undo must completely rollback renames");
// Now expect validation to succeed
result = client.validateCompactionPlan(metaClient, compactionInstant, 1);
Assert.assertTrue("Expect no failures in validation", result.stream().allMatch(OperationResult::isSuccess));
Assert.assertEquals("Expected Num Repairs", expNumRepairs, undoFiles.size());
assertTrue(result.stream().allMatch(OperationResult::isSuccess), "Expect no failures in validation");
assertEquals(expNumRepairs, undoFiles.size(), "Expected Num Repairs");
}
/**
@@ -178,8 +180,8 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
metaClient = new HoodieTableMetaClient(metaClient.getHadoopConf(), basePath, true);
// Ensure compaction-plan is good to begin with
List<ValidationOpResult> validationResults = client.validateCompactionPlan(metaClient, compactionInstant, 1);
Assert.assertFalse("Some validations failed",
validationResults.stream().anyMatch(v -> !v.isSuccess()));
assertFalse(validationResults.stream().anyMatch(v -> !v.isSuccess()),
"Some validations failed");
}
private void validateRenameFiles(List<Pair<HoodieLogFile, HoodieLogFile>> renameFiles, String ingestionInstant,
@@ -189,8 +191,8 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
Set<HoodieLogFile> uniqOldLogFiles = new HashSet<>();
renameFiles.forEach(lfPair -> {
Assert.assertFalse("Old Log File Names do not collide", uniqOldLogFiles.contains(lfPair.getKey()));
Assert.assertFalse("New Log File Names do not collide", uniqNewLogFiles.contains(lfPair.getValue()));
assertFalse(uniqOldLogFiles.contains(lfPair.getKey()), "Old Log File Names do not collide");
assertFalse(uniqNewLogFiles.contains(lfPair.getValue()), "New Log File Names do not collide");
uniqOldLogFiles.add(lfPair.getKey());
uniqNewLogFiles.add(lfPair.getValue());
});
@@ -198,17 +200,17 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
renameFiles.forEach(lfPair -> {
HoodieLogFile oldLogFile = lfPair.getLeft();
HoodieLogFile newLogFile = lfPair.getValue();
Assert.assertEquals("Base Commit time is expected", ingestionInstant, newLogFile.getBaseCommitTime());
Assert.assertEquals("Base Commit time is expected", compactionInstant, oldLogFile.getBaseCommitTime());
Assert.assertEquals("File Id is expected", oldLogFile.getFileId(), newLogFile.getFileId());
assertEquals(ingestionInstant, newLogFile.getBaseCommitTime(), "Base Commit time is expected");
assertEquals(compactionInstant, oldLogFile.getBaseCommitTime(), "Base Commit time is expected");
assertEquals(oldLogFile.getFileId(), newLogFile.getFileId(), "File Id is expected");
HoodieLogFile lastLogFileBeforeCompaction =
fsView.getLatestMergedFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], ingestionInstant)
.filter(fs -> fs.getFileId().equals(oldLogFile.getFileId())).map(fs -> fs.getLogFiles().findFirst().get())
.findFirst().get();
Assert.assertEquals("Log Version expected",
lastLogFileBeforeCompaction.getLogVersion() + oldLogFile.getLogVersion(), newLogFile.getLogVersion());
Assert.assertTrue("Log version does not collide",
newLogFile.getLogVersion() > lastLogFileBeforeCompaction.getLogVersion());
assertEquals(lastLogFileBeforeCompaction.getLogVersion() + oldLogFile.getLogVersion(),
newLogFile.getLogVersion(), "Log Version expected");
assertTrue(newLogFile.getLogVersion() > lastLogFileBeforeCompaction.getLogVersion(),
"Log version does not collide");
});
}
@@ -243,8 +245,8 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
Set<HoodieLogFile> expLogFilesToBeRenamed = fsView.getLatestFileSlices(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0])
.filter(fs -> fs.getBaseInstantTime().equals(compactionInstant)).flatMap(FileSlice::getLogFiles)
.collect(Collectors.toSet());
Assert.assertEquals("Log files belonging to file-slices created because of compaction request must be renamed",
expLogFilesToBeRenamed, gotLogFilesToBeRenamed);
assertEquals(expLogFilesToBeRenamed, gotLogFilesToBeRenamed,
"Log files belonging to file-slices created because of compaction request must be renamed");
if (skipUnSchedule) {
// Do the renaming only but do not touch the compaction plan - Needed for repair tests
@@ -274,9 +276,10 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
// Expect all file-slice whose base-commit is same as compaction commit to contain no new Log files
newFsView.getLatestFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], compactionInstant, true)
.filter(fs -> fs.getBaseInstantTime().equals(compactionInstant)).forEach(fs -> {
Assert.assertFalse("No Data file must be present", fs.getBaseFile().isPresent());
Assert.assertEquals("No Log Files", 0, fs.getLogFiles().count());
.filter(fs -> fs.getBaseInstantTime().equals(compactionInstant))
.forEach(fs -> {
assertFalse(fs.getBaseFile().isPresent(), "No Data file must be present");
assertEquals(0, fs.getLogFiles().count(), "No Log Files");
});
// Ensure same number of log-files before and after renaming per fileId
@@ -286,10 +289,10 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
.map(fs -> Pair.of(fs.getFileId(), fs.getLogFiles().count()))
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
Assert.assertEquals("Each File Id has same number of log-files", fileIdToCountsBeforeRenaming,
fileIdToCountsAfterRenaming);
Assert.assertEquals("Not Empty", numEntriesPerInstant, fileIdToCountsAfterRenaming.size());
Assert.assertEquals("Expected number of renames", expNumRenames, renameFiles.size());
assertEquals(fileIdToCountsBeforeRenaming, fileIdToCountsAfterRenaming,
"Each File Id has same number of log-files");
assertEquals(numEntriesPerInstant, fileIdToCountsAfterRenaming.size(), "Not Empty");
assertEquals(expNumRenames, renameFiles.size(), "Expected number of renames");
return renameFiles;
}
@@ -315,8 +318,8 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
.filter(fs -> fs.getBaseInstantTime().equals(compactionInstant))
.filter(fs -> fs.getFileId().equals(op.getFileId())).flatMap(FileSlice::getLogFiles)
.collect(Collectors.toSet());
Assert.assertEquals("Log files belonging to file-slices created because of compaction request must be renamed",
expLogFilesToBeRenamed, gotLogFilesToBeRenamed);
assertEquals(expLogFilesToBeRenamed, gotLogFilesToBeRenamed,
"Log files belonging to file-slices created because of compaction request must be renamed");
validateRenameFiles(renameFiles, ingestionInstant, compactionInstant, fsView);
Map<String, Long> fileIdToCountsBeforeRenaming =
@@ -335,9 +338,10 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
// Expect all file-slice whose base-commit is same as compaction commit to contain no new Log files
newFsView.getLatestFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], compactionInstant, true)
.filter(fs -> fs.getBaseInstantTime().equals(compactionInstant))
.filter(fs -> fs.getFileId().equals(op.getFileId())).forEach(fs -> {
Assert.assertFalse("No Data file must be present", fs.getBaseFile().isPresent());
Assert.assertEquals("No Log Files", 0, fs.getLogFiles().count());
.filter(fs -> fs.getFileId().equals(op.getFileId()))
.forEach(fs -> {
assertFalse(fs.getBaseFile().isPresent(), "No Data file must be present");
assertEquals(0, fs.getLogFiles().count(), "No Log Files");
});
// Ensure same number of log-files before and after renaming per fileId
@@ -348,9 +352,9 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
.map(fs -> Pair.of(fs.getFileId(), fs.getLogFiles().count()))
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
Assert.assertEquals("Each File Id has same number of log-files", fileIdToCountsBeforeRenaming,
fileIdToCountsAfterRenaming);
Assert.assertEquals("Not Empty", 1, fileIdToCountsAfterRenaming.size());
Assert.assertEquals("Expected number of renames", expNumRenames, renameFiles.size());
assertEquals(fileIdToCountsBeforeRenaming, fileIdToCountsAfterRenaming,
"Each File Id has same number of log-files");
assertEquals(1, fileIdToCountsAfterRenaming.size(), "Not Empty");
assertEquals(expNumRenames, renameFiles.size(), "Expected number of renames");
}
}

View File

@@ -51,9 +51,8 @@ import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.sql.SQLContext;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import java.io.IOException;
import java.util.HashMap;
@@ -64,9 +63,9 @@ import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
/**
* Base Class providing setup/cleanup and utility methods for testing Hoodie Client facing tests.
@@ -75,12 +74,12 @@ public class TestHoodieClientBase extends HoodieClientTestHarness {
private static final Logger LOG = LogManager.getLogger(TestHoodieClientBase.class);
/**
 * Per-test setup hook; its only action is to call initResources().
 *
 * NOTE(review): both the JUnit 4 (@Before) and JUnit 5 (@BeforeEach) lifecycle annotations are
 * shown on this method - presumably the before/after sides of the JUnit 5 migration; confirm
 * which one the real file carries.
 *
 * @throws Exception propagated unchanged from initResources()
 */
@Before
@BeforeEach
public void setUp() throws Exception {
initResources();
}
/**
 * Per-test teardown hook; its only action is to call cleanupResources().
 *
 * NOTE(review): both the JUnit 4 (@After) and JUnit 5 (@AfterEach) lifecycle annotations are
 * shown on this method - presumably the before/after sides of the JUnit 5 migration; confirm
 * which one the real file carries.
 *
 * @throws Exception propagated unchanged from cleanupResources()
 */
@After
@AfterEach
public void tearDown() throws Exception {
cleanupResources();
}
@@ -170,7 +169,7 @@ public class TestHoodieClientBase extends HoodieClientTestHarness {
/**
 * Fails the calling test if any of the given write statuses reports an error.
 *
 * @param statuses write statuses to inspect; each one is checked with hasErrors(), and the
 *                 failure message names the offending status's file id
 */
public static void assertNoWriteErrors(List<WriteStatus> statuses) {
// Verify there are no errors
for (WriteStatus status : statuses) {
// NOTE(review): the same check is shown in two argument orders. Message-first is the
// org.junit.Assert (JUnit 4) form; condition-first is the org.junit.jupiter.api.Assertions
// (JUnit 5) form.
assertFalse("Errors found in write of " + status.getFileId(), status.hasErrors());
assertFalse(status.hasErrors(), "Errors found in write of " + status.getFileId());
}
}
@@ -200,7 +199,7 @@ public class TestHoodieClientBase extends HoodieClientTestHarness {
assertTrue(HoodiePartitionMetadata.hasPartitionMetadata(fs, new Path(basePath, partitionPath)));
HoodiePartitionMetadata pmeta = new HoodiePartitionMetadata(fs, new Path(basePath, partitionPath));
pmeta.readFromFS();
Assert.assertEquals(HoodieTestDataGenerator.DEFAULT_PARTITION_DEPTH, pmeta.getPartitionDepth());
assertEquals(HoodieTestDataGenerator.DEFAULT_PARTITION_DEPTH, pmeta.getPartitionDepth());
}
}
@@ -212,9 +211,9 @@ public class TestHoodieClientBase extends HoodieClientTestHarness {
*/
protected void checkTaggedRecords(List<HoodieRecord> taggedRecords, String instantTime) {
// Each record must report a known current location, and that location's instant time must
// equal the instantTime argument.
for (HoodieRecord rec : taggedRecords) {
// Message-first argument order: the org.junit.Assert (JUnit 4) form of the two checks.
assertTrue("Record " + rec + " found with no location.", rec.isCurrentLocationKnown());
assertEquals("All records should have commit time " + instantTime + ", since updates were made",
rec.getCurrentLocation().getInstantTime(), instantTime);
// Message-last argument order: the org.junit.jupiter.api.Assertions (JUnit 5) form.
assertTrue(rec.isCurrentLocationKnown(), "Record " + rec + " found with no location.");
// NOTE(review): Jupiter's assertEquals takes (expected, actual, message). Here the value read
// from the record sits in the expected slot and instantTime in the actual slot, so a failure
// report would label the two values the wrong way round. Pass/fail is unaffected; consider
// swapping the first two arguments.
assertEquals(rec.getCurrentLocation().getInstantTime(), instantTime,
"All records should have commit time " + instantTime + ", since updates were made");
}
}
@@ -231,7 +230,7 @@ public class TestHoodieClientBase extends HoodieClientTestHarness {
if (!partitionToKeys.containsKey(partitionPath)) {
partitionToKeys.put(partitionPath, new HashSet<>());
}
assertFalse("key " + key + " is duplicate within partition " + partitionPath, partitionToKeys.get(partitionPath).contains(key));
assertFalse(partitionToKeys.get(partitionPath).contains(key), "key " + key + " is duplicate within partition " + partitionPath);
partitionToKeys.get(partitionPath).add(key);
}
}
@@ -472,30 +471,30 @@ public class TestHoodieClientBase extends HoodieClientTestHarness {
HoodieTimeline timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
if (assertForCommit) {
assertEquals("Expecting " + expTotalCommits + " commits.", expTotalCommits,
timeline.findInstantsAfter(initCommitTime, Integer.MAX_VALUE).countInstants());
Assert.assertEquals("Latest commit should be " + newCommitTime, newCommitTime,
timeline.lastInstant().get().getTimestamp());
assertEquals("Must contain " + expRecordsInThisCommit + " records", expRecordsInThisCommit,
HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count());
assertEquals(expTotalCommits, timeline.findInstantsAfter(initCommitTime, Integer.MAX_VALUE).countInstants(),
"Expecting " + expTotalCommits + " commits.");
assertEquals(newCommitTime, timeline.lastInstant().get().getTimestamp(),
"Latest commit should be " + newCommitTime);
assertEquals(expRecordsInThisCommit, HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count(),
"Must contain " + expRecordsInThisCommit + " records");
// Check the entire dataset has all records still
String[] fullPartitionPaths = new String[dataGen.getPartitionPaths().length];
for (int i = 0; i < fullPartitionPaths.length; i++) {
fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]);
}
assertEquals("Must contain " + expTotalRecords + " records", expTotalRecords,
HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count());
assertEquals(expTotalRecords, HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count(),
"Must contain " + expTotalRecords + " records");
// Check that the incremental consumption from prevCommitTime
assertEquals("Incremental consumption from " + prevCommitTime + " should give all records in latest commit",
HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count(),
HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, prevCommitTime).count());
assertEquals(HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count(),
HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, prevCommitTime).count(),
"Incremental consumption from " + prevCommitTime + " should give all records in latest commit");
if (commitTimesBetweenPrevAndNew.isPresent()) {
commitTimesBetweenPrevAndNew.get().forEach(ct -> {
assertEquals("Incremental consumption from " + ct + " should give all records in latest commit",
HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count(),
HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, ct).count());
assertEquals(HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count(),
HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, ct).count(),
"Incremental consumption from " + ct + " should give all records in latest commit");
});
}
}
@@ -540,26 +539,26 @@ public class TestHoodieClientBase extends HoodieClientTestHarness {
HoodieTimeline timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
if (assertForCommit) {
assertEquals("Expecting 3 commits.", 3,
timeline.findInstantsAfter(initCommitTime, Integer.MAX_VALUE).countInstants());
Assert.assertEquals("Latest commit should be " + newCommitTime, newCommitTime,
timeline.lastInstant().get().getTimestamp());
assertEquals("Must contain " + expRecordsInThisCommit + " records", expRecordsInThisCommit,
HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count());
assertEquals(3, timeline.findInstantsAfter(initCommitTime, Integer.MAX_VALUE).countInstants(),
"Expecting 3 commits.");
assertEquals(newCommitTime, timeline.lastInstant().get().getTimestamp(),
"Latest commit should be " + newCommitTime);
assertEquals(expRecordsInThisCommit, HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count(),
"Must contain " + expRecordsInThisCommit + " records");
// Check the entire dataset has all records still
String[] fullPartitionPaths = new String[dataGen.getPartitionPaths().length];
for (int i = 0; i < fullPartitionPaths.length; i++) {
fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]);
}
assertEquals("Must contain " + expTotalRecords + " records", expTotalRecords,
HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count());
assertEquals(expTotalRecords, HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count(),
"Must contain " + expTotalRecords + " records");
// Check that the incremental consumption from prevCommitTime
assertEquals("Incremental consumption from " + prevCommitTime + " should give no records in latest commit,"
+ " since it is a delete operation",
HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count(),
HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, prevCommitTime).count());
assertEquals(HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count(),
HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, prevCommitTime).count(),
"Incremental consumption from " + prevCommitTime + " should give no records in latest commit,"
+ " since it is a delete operation");
}
return result;
}

View File

@@ -18,7 +18,6 @@
package org.apache.hudi.client;
import java.util.HashSet;
import org.apache.hudi.common.HoodieClientTestUtils;
import org.apache.hudi.common.HoodieTestDataGenerator;
import org.apache.hudi.common.TestRawTripPayload;
@@ -49,15 +48,14 @@ import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.index.HoodieIndex;
import org.apache.hudi.index.HoodieIndex.IndexType;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.action.commit.WriteHelper;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.table.action.commit.WriteHelper;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaRDD;
import org.junit.Assert;
import org.junit.Test;
import org.junit.jupiter.api.Test;
import java.io.FileInputStream;
import java.io.IOException;
@@ -65,6 +63,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -75,10 +74,10 @@ import static org.apache.hudi.common.HoodieTestDataGenerator.NULL_SCHEMA;
import static org.apache.hudi.common.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA;
import static org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion.VERSION_0;
import static org.apache.hudi.common.util.ParquetUtils.readRowKeysFromParquet;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
@@ -154,11 +153,11 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
JavaRDD<WriteStatus> result = insertFirstBatch(cfg, client, newCommitTime, prevCommitTime, numRecords, writeFn,
isPrepped, false, numRecords);
assertFalse("If Autocommit is false, then commit should not be made automatically",
HoodieTestUtils.doesCommitExist(basePath, newCommitTime));
assertTrue("Commit should succeed", client.commit(newCommitTime, result));
assertTrue("After explicit commit, commit file should be created",
HoodieTestUtils.doesCommitExist(basePath, newCommitTime));
assertFalse(HoodieTestUtils.doesCommitExist(basePath, newCommitTime),
"If Autocommit is false, then commit should not be made automatically");
assertTrue(client.commit(newCommitTime, result), "Commit should succeed");
assertTrue(HoodieTestUtils.doesCommitExist(basePath, newCommitTime),
"After explicit commit, commit file should be created");
}
}
@@ -251,7 +250,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
if (!partitionToKeys.containsKey(partitionPath)) {
partitionToKeys.put(partitionPath, new HashSet<>());
}
assertFalse("key " + key + " is duplicate within partition " + partitionPath, partitionToKeys.get(partitionPath).contains(key));
assertFalse(partitionToKeys.get(partitionPath).contains(key), "key " + key + " is duplicate within partition " + partitionPath);
partitionToKeys.get(partitionPath).add(key);
}
}
@@ -326,8 +325,8 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
for (int i = 0; i < fullPartitionPaths.length; i++) {
fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]);
}
assertEquals("Must contain " + 200 + " records", 200,
HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count());
assertEquals(200, HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count(),
"Must contain " + 200 + " records");
// Perform Delete again on upgraded dataset.
prevCommitTime = newCommitTime;
@@ -340,17 +339,17 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
HoodieActiveTimeline activeTimeline = new HoodieActiveTimeline(metaClient, false);
List<HoodieInstant> instants = activeTimeline.getCommitTimeline().getInstants().collect(Collectors.toList());
Assert.assertEquals(5, instants.size());
Assert.assertEquals(new HoodieInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "001"),
assertEquals(5, instants.size());
assertEquals(new HoodieInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "001"),
instants.get(0));
Assert.assertEquals(new HoodieInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "004"),
assertEquals(new HoodieInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "004"),
instants.get(1));
// New Format should have all states of instants
Assert.assertEquals(new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.COMMIT_ACTION, "006"),
assertEquals(new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.COMMIT_ACTION, "006"),
instants.get(2));
Assert.assertEquals(new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "006"),
assertEquals(new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "006"),
instants.get(3));
Assert.assertEquals(new HoodieInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "006"),
assertEquals(new HoodieInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "006"),
instants.get(4));
}
@@ -425,8 +424,8 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
for (int i = 0; i < fullPartitionPaths.length; i++) {
fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]);
}
assertEquals("Must contain 100 records", 100,
HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count());
assertEquals(100, HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count(),
"Must contain 100 records");
/**
* Write 2. Updates with different partition
@@ -448,8 +447,8 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
for (int i = 0; i < fullPartitionPaths.length; i++) {
fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]);
}
assertEquals("Must contain 100 records", 100,
HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count());
assertEquals(100, HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count(),
"Must contain 100 records");
}
/**
@@ -476,12 +475,11 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
assertNoWriteErrors(statuses);
assertEquals("Just 1 file needs to be added.", 1, statuses.size());
assertEquals(1, statuses.size(), "Just 1 file needs to be added.");
String file1 = statuses.get(0).getFileId();
Assert.assertEquals("file should contain 100 records",
assertEquals(100,
readRowKeysFromParquet(jsc.hadoopConfiguration(), new Path(basePath, statuses.get(0).getStat().getPath()))
.size(),
100);
.size(), "file should contain 100 records");
// Update + Inserts such that they just expand file1
String commitTime2 = "002";
@@ -496,18 +494,18 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
statuses = client.upsert(insertAndUpdatesRDD2, commitTime2).collect();
assertNoWriteErrors(statuses);
assertEquals("Just 1 file needs to be updated.", 1, statuses.size());
assertEquals("Existing file should be expanded", file1, statuses.get(0).getFileId());
assertEquals("Existing file should be expanded", commitTime1, statuses.get(0).getStat().getPrevCommit());
assertEquals(1, statuses.size(), "Just 1 file needs to be updated.");
assertEquals(file1, statuses.get(0).getFileId(), "Existing file should be expanded");
assertEquals(commitTime1, statuses.get(0).getStat().getPrevCommit(), "Existing file should be expanded");
Path newFile = new Path(basePath, statuses.get(0).getStat().getPath());
assertEquals("file should contain 140 records", readRowKeysFromParquet(jsc.hadoopConfiguration(), newFile).size(),
140);
assertEquals(140, readRowKeysFromParquet(jsc.hadoopConfiguration(), newFile).size(),
"file should contain 140 records");
List<GenericRecord> records = ParquetUtils.readAvroRecords(jsc.hadoopConfiguration(), newFile);
for (GenericRecord record : records) {
String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
assertEquals("only expect commit2", commitTime2, record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString());
assertTrue("key expected to be part of commit2", keys2.contains(recordKey) || keys1.contains(recordKey));
assertEquals(commitTime2, record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString(), "only expect commit2");
assertTrue(keys2.contains(recordKey) || keys1.contains(recordKey), "key expected to be part of commit2");
}
// update + inserts such that file1 is updated and expanded, a new file2 is created.
@@ -522,7 +520,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
statuses = client.upsert(insertAndUpdatesRDD3, commitTime3).collect();
assertNoWriteErrors(statuses);
assertEquals("2 files needs to be committed.", 2, statuses.size());
assertEquals(2, statuses.size(), "2 files needs to be committed.");
HoodieTableMetaClient metadata = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
HoodieTable table = getHoodieTable(metadata, config);
@@ -533,7 +531,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
int numTotalUpdatesInCommit3 = 0;
for (HoodieBaseFile file : files) {
if (file.getFileName().contains(file1)) {
assertEquals("Existing file should be expanded", commitTime3, file.getCommitTime());
assertEquals(commitTime3, file.getCommitTime(), "Existing file should be expanded");
records = ParquetUtils.readAvroRecords(jsc.hadoopConfiguration(), new Path(file.getPath()));
for (GenericRecord record : records) {
String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
@@ -547,21 +545,21 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
}
}
}
assertEquals("All keys added in commit 2 must be updated in commit3 correctly", 0, keys2.size());
assertEquals(0, keys2.size(), "All keys added in commit 2 must be updated in commit3 correctly");
} else {
assertEquals("New file must be written for commit 3", commitTime3, file.getCommitTime());
assertEquals(commitTime3, file.getCommitTime(), "New file must be written for commit 3");
records = ParquetUtils.readAvroRecords(jsc.hadoopConfiguration(), new Path(file.getPath()));
for (GenericRecord record : records) {
String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
assertEquals("only expect commit3", commitTime3,
record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString());
assertTrue("key expected to be part of commit3", keys3.contains(recordKey));
assertEquals(commitTime3, record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString(),
"only expect commit3");
assertTrue(keys3.contains(recordKey), "key expected to be part of commit3");
}
numTotalInsertsInCommit3 += records.size();
}
}
assertEquals("Total updates in commit3 must add up", inserts2.size(), numTotalUpdatesInCommit3);
assertEquals("Total inserts in commit3 must add up", keys3.size(), numTotalInsertsInCommit3);
assertEquals(numTotalUpdatesInCommit3, inserts2.size(), "Total updates in commit3 must add up");
assertEquals(numTotalInsertsInCommit3, keys3.size(), "Total inserts in commit3 must add up");
}
/**
@@ -588,12 +586,11 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
assertNoWriteErrors(statuses);
assertPartitionMetadata(new String[] {testPartitionPath}, fs);
assertEquals("Just 1 file needs to be added.", 1, statuses.size());
assertEquals(1, statuses.size(), "Just 1 file needs to be added.");
String file1 = statuses.get(0).getFileId();
assertEquals("file should contain 100 records",
assertEquals(100,
readRowKeysFromParquet(jsc.hadoopConfiguration(), new Path(basePath, statuses.get(0).getStat().getPath()))
.size(),
100);
.size(), "file should contain 100 records");
// Second, set of Inserts should just expand file1
String commitTime2 = "002";
@@ -604,21 +601,21 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
statuses = client.insert(insertRecordsRDD2, commitTime2).collect();
assertNoWriteErrors(statuses);
assertEquals("Just 1 file needs to be updated.", 1, statuses.size());
assertEquals("Existing file should be expanded", file1, statuses.get(0).getFileId());
assertEquals("Existing file should be expanded", commitTime1, statuses.get(0).getStat().getPrevCommit());
assertEquals(1, statuses.size(), "Just 1 file needs to be updated.");
assertEquals(file1, statuses.get(0).getFileId(), "Existing file should be expanded");
assertEquals(commitTime1, statuses.get(0).getStat().getPrevCommit(), "Existing file should be expanded");
Path newFile = new Path(basePath, statuses.get(0).getStat().getPath());
assertEquals("file should contain 140 records", readRowKeysFromParquet(jsc.hadoopConfiguration(), newFile).size(),
140);
assertEquals(140, readRowKeysFromParquet(jsc.hadoopConfiguration(), newFile).size(),
"file should contain 140 records");
List<GenericRecord> records = ParquetUtils.readAvroRecords(jsc.hadoopConfiguration(), newFile);
for (GenericRecord record : records) {
String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
String recCommitTime = record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString();
assertTrue("Record expected to be part of commit 1 or commit2",
commitTime1.equals(recCommitTime) || commitTime2.equals(recCommitTime));
assertTrue("key expected to be part of commit 1 or commit2",
keys2.contains(recordKey) || keys1.contains(recordKey));
assertTrue(commitTime1.equals(recCommitTime) || commitTime2.equals(recCommitTime),
"Record expected to be part of commit 1 or commit2");
assertTrue(keys2.contains(recordKey) || keys1.contains(recordKey),
"key expected to be part of commit 1 or commit2");
}
// Lots of inserts such that file1 is updated and expanded, a new file2 is created.
@@ -628,22 +625,22 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
JavaRDD<HoodieRecord> insertRecordsRDD3 = jsc.parallelize(insert3, 1);
statuses = client.insert(insertRecordsRDD3, commitTime3).collect();
assertNoWriteErrors(statuses);
assertEquals("2 files needs to be committed.", 2, statuses.size());
assertEquals(2, statuses.size(), "2 files needs to be committed.");
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
HoodieTable table = getHoodieTable(metaClient, config);
List<HoodieBaseFile> files = table.getBaseFileOnlyView()
.getLatestBaseFilesBeforeOrOn(testPartitionPath, commitTime3).collect(Collectors.toList());
assertEquals("Total of 2 valid data files", 2, files.size());
assertEquals(2, files.size(), "Total of 2 valid data files");
int totalInserts = 0;
for (HoodieBaseFile file : files) {
assertEquals("All files must be at commit 3", commitTime3, file.getCommitTime());
assertEquals(commitTime3, file.getCommitTime(), "All files must be at commit 3");
records = ParquetUtils.readAvroRecords(jsc.hadoopConfiguration(), new Path(file.getPath()));
totalInserts += records.size();
}
assertEquals("Total number of records must add up", totalInserts,
inserts1.size() + inserts2.size() + insert3.size());
assertEquals(totalInserts, inserts1.size() + inserts2.size() + insert3.size(),
"Total number of records must add up");
}
/**
@@ -670,12 +667,11 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
assertNoWriteErrors(statuses);
assertEquals("Just 1 file needs to be added.", 1, statuses.size());
assertEquals(1, statuses.size(), "Just 1 file needs to be added.");
String file1 = statuses.get(0).getFileId();
Assert.assertEquals("file should contain 100 records",
assertEquals(100,
readRowKeysFromParquet(jsc.hadoopConfiguration(), new Path(basePath, statuses.get(0).getStat().getPath()))
.size(),
100);
.size(), "file should contain 100 records");
// Delete 20 among 100 inserted
testDeletes(client, inserts1, 20, file1, "002", 80, keysSoFar);
@@ -701,15 +697,16 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
JavaRDD<HoodieKey> deleteKeys3 = jsc.parallelize(hoodieKeysToDelete3, 1);
statuses = client.delete(deleteKeys3, commitTime6).collect();
assertNoWriteErrors(statuses);
assertEquals("Just 0 write status for delete.", 0, statuses.size());
assertEquals(0, statuses.size(), "Just 0 write status for delete.");
// Check the entire dataset has all records still
String[] fullPartitionPaths = new String[dataGen.getPartitionPaths().length];
for (int i = 0; i < fullPartitionPaths.length; i++) {
fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]);
}
assertEquals("Must contain " + 150 + " records", 150,
HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count());
assertEquals(150,
HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count(),
"Must contain " + 150 + " records");
// delete another batch. previous delete commit should have persisted the schema. If not,
// this will throw exception
@@ -735,8 +732,9 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
for (int i = 0; i < fullPartitionPaths.length; i++) {
fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]);
}
assertEquals("Must contain " + expectedTotalRecords + " records", expectedTotalRecords,
HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count());
assertEquals(expectedTotalRecords,
HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count(),
"Must contain " + expectedTotalRecords + " records");
return Pair.of(keys, inserts);
}
@@ -751,26 +749,28 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
assertNoWriteErrors(statuses);
assertEquals("Just 1 file needs to be added.", 1, statuses.size());
assertEquals("Existing file should be expanded", existingFile, statuses.get(0).getFileId());
assertEquals(1, statuses.size(), "Just 1 file needs to be added.");
assertEquals(existingFile, statuses.get(0).getFileId(), "Existing file should be expanded");
// Check the entire dataset has all records still
String[] fullPartitionPaths = new String[dataGen.getPartitionPaths().length];
for (int i = 0; i < fullPartitionPaths.length; i++) {
fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]);
}
assertEquals("Must contain " + exepctedRecords + " records", exepctedRecords,
HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count());
assertEquals(exepctedRecords,
HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count(),
"Must contain " + exepctedRecords + " records");
Path newFile = new Path(basePath, statuses.get(0).getStat().getPath());
assertEquals("file should contain 110 records", readRowKeysFromParquet(jsc.hadoopConfiguration(), newFile).size(),
exepctedRecords);
assertEquals(exepctedRecords,
readRowKeysFromParquet(jsc.hadoopConfiguration(), newFile).size(),
"file should contain 110 records");
List<GenericRecord> records = ParquetUtils.readAvroRecords(jsc.hadoopConfiguration(), newFile);
for (GenericRecord record : records) {
String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
assertTrue("key expected to be part of " + instantTime, keys.contains(recordKey));
assertFalse("Key deleted", hoodieKeysToDelete.contains(recordKey));
assertTrue(keys.contains(recordKey), "key expected to be part of " + instantTime);
assertFalse(hoodieKeysToDelete.contains(recordKey), "Key deleted");
}
}
@@ -795,12 +795,9 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
List<HoodieKey> hoodieKeysToDelete = HoodieClientTestUtils
.getKeysToDelete(HoodieClientTestUtils.getHoodieKeys(dummyInserts), 20);
JavaRDD<HoodieKey> deleteKeys = jsc.parallelize(hoodieKeysToDelete, 1);
try {
assertThrows(HoodieIOException.class, () -> {
client.delete(deleteKeys, commitTime1).collect();
fail("Should have thrown Exception");
} catch (HoodieIOException e) {
// ignore
}
}, "Should have thrown Exception");
}
/**
@@ -822,9 +819,9 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
JavaRDD<WriteStatus> result = client.bulkInsert(writeRecords, instantTime);
assertTrue("Commit should succeed", client.commit(instantTime, result));
assertTrue("After explicit commit, commit file should be created",
HoodieTestUtils.doesCommitExist(basePath, instantTime));
assertTrue(client.commit(instantTime, result), "Commit should succeed");
assertTrue(HoodieTestUtils.doesCommitExist(basePath, instantTime),
"After explicit commit, commit file should be created");
// Get parquet file paths from commit metadata
String actionType = metaClient.getCommitActionType();
@@ -868,9 +865,9 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
JavaRDD<WriteStatus> result = client.bulkInsert(writeRecords, instantTime);
assertTrue("Commit should succeed", client.commit(instantTime, result));
assertTrue("After explicit commit, commit file should be created",
HoodieTestUtils.doesCommitExist(basePath, instantTime));
assertTrue(client.commit(instantTime, result), "Commit should succeed");
assertTrue(HoodieTestUtils.doesCommitExist(basePath, instantTime),
"After explicit commit, commit file should be created");
// Read from commit file
String filename = HoodieTestUtils.getCommitFilePath(basePath, instantTime);
@@ -888,7 +885,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
inserts += stat.getValue().getInserts();
}
}
Assert.assertEquals(inserts, 200);
assertEquals(200, inserts);
// Update + Inserts such that they just expand file1
instantTime = "001";
@@ -898,9 +895,9 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
writeRecords = jsc.parallelize(records, 1);
result = client.upsert(writeRecords, instantTime);
assertTrue("Commit should succeed", client.commit(instantTime, result));
assertTrue("After explicit commit, commit file should be created",
HoodieTestUtils.doesCommitExist(basePath, instantTime));
assertTrue(client.commit(instantTime, result), "Commit should succeed");
assertTrue(HoodieTestUtils.doesCommitExist(basePath, instantTime),
"After explicit commit, commit file should be created");
// Read from commit file
filename = HoodieTestUtils.getCommitFilePath(basePath, instantTime);
@@ -919,8 +916,8 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
upserts += stat.getValue().getUpserts();
}
}
Assert.assertEquals(inserts, 200);
Assert.assertEquals(upserts, 200);
assertEquals(200, inserts);
assertEquals(200, upserts);
}
@@ -937,9 +934,9 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
// Delete orphan marker and commit should succeed
metaClient.getFs().delete(result.getKey(), false);
assertTrue("Commit should succeed", client.commit(instantTime, result.getRight()));
assertTrue("After explicit commit, commit file should be created",
HoodieTestUtils.doesCommitExist(basePath, instantTime));
assertTrue(client.commit(instantTime, result.getRight()), "Commit should succeed");
assertTrue(HoodieTestUtils.doesCommitExist(basePath, instantTime),
"After explicit commit, commit file should be created");
// Marker directory must be removed
assertFalse(metaClient.getFs().exists(new Path(metaClient.getMarkerFolderPath(instantTime))));
}
@@ -954,8 +951,8 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
// Rollback of this commit should succeed
client.rollback(instantTime);
assertFalse("After explicit rollback, commit file should not be present",
HoodieTestUtils.doesCommitExist(basePath, instantTime));
assertFalse(HoodieTestUtils.doesCommitExist(basePath, instantTime),
"After explicit rollback, commit file should not be present");
// Marker directory must be removed after rollback
assertFalse(metaClient.getFs().exists(new Path(metaClient.getMarkerFolderPath(instantTime))));
}
@@ -984,12 +981,10 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
metaClient.getFs().create(markerFilePath);
LOG.info("Created a dummy marker path=" + markerFilePath);
try {
Exception e = assertThrows(HoodieCommitException.class, () -> {
client.commit(instantTime, result);
fail("Commit should fail due to consistency check");
} catch (HoodieCommitException cme) {
assertTrue(cme.getCause() instanceof HoodieIOException);
}
}, "Commit should fail due to consistency check");
assertTrue(e.getCause() instanceof HoodieIOException);
return Pair.of(markerFilePath, result);
}

View File

@@ -28,7 +28,7 @@ import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.sql.AnalysisException;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.junit.Test;
import org.junit.jupiter.api.Test;
import java.util.ArrayList;
import java.util.Arrays;
@@ -36,7 +36,8 @@ import java.util.Collection;
import java.util.List;
import java.util.stream.Collectors;
import static org.junit.Assert.assertEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
@SuppressWarnings("unchecked")
/**
@@ -79,11 +80,13 @@ public class TestHoodieReadClient extends TestHoodieClientBase {
});
}
@Test(expected = IllegalStateException.class)
@Test
public void testReadROViewFailsWithoutSqlContext() {
HoodieReadClient readClient = new HoodieReadClient(jsc, getConfig());
JavaRDD<HoodieKey> recordsRDD = jsc.parallelize(new ArrayList<>(), 1);
readClient.readROView(recordsRDD, 1);
assertThrows(IllegalStateException.class, () -> {
readClient.readROView(recordsRDD, 1);
});
}
/**
@@ -131,14 +134,11 @@ public class TestHoodieReadClient extends TestHoodieClientBase {
assertEquals(75, rows.count());
JavaRDD<HoodieKey> keysWithoutPaths = keyToPathPair.filter(keyPath -> !keyPath._2.isPresent())
.map(keyPath -> keyPath._1);
.map(keyPath -> keyPath._1);
try {
assertThrows(AnalysisException.class, () -> {
anotherReadClient.readROView(keysWithoutPaths, 1);
} catch (Exception e) {
// data frame reader throws exception for empty records. ignore the error.
assertEquals(e.getClass(), AnalysisException.class);
}
});
// Actual tests of getPendingCompactions method are in TestAsyncCompaction
// This is just testing empty list

View File

@@ -39,13 +39,13 @@ import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.util.List;
import static org.junit.Assert.assertEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
public class TestMultiFS extends HoodieClientTestHarness {
@@ -54,14 +54,14 @@ public class TestMultiFS extends HoodieClientTestHarness {
protected String tableName = "hoodie_rt";
private String tableType = HoodieTableType.COPY_ON_WRITE.name();
@Before
@BeforeEach
public void setUp() throws Exception {
initSparkContexts();
initDFS();
initTestDataGenerator();
}
@After
@AfterEach
public void tearDown() throws Exception {
cleanupSparkContexts();
cleanupDFS();
@@ -103,7 +103,7 @@ public class TestMultiFS extends HoodieClientTestHarness {
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs.getConf(), dfsBasePath);
HoodieTimeline timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
Dataset<Row> readRecords = HoodieClientTestUtils.readCommit(dfsBasePath, sqlContext, timeline, readCommitTime);
assertEquals("Should contain 100 records", readRecords.count(), records.size());
assertEquals(readRecords.count(), records.size(), "Should contain 100 records");
// Write to local
HoodieTableMetaClient.initTableType(jsc.hadoopConfiguration(), tablePath, HoodieTableType.valueOf(tableType),
@@ -122,7 +122,7 @@ public class TestMultiFS extends HoodieClientTestHarness {
timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
Dataset<Row> localReadRecords =
HoodieClientTestUtils.readCommit(tablePath, sqlContext, timeline, writeCommitTime);
assertEquals("Should contain 100 records", localReadRecords.count(), localRecords.size());
assertEquals(localReadRecords.count(), localRecords.size(), "Should contain 100 records");
}
}
}

View File

@@ -18,8 +18,6 @@
package org.apache.hudi.client;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.common.HoodieClientTestUtils;
import org.apache.hudi.common.HoodieTestDataGenerator;
@@ -36,9 +34,12 @@ import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieInsertException;
import org.apache.hudi.exception.HoodieUpsertException;
import org.apache.hudi.index.HoodieIndex.IndexType;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.util.List;
@@ -51,10 +52,10 @@ import static org.apache.hudi.common.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA
import static org.apache.hudi.common.HoodieTestDataGenerator.TRIP_SCHEMA_PREFIX;
import static org.apache.hudi.common.HoodieTestDataGenerator.TRIP_SCHEMA_SUFFIX;
import static org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion.VERSION_1;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
public class TestTableSchemaEvolution extends TestHoodieClientBase {
private final String initCommitTime = "000";
@@ -73,60 +74,60 @@ public class TestTableSchemaEvolution extends TestHoodieClientBase {
public static final String TRIP_EXAMPLE_SCHEMA_DEVOLVED = TRIP_SCHEMA_PREFIX + MAP_TYPE_SCHEMA + FARE_NESTED_SCHEMA
+ TRIP_SCHEMA_SUFFIX;
@Before
@BeforeEach
public void setUp() throws Exception {
initResources();
}
@After
@AfterEach
public void tearDown() {
cleanupSparkContexts();
}
@Test
public void testSchemaCompatibilityBasic() throws Exception {
assertTrue("Same schema is compatible",
TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, TRIP_EXAMPLE_SCHEMA));
assertTrue(TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, TRIP_EXAMPLE_SCHEMA),
"Same schema is compatible");
String reorderedSchema = TRIP_SCHEMA_PREFIX + TIP_NESTED_SCHEMA + FARE_NESTED_SCHEMA
String reorderedSchema = TRIP_SCHEMA_PREFIX + TIP_NESTED_SCHEMA + FARE_NESTED_SCHEMA
+ MAP_TYPE_SCHEMA + TRIP_SCHEMA_SUFFIX;
assertTrue("Reordered fields are compatible",
TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, reorderedSchema));
assertTrue("Reordered fields are compatible",
TableSchemaResolver.isSchemaCompatible(reorderedSchema, TRIP_EXAMPLE_SCHEMA));
assertTrue(TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, reorderedSchema),
"Reordered fields are compatible");
assertTrue(TableSchemaResolver.isSchemaCompatible(reorderedSchema, TRIP_EXAMPLE_SCHEMA),
"Reordered fields are compatible");
String renamedSchema = TRIP_EXAMPLE_SCHEMA.replace("tip_history", "tip_future");
assertFalse("Renamed fields are not compatible",
TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, renamedSchema));
assertFalse(TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, renamedSchema),
"Renamed fields are not compatible");
assertFalse("Deleted single field is not compatible",
TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, TRIP_EXAMPLE_SCHEMA_DEVOLVED));
String deletedMultipleFieldSchema = TRIP_SCHEMA_PREFIX + TIP_NESTED_SCHEMA + TRIP_SCHEMA_SUFFIX;
assertFalse("Deleted multiple fields are not compatible",
TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, deletedMultipleFieldSchema));
assertFalse(TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, TRIP_EXAMPLE_SCHEMA_DEVOLVED),
"Deleted single field is not compatible");
String deletedMultipleFieldSchema = TRIP_SCHEMA_PREFIX + TIP_NESTED_SCHEMA + TRIP_SCHEMA_SUFFIX;
assertFalse(TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, deletedMultipleFieldSchema),
"Deleted multiple fields are not compatible");
String renamedRecordSchema = TRIP_EXAMPLE_SCHEMA.replace("triprec", "triprec_renamed");
assertFalse("Renamed record name is not compatible",
TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, renamedRecordSchema));
assertFalse(TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, renamedRecordSchema),
"Renamed record name is not compatible");
String swappedFieldSchema = TRIP_SCHEMA_PREFIX + MAP_TYPE_SCHEMA.replace("city_to_state", "fare")
+ FARE_NESTED_SCHEMA.replace("fare", "city_to_state") + TIP_NESTED_SCHEMA + TRIP_SCHEMA_SUFFIX;
assertFalse("Swapped fields are not compatible",
TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, swappedFieldSchema));
assertFalse(TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, swappedFieldSchema),
"Swapped fields are not compatible");
String typeChangeSchema = TRIP_SCHEMA_PREFIX + MAP_TYPE_SCHEMA + FARE_NESTED_SCHEMA
+ TIP_NESTED_SCHEMA.replace("string", "boolean") + TRIP_SCHEMA_SUFFIX;
assertFalse("Field type change is not compatible",
TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, typeChangeSchema));
assertFalse(TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, typeChangeSchema),
"Field type change is not compatible");
assertTrue("Added field with default is compatible (Evolved Schema)",
TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, TRIP_EXAMPLE_SCHEMA_EVOLVED));
assertTrue(TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, TRIP_EXAMPLE_SCHEMA_EVOLVED),
"Added field with default is compatible (Evolved Schema)");
String multipleAddedFieldSchema = TRIP_SCHEMA_PREFIX + MAP_TYPE_SCHEMA + FARE_NESTED_SCHEMA
+ TIP_NESTED_SCHEMA + EXTRA_FIELD_SCHEMA + EXTRA_FIELD_SCHEMA.replace("new_field", "new_new_field")
+ TRIP_SCHEMA_SUFFIX;
assertTrue("Multiple added fields with defauls are compatible",
TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, multipleAddedFieldSchema));
assertTrue(TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, multipleAddedFieldSchema),
"Multiple added fields with defauls are compatible");
}
@Test

View File

@@ -31,26 +31,26 @@ import org.apache.hudi.common.util.ParquetUtils;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.io.HoodieCreateHandle;
import org.apache.hudi.io.HoodieMergeHandle;
import org.apache.hudi.table.HoodieTable;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.table.HoodieTable;
import org.apache.parquet.avro.AvroReadSupport;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import static org.junit.Assert.fail;
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertEquals;
public class TestUpdateSchemaEvolution extends HoodieClientTestHarness {
@Before
@BeforeEach
public void setUp() throws Exception {
initPath();
HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), basePath);
@@ -58,7 +58,7 @@ public class TestUpdateSchemaEvolution extends HoodieClientTestHarness {
initFileSystem();
}
@After
@AfterEach
public void tearDown() {
cleanupSparkContexts();
}
@@ -103,7 +103,7 @@ public class TestUpdateSchemaEvolution extends HoodieClientTestHarness {
String fileId = insertResult.getFileId();
final HoodieTable table2 = HoodieTable.create(config2, jsc);
Assert.assertEquals(1, jsc.parallelize(Arrays.asList(1)).map(x -> {
assertEquals(1, jsc.parallelize(Arrays.asList(1)).map(x -> {
// New content with values for the newly added field
String recordStr1 = "{\"_row_key\":\"8eb5b87a-1feh-4edd-87b4-6ec96dc405a0\","
+ "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12,\"added_field\":1}";
@@ -116,9 +116,9 @@ public class TestUpdateSchemaEvolution extends HoodieClientTestHarness {
record1.seal();
updateRecords.add(record1);
try {
assertDoesNotThrow(() -> {
HoodieMergeHandle mergeHandle = new HoodieMergeHandle(config2, "101", table2,
updateRecords.iterator(), record1.getPartitionPath(), fileId, supplier);
updateRecords.iterator(), record1.getPartitionPath(), fileId, supplier);
Configuration conf = new Configuration();
AvroReadSupport.setAvroReadSchema(conf, mergeHandle.getWriterSchema());
List<GenericRecord> oldRecords = ParquetUtils.readAvroRecords(conf,
@@ -127,10 +127,9 @@ public class TestUpdateSchemaEvolution extends HoodieClientTestHarness {
mergeHandle.write(rec);
}
mergeHandle.close();
} catch (ClassCastException e) {
fail("UpdateFunction could not read records written with exampleSchema.txt using the "
+ "exampleEvolvedSchema.txt");
}
}, "UpdateFunction could not read records written with exampleSchema.txt using the "
+ "exampleEvolvedSchema.txt");
return 1;
}).collect().size());
}

View File

@@ -23,6 +23,7 @@ import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.minicluster.HdfsTestService;
import org.apache.hudi.common.model.HoodieTestUtils;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.testutils.HoodieCommonTestHarnessJunit5;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
@@ -44,7 +45,7 @@ import java.util.concurrent.atomic.AtomicInteger;
/**
* The test harness for resource initialization and cleanup.
*/
public abstract class HoodieClientTestHarness extends HoodieCommonTestHarness implements Serializable {
public abstract class HoodieClientTestHarness extends HoodieCommonTestHarnessJunit5 implements Serializable {
private static final Logger LOG = LoggerFactory.getLogger(HoodieClientTestHarness.class);

View File

@@ -28,16 +28,17 @@ import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.execution.LazyInsertIterable.HoodieInsertValueGenResult;
import org.apache.avro.generic.IndexedRecord;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.util.List;
import scala.Tuple2;
import static org.apache.hudi.execution.LazyInsertIterable.getTransformFunction;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
@@ -45,12 +46,12 @@ public class TestBoundedInMemoryExecutor extends HoodieClientTestHarness {
private final String instantTime = HoodieActiveTimeline.createNewInstantTime();
@Before
@BeforeEach
public void setUp() throws Exception {
initTestDataGenerator();
}
@After
@AfterEach
public void tearDown() throws Exception {
cleanupTestDataGenerator();
}
@@ -73,7 +74,8 @@ public class TestBoundedInMemoryExecutor extends HoodieClientTestHarness {
}
@Override
protected void finish() {}
protected void finish() {
}
@Override
protected Integer getResult() {
@@ -87,9 +89,9 @@ public class TestBoundedInMemoryExecutor extends HoodieClientTestHarness {
getTransformFunction(HoodieTestDataGenerator.AVRO_SCHEMA));
int result = executor.execute();
// It should buffer and write 100 records
Assert.assertEquals(result, 100);
assertEquals(100, result);
// There should be no remaining records in the buffer
Assert.assertFalse(executor.isRemaining());
assertFalse(executor.isRemaining());
} finally {
if (executor != null) {
executor.shutdownNow();

View File

@@ -34,10 +34,10 @@ import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.execution.LazyInsertIterable.HoodieInsertValueGenResult;
import org.apache.avro.generic.IndexedRecord;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.Timeout;
import java.util.ArrayList;
import java.util.HashMap;
@@ -54,6 +54,9 @@ import java.util.stream.IntStream;
import scala.Tuple2;
import static org.apache.hudi.execution.LazyInsertIterable.getTransformFunction;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
@@ -61,13 +64,13 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
private final String instantTime = HoodieActiveTimeline.createNewInstantTime();
@Before
@BeforeEach
public void setUp() throws Exception {
initTestDataGenerator();
initExecutorServiceWithFixedThreadPool(2);
}
@After
@AfterEach
public void tearDown() throws Exception {
cleanupTestDataGenerator();
cleanupExecutorService();
@@ -76,7 +79,8 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
// Test to ensure that we are reading all records from queue iterator in the same order
// without any exceptions.
@SuppressWarnings("unchecked")
@Test(timeout = 60000)
@Test
@Timeout(value = 60)
public void testRecordReading() throws Exception {
final int numRecords = 128;
final List<HoodieRecord> hoodieRecords = dataGen.generateInserts(instantTime, numRecords);
@@ -96,15 +100,15 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
originalRecord.getData().getInsertValue(HoodieTestDataGenerator.AVRO_SCHEMA);
final HoodieInsertValueGenResult<HoodieRecord> payload = queue.iterator().next();
// Ensure that record ordering is guaranteed.
Assert.assertEquals(originalRecord, payload.record);
assertEquals(originalRecord, payload.record);
// cached insert value matches the expected insert value.
Assert.assertEquals(originalInsertValue,
assertEquals(originalInsertValue,
payload.record.getData().getInsertValue(HoodieTestDataGenerator.AVRO_SCHEMA));
recordsRead++;
}
Assert.assertFalse(queue.iterator().hasNext() || originalRecordIterator.hasNext());
assertFalse(queue.iterator().hasNext() || originalRecordIterator.hasNext());
// all the records should be read successfully.
Assert.assertEquals(numRecords, recordsRead);
assertEquals(numRecords, recordsRead);
// should not throw any exceptions.
resFuture.get();
}
@@ -113,7 +117,8 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
* Test to ensure that we are reading all records from queue iterator when we have multiple producers.
*/
@SuppressWarnings("unchecked")
@Test(timeout = 60000)
@Test
@Timeout(value = 60)
public void testCompositeProducerRecordReading() throws Exception {
final int numRecords = 1000;
final int numProducers = 40;
@@ -129,7 +134,7 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
List<HoodieRecord> pRecs = dataGen.generateInserts(instantTime, numRecords);
int j = 0;
for (HoodieRecord r : pRecs) {
Assert.assertTrue(!keyToProducerAndIndexMap.containsKey(r.getRecordKey()));
assertFalse(keyToProducerAndIndexMap.containsKey(r.getRecordKey()));
keyToProducerAndIndexMap.put(r.getRecordKey(), new Tuple2<>(i, j));
j++;
}
@@ -192,12 +197,12 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
countMap.put(producerPos._1(), countMap.get(producerPos._1()) + 1);
lastSeenMap.put(producerPos._1(), lastSeenPos + 1);
// Ensure we are seeing the next record generated
Assert.assertEquals(lastSeenPos + 1, producerPos._2().intValue());
assertEquals(lastSeenPos + 1, producerPos._2().intValue());
}
for (int i = 0; i < numProducers; i++) {
// Ensure we have seen all the records for each producer
Assert.assertEquals(Integer.valueOf(numRecords), countMap.get(i));
assertEquals(Integer.valueOf(numRecords), countMap.get(i));
}
// Ensure Close future is done
@@ -206,7 +211,8 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
// Test to ensure that record queueing is throttled when we hit memory limit.
@SuppressWarnings("unchecked")
@Test(timeout = 60000)
@Test
@Timeout(value = 60)
public void testMemoryLimitForBuffering() throws Exception {
final int numRecords = 128;
final List<HoodieRecord> hoodieRecords = dataGen.generateInserts(instantTime, numRecords);
@@ -229,14 +235,14 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
while (!isQueueFull(queue.rateLimiter)) {
Thread.sleep(10);
}
Assert.assertEquals(0, queue.rateLimiter.availablePermits());
Assert.assertEquals(recordLimit, queue.currentRateLimit);
Assert.assertEquals(recordLimit, queue.size());
Assert.assertEquals(recordLimit - 1, queue.samplingRecordCounter.get());
assertEquals(0, queue.rateLimiter.availablePermits());
assertEquals(recordLimit, queue.currentRateLimit);
assertEquals(recordLimit, queue.size());
assertEquals(recordLimit - 1, queue.samplingRecordCounter.get());
// try to read 2 records.
Assert.assertEquals(hoodieRecords.get(0), queue.iterator().next().record);
Assert.assertEquals(hoodieRecords.get(1), queue.iterator().next().record);
assertEquals(hoodieRecords.get(0), queue.iterator().next().record);
assertEquals(hoodieRecords.get(1), queue.iterator().next().record);
// waiting for permits to expire.
while (!isQueueFull(queue.rateLimiter)) {
@@ -245,17 +251,18 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
// No change is expected in rate limit or number of queued records. We only expect
// queueing thread to read
// 2 more records into the queue.
Assert.assertEquals(0, queue.rateLimiter.availablePermits());
Assert.assertEquals(recordLimit, queue.currentRateLimit);
Assert.assertEquals(recordLimit, queue.size());
Assert.assertEquals(recordLimit - 1 + 2, queue.samplingRecordCounter.get());
assertEquals(0, queue.rateLimiter.availablePermits());
assertEquals(recordLimit, queue.currentRateLimit);
assertEquals(recordLimit, queue.size());
assertEquals(recordLimit - 1 + 2, queue.samplingRecordCounter.get());
}
// Test to ensure that exception in either queueing thread or BufferedIterator-reader thread
// is propagated to
// another thread.
@SuppressWarnings("unchecked")
@Test(timeout = 60000)
@Test
@Timeout(value = 60)
public void testException() throws Exception {
final int numRecords = 256;
final List<HoodieRecord> hoodieRecords = dataGen.generateInserts(instantTime, numRecords);
@@ -285,13 +292,10 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
// notify queueing thread of an exception and ensure that it exits.
final Exception e = new Exception("Failing it :)");
queue1.markAsFailed(e);
try {
resFuture.get();
Assert.fail("exception is expected");
} catch (ExecutionException e1) {
Assert.assertEquals(HoodieException.class, e1.getCause().getClass());
Assert.assertEquals(e, e1.getCause().getCause());
}
final Throwable thrown1 = assertThrows(ExecutionException.class, resFuture::get,
"exception is expected");
assertEquals(HoodieException.class, thrown1.getCause().getClass());
assertEquals(e, thrown1.getCause().getCause());
// second let us raise an exception while doing record queueing. this exception should get
// propagated to
@@ -314,19 +318,14 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
return true;
});
try {
final Throwable thrown2 = assertThrows(Exception.class, () -> {
queue2.iterator().hasNext();
Assert.fail("exception is expected");
} catch (Exception e1) {
Assert.assertEquals(expectedException, e1.getCause());
}
}, "exception is expected");
assertEquals(expectedException, thrown2.getCause());
// queueing thread should also have exited. make sure that it is not running.
try {
res.get();
Assert.fail("exception is expected");
} catch (ExecutionException e2) {
Assert.assertEquals(expectedException, e2.getCause());
}
final Throwable thrown3 = assertThrows(ExecutionException.class, res::get,
"exception is expected");
assertEquals(expectedException, thrown3.getCause());
}
private boolean isQueueFull(Semaphore rateLimiter) {

View File

@@ -32,19 +32,20 @@ import org.apache.hudi.index.hbase.HBaseIndexQPSResourceAllocator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertEquals;
public class TestHBaseQPSResourceAllocator extends HoodieClientTestHarness {
private static String tableName = "test_table";
private static final String TABLE_NAME = "test_table";
private static final String QPS_TEST_SUFFIX_PATH = "qps_test_suffix";
private HBaseTestingUtility utility;
private Configuration hbaseConfig;
private static String QPS_TEST_SUFFIX_PATH = "qps_test_suffix";
@Before
@BeforeEach
public void setUp() throws Exception {
utility = new HBaseTestingUtility();
utility.startMiniCluster();
@@ -52,12 +53,12 @@ public class TestHBaseQPSResourceAllocator extends HoodieClientTestHarness {
initSparkContexts("TestQPSResourceAllocator");
initPath();
basePath = folder.getRoot().getAbsolutePath() + QPS_TEST_SUFFIX_PATH;
basePath = tempDir.resolve(QPS_TEST_SUFFIX_PATH).toAbsolutePath().toString();
// Initialize table
initMetaClient();
}
@After
@AfterEach
public void tearDown() throws Exception {
cleanupSparkContexts();
cleanupMetaClient();
@@ -71,9 +72,9 @@ public class TestHBaseQPSResourceAllocator extends HoodieClientTestHarness {
HoodieWriteConfig config = getConfig(Option.empty());
HBaseIndex index = new HBaseIndex(config);
HBaseIndexQPSResourceAllocator hBaseIndexQPSResourceAllocator = index.createQPSResourceAllocator(config);
Assert.assertEquals(hBaseIndexQPSResourceAllocator.getClass().getName(),
assertEquals(hBaseIndexQPSResourceAllocator.getClass().getName(),
DefaultHBaseQPSResourceAllocator.class.getName());
Assert.assertEquals(config.getHbaseIndexQPSFraction(),
assertEquals(config.getHbaseIndexQPSFraction(),
hBaseIndexQPSResourceAllocator.acquireQPSResources(config.getHbaseIndexQPSFraction(), 100), 0.0f);
}
@@ -82,9 +83,9 @@ public class TestHBaseQPSResourceAllocator extends HoodieClientTestHarness {
HoodieWriteConfig config = getConfig(Option.of(HoodieHBaseIndexConfig.DEFAULT_HBASE_INDEX_QPS_ALLOCATOR_CLASS));
HBaseIndex index = new HBaseIndex(config);
HBaseIndexQPSResourceAllocator hBaseIndexQPSResourceAllocator = index.createQPSResourceAllocator(config);
Assert.assertEquals(hBaseIndexQPSResourceAllocator.getClass().getName(),
assertEquals(hBaseIndexQPSResourceAllocator.getClass().getName(),
DefaultHBaseQPSResourceAllocator.class.getName());
Assert.assertEquals(config.getHbaseIndexQPSFraction(),
assertEquals(config.getHbaseIndexQPSFraction(),
hBaseIndexQPSResourceAllocator.acquireQPSResources(config.getHbaseIndexQPSFraction(), 100), 0.0f);
}
@@ -93,9 +94,9 @@ public class TestHBaseQPSResourceAllocator extends HoodieClientTestHarness {
HoodieWriteConfig config = getConfig(Option.of("InvalidResourceAllocatorClassName"));
HBaseIndex index = new HBaseIndex(config);
HBaseIndexQPSResourceAllocator hBaseIndexQPSResourceAllocator = index.createQPSResourceAllocator(config);
Assert.assertEquals(hBaseIndexQPSResourceAllocator.getClass().getName(),
assertEquals(hBaseIndexQPSResourceAllocator.getClass().getName(),
DefaultHBaseQPSResourceAllocator.class.getName());
Assert.assertEquals(config.getHbaseIndexQPSFraction(),
assertEquals(config.getHbaseIndexQPSFraction(),
hBaseIndexQPSResourceAllocator.acquireQPSResources(config.getHbaseIndexQPSFraction(), 100), 0.0f);
}
@@ -117,7 +118,7 @@ public class TestHBaseQPSResourceAllocator extends HoodieClientTestHarness {
private HoodieHBaseIndexConfig getConfigWithResourceAllocator(Option<String> resourceAllocatorClass) {
HoodieHBaseIndexConfig.Builder builder = new HoodieHBaseIndexConfig.Builder()
.hbaseZkPort(Integer.parseInt(hbaseConfig.get("hbase.zookeeper.property.clientPort")))
.hbaseZkQuorum(hbaseConfig.get("hbase.zookeeper.quorum")).hbaseTableName(tableName).hbaseIndexGetBatchSize(100);
.hbaseZkQuorum(hbaseConfig.get("hbase.zookeeper.quorum")).hbaseTableName(TABLE_NAME).hbaseIndexGetBatchSize(100);
if (resourceAllocatorClass.isPresent()) {
builder.withQPSResourceAllocatorType(resourceAllocatorClass.get());
}

View File

@@ -50,14 +50,13 @@ import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.spark.api.java.JavaRDD;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.FixMethodOrder;
import org.junit.Test;
import org.junit.runners.MethodSorters;
import org.mockito.Mockito;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.MethodOrderer;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestMethodOrder;
import java.util.ArrayList;
import java.util.Arrays;
@@ -65,12 +64,13 @@ import java.util.List;
import scala.Tuple2;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.atMost;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
@@ -78,9 +78,9 @@ import static org.mockito.Mockito.when;
/**
* Note :: HBaseTestingUtility is really flaky with issues where the HbaseMiniCluster fails to shutdown across tests,
* (see one problem here : https://issues.apache.org/jira/browse/HBASE-15835). Hence, the need to use
* MethodSorters.NAME_ASCENDING to make sure the tests run in order. Please alter the order of tests running carefully.
* {@link MethodOrderer.Alphanumeric} to make sure the tests run in order. Please alter the order of tests running carefully.
*/
@FixMethodOrder(MethodSorters.NAME_ASCENDING)
@TestMethodOrder(MethodOrderer.Alphanumeric.class)
public class TestHbaseIndex extends HoodieClientTestHarness {
private static HBaseTestingUtility utility;
@@ -89,14 +89,14 @@ public class TestHbaseIndex extends HoodieClientTestHarness {
public TestHbaseIndex() {}
@AfterClass
@AfterAll
public static void clean() throws Exception {
if (utility != null) {
utility.shutdownMiniCluster();
}
}
@BeforeClass
@BeforeAll
public static void init() throws Exception {
// Initialize HbaseMiniCluster
hbaseConfig = HBaseConfiguration.create();
@@ -108,7 +108,7 @@ public class TestHbaseIndex extends HoodieClientTestHarness {
utility.createTable(TableName.valueOf(tableName), Bytes.toBytes("_s"));
}
@Before
@BeforeEach
public void setUp() throws Exception {
// Initialize a local spark env
initSparkContexts("TestHbaseIndex");
@@ -120,7 +120,7 @@ public class TestHbaseIndex extends HoodieClientTestHarness {
initMetaClient();
}
@After
@AfterEach
public void tearDown() throws Exception {
cleanupSparkContexts();
cleanupTestDataGenerator();
@@ -257,8 +257,8 @@ public class TestHbaseIndex extends HoodieClientTestHarness {
HBaseIndex index = new HBaseIndex(config);
// Mock hbaseConnection and related entities
Connection hbaseConnection = Mockito.mock(Connection.class);
HTable table = Mockito.mock(HTable.class);
Connection hbaseConnection = mock(Connection.class);
HTable table = mock(HTable.class);
when(hbaseConnection.getTable(TableName.valueOf(tableName))).thenReturn(table);
when(table.get((List<Get>) any())).thenReturn(new Result[0]);
@@ -306,8 +306,8 @@ public class TestHbaseIndex extends HoodieClientTestHarness {
writeClient.commit(newCommitTime, writeStatues);
// Mock hbaseConnection and related entities
Connection hbaseConnection = Mockito.mock(Connection.class);
HTable table = Mockito.mock(HTable.class);
Connection hbaseConnection = mock(Connection.class);
HTable table = mock(HTable.class);
when(hbaseConnection.getTable(TableName.valueOf(tableName))).thenReturn(table);
when(table.get((List<Get>) any())).thenReturn(new Result[0]);
@@ -335,28 +335,28 @@ public class TestHbaseIndex extends HoodieClientTestHarness {
// 8 (batchSize) * 200 (parallelism) * 10 (maxReqsInOneSecond) * 10 (numRegionServers) * 0.1 (qpsFraction)) => 16000
// We assume requests get distributed to Region Servers uniformly, so each RS gets 1600 requests
// 1600 happens to be 10% of 16667 (maxQPSPerRegionServer) as expected.
assertEquals(putBatchSize, 8);
assertEquals(8, putBatchSize);
// The number of Region Servers is halved, so total requests sent in a second are also halved, and batchSize is halved too
int putBatchSize2 = batchSizeCalculator.getBatchSize(5, 16667, 1200, 200, 100, 0.1f);
assertEquals(putBatchSize2, 4);
assertEquals(4, putBatchSize2);
// If the parallelism is halved, batchSize has to double
int putBatchSize3 = batchSizeCalculator.getBatchSize(10, 16667, 1200, 100, 100, 0.1f);
assertEquals(putBatchSize3, 16);
assertEquals(16, putBatchSize3);
// If the parallelism is halved, batchSize has to double.
// This time parallelism is driven by numTasks rather than numExecutors
int putBatchSize4 = batchSizeCalculator.getBatchSize(10, 16667, 100, 200, 100, 0.1f);
assertEquals(putBatchSize4, 16);
assertEquals(16, putBatchSize4);
// If qpsFraction is halved (0.1 -> 0.05), batchSize has to halve
int putBatchSize5 = batchSizeCalculator.getBatchSize(10, 16667, 1200, 200, 100, 0.05f);
assertEquals(putBatchSize5, 4);
assertEquals(4, putBatchSize5);
// If maxQPSPerRegionServer is doubled, batchSize also doubles
int putBatchSize6 = batchSizeCalculator.getBatchSize(10, 33334, 1200, 200, 100, 0.1f);
assertEquals(putBatchSize6, 16);
assertEquals(16, putBatchSize6);
}
@Test
@@ -494,19 +494,15 @@ public class TestHbaseIndex extends HoodieClientTestHarness {
}
@Test
public void testFeatureSupport() throws Exception {
public void testFeatureSupport() {
HoodieWriteConfig config = getConfig();
HBaseIndex index = new HBaseIndex(config);
assertTrue(index.canIndexLogFiles());
try {
assertThrows(UnsupportedOperationException.class, () -> {
HoodieTable hoodieTable = HoodieTable.create(metaClient, config, jsc);
index.fetchRecordLocation(jsc.parallelize(new ArrayList<HoodieKey>(), 1), jsc, hoodieTable);
fail("HbaseIndex supports fetchRecordLocation");
} catch (UnsupportedOperationException ex) {
// Expected so ignore
ex.getStackTrace();
}
}, "HbaseIndex supports fetchRecordLocation");
}
private WriteStatus getSampleWriteStatus(final int numInserts, final int numUpdateWrites) {
@@ -521,7 +517,7 @@ public class TestHbaseIndex extends HoodieClientTestHarness {
private void assertNoWriteErrors(List<WriteStatus> statuses) {
// Verify there are no errors
for (WriteStatus status : statuses) {
assertFalse("Errors found in write of " + status.getFileId(), status.hasErrors());
assertFalse(status.hasErrors(), "Errors found in write of " + status.getFileId());
}
}

View File

@@ -39,23 +39,28 @@ import org.apache.hudi.table.HoodieTable;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
public class TestHoodieIndex extends HoodieClientTestHarness {
@Before
private HoodieWriteConfig.Builder clientConfigBuilder;
private HoodieIndexConfig.Builder indexConfigBuilder;
@BeforeEach
public void setUp() throws Exception {
initSparkContexts("TestHoodieIndex");
initPath();
initMetaClient();
clientConfigBuilder = HoodieWriteConfig.newBuilder();
indexConfigBuilder = HoodieIndexConfig.newBuilder();
}
@After
@AfterEach
public void tearDown() {
cleanupSparkContexts();
cleanupMetaClient();
@@ -63,8 +68,6 @@ public class TestHoodieIndex extends HoodieClientTestHarness {
@Test
public void testCreateIndex() {
HoodieWriteConfig.Builder clientConfigBuilder = HoodieWriteConfig.newBuilder();
HoodieIndexConfig.Builder indexConfigBuilder = HoodieIndexConfig.newBuilder();
// Different types
HoodieWriteConfig config = clientConfigBuilder.withPath(basePath)
.withIndexConfig(indexConfigBuilder.withIndexType(HoodieIndex.IndexType.HBASE)
@@ -84,27 +87,27 @@ public class TestHoodieIndex extends HoodieClientTestHarness {
config = clientConfigBuilder.withPath(basePath)
.withIndexConfig(indexConfigBuilder.withIndexClass(DummyHoodieIndex.class.getName()).build()).build();
assertTrue(HoodieIndex.createIndex(config, jsc) instanceof DummyHoodieIndex);
}
config = clientConfigBuilder.withPath(basePath)
@Test
public void testCreateIndex_withException() {
final HoodieWriteConfig config1 = clientConfigBuilder.withPath(basePath)
.withIndexConfig(indexConfigBuilder.withIndexClass(IndexWithConstructor.class.getName()).build()).build();
try {
HoodieIndex.createIndex(config, jsc);
fail("exception is expected");
} catch (HoodieIndexException e) {
assertTrue(e.getMessage().contains("is not a subclass of HoodieIndex"));
}
final Throwable thrown1 = assertThrows(HoodieException.class, () -> {
HoodieIndex.createIndex(config1, jsc);
}, "exception is expected");
assertTrue(thrown1.getMessage().contains("is not a subclass of HoodieIndex"));
config = clientConfigBuilder.withPath(basePath)
.withIndexConfig(indexConfigBuilder.withIndexClass(IndexWithoutConstructor.class.getName()).build()).build();
try {
HoodieIndex.createIndex(config, jsc);
fail("exception is expected");
} catch (HoodieException e) {
assertTrue(e.getMessage().contains("Unable to instantiate class"));
}
final HoodieWriteConfig config2 = clientConfigBuilder.withPath(basePath)
.withIndexConfig(indexConfigBuilder.withIndexClass(IndexWithoutConstructor.class.getName()).build()).build();
final Throwable thrown2 = assertThrows(HoodieException.class, () -> {
HoodieIndex.createIndex(config2, jsc);
}, "exception is expected");
assertTrue(thrown2.getMessage().contains("Unable to instantiate class"));
}
public static class DummyHoodieIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
public DummyHoodieIndex(HoodieWriteConfig config) {
super(config);
}
@@ -146,7 +149,9 @@ public class TestHoodieIndex extends HoodieClientTestHarness {
}
public static class IndexWithConstructor {
public IndexWithConstructor(HoodieWriteConfig config) {}
public IndexWithConstructor(HoodieWriteConfig config) {
}
}
public static class IndexWithoutConstructor {

View File

@@ -41,17 +41,18 @@ import org.apache.avro.Schema;
import org.apache.hadoop.fs.Path;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
@@ -59,40 +60,31 @@ import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import scala.Tuple2;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
@RunWith(Parameterized.class)
public class TestHoodieBloomIndex extends HoodieClientTestHarness {
private String schemaStr;
private Schema schema;
private boolean rangePruning;
private boolean treeFiltering;
private boolean bucketizedChecking;
private static final String TEST_NAME_WITH_PARAMS = "[{index}] Test with rangePruning={0}, treeFiltering={1}, bucketizedChecking={2}";
@Parameterized.Parameters(name = "{index}: Test with rangePruning={0}, treeFiltering ={1}, bucketizedChecking is:{2}")
public static Collection<Object[]> data() {
public static Stream<Arguments> configParams() {
Object[][] data =
new Object[][] {{true, true, true}, {false, true, true}, {true, true, false}, {true, false, true}};
return Arrays.asList(data);
return Stream.of(data).map(Arguments::of);
}
public TestHoodieBloomIndex(boolean rangePruning, boolean treeFiltering, boolean bucketizedChecking) {
this.rangePruning = rangePruning;
this.treeFiltering = treeFiltering;
this.bucketizedChecking = bucketizedChecking;
}
@Before
@BeforeEach
public void setUp() throws Exception {
initSparkContexts("TestHoodieBloomIndex");
initPath();
@@ -103,14 +95,14 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
initMetaClient();
}
@After
@AfterEach
public void tearDown() throws Exception {
cleanupSparkContexts();
cleanupFileSystem();
cleanupMetaClient();
}
private HoodieWriteConfig makeConfig() {
private HoodieWriteConfig makeConfig(boolean rangePruning, boolean treeFiltering, boolean bucketizedChecking) {
return HoodieWriteConfig.newBuilder().withPath(basePath)
.withIndexConfig(HoodieIndexConfig.newBuilder().bloomIndexPruneByRanges(rangePruning)
.bloomIndexTreebasedFilter(treeFiltering).bloomIndexBucketizedChecking(bucketizedChecking)
@@ -118,9 +110,10 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
.build();
}
@Test
public void testLoadInvolvedFiles() throws IOException {
HoodieWriteConfig config = makeConfig();
@ParameterizedTest(name = TEST_NAME_WITH_PARAMS)
@MethodSource("configParams")
public void testLoadInvolvedFiles(boolean rangePruning, boolean treeFiltering, boolean bucketizedChecking) throws IOException {
HoodieWriteConfig config = makeConfig(rangePruning, treeFiltering, bucketizedChecking);
HoodieBloomIndex index = new HoodieBloomIndex(config);
// Create some partitions, and put some files
@@ -128,9 +121,9 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
// "2016/04/01": 1 file (2_0_20160401010101.parquet)
// "2015/03/12": 3 files (1_0_20150312101010.parquet, 3_0_20150312101010.parquet,
// 4_0_20150312101010.parquet)
new File(basePath + "/2016/01/21").mkdirs();
new File(basePath + "/2016/04/01").mkdirs();
new File(basePath + "/2015/03/12").mkdirs();
Files.createDirectories(Paths.get(basePath, "2016", "01", "21"));
Files.createDirectories(Paths.get(basePath, "2016", "04", "01"));
Files.createDirectories(Paths.get(basePath, "2015", "03", "12"));
TestRawTripPayload rowChange1 =
new TestRawTripPayload("{\"_row_key\":\"000\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
@@ -163,16 +156,16 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
HoodieTable table = HoodieTable.create(metaClient, config, jsc);
List<Tuple2<String, BloomIndexFileInfo>> filesList = index.loadInvolvedFiles(partitions, jsc, table);
// Still 0, as no valid commit
assertEquals(filesList.size(), 0);
assertEquals(0, filesList.size());
// Add some commits
new File(basePath + "/.hoodie").mkdirs();
new File(basePath + "/.hoodie/20160401010101.commit").createNewFile();
new File(basePath + "/.hoodie/20150312101010.commit").createNewFile();
java.nio.file.Path hoodieDir = Files.createDirectories(Paths.get(basePath, ".hoodie"));
Files.createFile(hoodieDir.resolve("20160401010101.commit"));
Files.createFile(hoodieDir.resolve("20150312101010.commit"));
table = HoodieTable.create(metaClient, config, jsc);
filesList = index.loadInvolvedFiles(partitions, jsc, table);
assertEquals(filesList.size(), 4);
assertEquals(4, filesList.size());
if (rangePruning) {
// these files will not have the key ranges
@@ -194,9 +187,10 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
}
}
@Test
public void testRangePruning() {
HoodieWriteConfig config = makeConfig();
@ParameterizedTest(name = TEST_NAME_WITH_PARAMS)
@MethodSource("configParams")
public void testRangePruning(boolean rangePruning, boolean treeFiltering, boolean bucketizedChecking) {
HoodieWriteConfig config = makeConfig(rangePruning, treeFiltering, bucketizedChecking);
HoodieBloomIndex index = new HoodieBloomIndex(config);
final Map<String, List<BloomIndexFileInfo>> partitionToFileIndexInfo = new HashMap<>();
@@ -277,27 +271,27 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
// assertTrue(results.get(1)._2().equals(filename));
}
@Test
public void testTagLocationWithEmptyRDD() {
@ParameterizedTest(name = TEST_NAME_WITH_PARAMS)
@MethodSource("configParams")
public void testTagLocationWithEmptyRDD(boolean rangePruning, boolean treeFiltering, boolean bucketizedChecking) {
// No records to be tagged: the input RDD is deliberately empty
JavaRDD<HoodieRecord> recordRDD = jsc.emptyRDD();
// Also create the metadata and config
HoodieWriteConfig config = makeConfig();
HoodieWriteConfig config = makeConfig(rangePruning, treeFiltering, bucketizedChecking);
metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieTable table = HoodieTable.create(metaClient, config, jsc);
// Let's tag
HoodieBloomIndex bloomIndex = new HoodieBloomIndex(config);
try {
assertDoesNotThrow(() -> {
bloomIndex.tagLocation(recordRDD, jsc, table);
} catch (IllegalArgumentException e) {
fail("EmptyRDD should not result in IllegalArgumentException: Positive number of slices required");
}
}, "EmptyRDD should not result in IllegalArgumentException: Positive number of slices required");
}
@Test
public void testTagLocation() throws Exception {
@ParameterizedTest(name = TEST_NAME_WITH_PARAMS)
@MethodSource("configParams")
public void testTagLocation(boolean rangePruning, boolean treeFiltering, boolean bucketizedChecking) throws Exception {
// We have some records to be tagged (two different partitions)
String rowKey1 = UUID.randomUUID().toString();
String rowKey2 = UUID.randomUUID().toString();
@@ -322,7 +316,7 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
JavaRDD<HoodieRecord> recordRDD = jsc.parallelize(Arrays.asList(record1, record2, record3, record4));
// Also create the metadata and config
HoodieWriteConfig config = makeConfig();
HoodieWriteConfig config = makeConfig(rangePruning, treeFiltering, bucketizedChecking);
metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieTable table = HoodieTable.create(metaClient, config, jsc);
@@ -365,8 +359,9 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
}
}
@Test
public void testCheckExists() throws Exception {
@ParameterizedTest(name = TEST_NAME_WITH_PARAMS)
@MethodSource("configParams")
public void testCheckExists(boolean rangePruning, boolean treeFiltering, boolean bucketizedChecking) throws Exception {
// We have some records to be tagged (two different partitions)
String recordStr1 = "{\"_row_key\":\"1eb5b87a-1feh-4edd-87b4-6ec96dc405a0\","
@@ -392,7 +387,7 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
JavaRDD<HoodieKey> keysRDD = jsc.parallelize(Arrays.asList(key1, key2, key3, key4));
// Also create the metadata and config
HoodieWriteConfig config = makeConfig();
HoodieWriteConfig config = makeConfig(rangePruning, treeFiltering, bucketizedChecking);
metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieTable table = HoodieTable.create(metaClient, config, jsc);
@@ -437,8 +432,9 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
}
}
@Test
public void testBloomFilterFalseError() throws IOException, InterruptedException {
@ParameterizedTest(name = TEST_NAME_WITH_PARAMS)
@MethodSource("configParams")
public void testBloomFilterFalseError(boolean rangePruning, boolean treeFiltering, boolean bucketizedChecking) throws IOException, InterruptedException {
// We have two hoodie records
String recordStr1 = "{\"_row_key\":\"1eb5b87a-1feh-4edd-87b4-6ec96dc405a0\","
+ "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}";
@@ -463,7 +459,7 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
// We do the tag
JavaRDD<HoodieRecord> recordRDD = jsc.parallelize(Arrays.asList(record1, record2));
HoodieWriteConfig config = makeConfig();
HoodieWriteConfig config = makeConfig(rangePruning, treeFiltering, bucketizedChecking);
metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieTable table = HoodieTable.create(metaClient, config, jsc);

View File

@@ -36,12 +36,14 @@ import org.apache.hudi.table.HoodieTable;
import org.apache.avro.Schema;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@@ -53,12 +55,12 @@ import java.util.stream.Collectors;
import scala.Tuple2;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
public class TestHoodieGlobalBloomIndex extends HoodieClientTestHarness {
@@ -67,7 +69,7 @@ public class TestHoodieGlobalBloomIndex extends HoodieClientTestHarness {
public TestHoodieGlobalBloomIndex() {
}
@Before
@BeforeEach
public void setUp() throws Exception {
initSparkContexts("TestHoodieGlobalBloomIndex");
initPath();
@@ -77,7 +79,7 @@ public class TestHoodieGlobalBloomIndex extends HoodieClientTestHarness {
initMetaClient();
}
@After
@AfterEach
public void tearDown() {
cleanupSparkContexts();
cleanupMetaClient();
@@ -93,12 +95,12 @@ public class TestHoodieGlobalBloomIndex extends HoodieClientTestHarness {
// "2016/04/01": 1 file (2_0_20160401010101.parquet)
// "2015/03/12": 3 files (1_0_20150312101010.parquet, 3_0_20150312101010.parquet,
// 4_0_20150312101010.parquet)
new File(basePath + "/2016/01/21").mkdirs();
new File(basePath + "/2016/01/21/" + HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE).createNewFile();
new File(basePath + "/2016/04/01").mkdirs();
new File(basePath + "/2016/04/01/" + HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE).createNewFile();
new File(basePath + "/2015/03/12").mkdirs();
new File(basePath + "/2015/03/12/" + HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE).createNewFile();
Path dir1 = Files.createDirectories(Paths.get(basePath, "2016", "01", "21"));
Files.createFile(dir1.resolve(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
Path dir2 = Files.createDirectories(Paths.get(basePath, "2016", "04", "01"));
Files.createFile(dir2.resolve(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
Path dir3 = Files.createDirectories(Paths.get(basePath, "2015", "03", "12"));
Files.createFile(dir3.resolve(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
TestRawTripPayload rowChange1 =
new TestRawTripPayload("{\"_row_key\":\"000\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
@@ -133,16 +135,16 @@ public class TestHoodieGlobalBloomIndex extends HoodieClientTestHarness {
// partitions will NOT be respected by this loadInvolvedFiles(...) call
List<Tuple2<String, BloomIndexFileInfo>> filesList = index.loadInvolvedFiles(partitions, jsc, table);
// Still 0, as no valid commit
assertEquals(filesList.size(), 0);
assertEquals(0, filesList.size());
// Add some commits
new File(basePath + "/.hoodie").mkdirs();
new File(basePath + "/.hoodie/20160401010101.commit").createNewFile();
new File(basePath + "/.hoodie/20150312101010.commit").createNewFile();
Path hoodieDir = Files.createDirectories(Paths.get(basePath, ".hoodie"));
Files.createFile(hoodieDir.resolve("20160401010101.commit"));
Files.createFile(hoodieDir.resolve("20150312101010.commit"));
table = HoodieTable.create(metaClient, config, jsc);
filesList = index.loadInvolvedFiles(partitions, jsc, table);
assertEquals(filesList.size(), 4);
assertEquals(4, filesList.size());
Map<String, BloomIndexFileInfo> filesMap = toFileMap(filesList);
// key ranges checks
@@ -213,12 +215,12 @@ public class TestHoodieGlobalBloomIndex extends HoodieClientTestHarness {
// "2016/04/01": 1 file (2_0_20160401010101.parquet)
// "2015/03/12": 3 files (1_0_20150312101010.parquet, 3_0_20150312101010.parquet,
// 4_0_20150312101010.parquet)
new File(basePath + "/2016/01/21").mkdirs();
new File(basePath + "/2016/01/21/" + HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE).createNewFile();
new File(basePath + "/2016/04/01").mkdirs();
new File(basePath + "/2016/04/01/" + HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE).createNewFile();
new File(basePath + "/2015/03/12").mkdirs();
new File(basePath + "/2015/03/12/" + HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE).createNewFile();
Path dir1 = Files.createDirectories(Paths.get(basePath, "2016", "01", "21"));
Files.createFile(dir1.resolve(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
Path dir2 = Files.createDirectories(Paths.get(basePath, "2016", "04", "01"));
Files.createFile(dir2.resolve(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
Path dir3 = Files.createDirectories(Paths.get(basePath, "2015", "03", "12"));
Files.createFile(dir3.resolve(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
TestRawTripPayload rowChange1 =
new TestRawTripPayload("{\"_row_key\":\"000\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
@@ -262,7 +264,7 @@ public class TestHoodieGlobalBloomIndex extends HoodieClientTestHarness {
HoodieTable table = HoodieTable.create(metaClient, config, jsc);
// Add some commits
new File(basePath + "/.hoodie").mkdirs();
Files.createDirectories(Paths.get(basePath, ".hoodie"));
// partitions will NOT be respected by this loadInvolvedFiles(...) call
JavaRDD<HoodieRecord> taggedRecordRDD = index.tagLocation(recordRDD, jsc, table);
@@ -305,8 +307,8 @@ public class TestHoodieGlobalBloomIndex extends HoodieClientTestHarness {
// Create the original partition, and put a record, along with the meta file
// "2016/01/31": 1 file (1_0_20160131101010.parquet)
new File(basePath + "/2016/01/31").mkdirs();
new File(basePath + "/2016/01/31/" + HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE).createNewFile();
Path dir = Files.createDirectories(Paths.get(basePath, "2016", "01", "31"));
Files.createFile(dir.resolve(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
// this record will be saved in table and will be tagged to an empty record
TestRawTripPayload originalPayload =
@@ -347,7 +349,7 @@ public class TestHoodieGlobalBloomIndex extends HoodieClientTestHarness {
HoodieTable table = HoodieTable.create(metaClient, config, jsc);
// Add some commits
new File(basePath + "/.hoodie").mkdirs();
Files.createDirectories(Paths.get(basePath, ".hoodie"));
// test against incoming record with a different partition
JavaRDD<HoodieRecord> recordRDD = jsc.parallelize(Collections.singletonList(incomingRecord));

View File

@@ -35,9 +35,9 @@ import org.apache.hudi.table.HoodieTimelineArchiveLog;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.util.Arrays;
@@ -47,16 +47,16 @@ import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
private Configuration hadoopConf;
private HoodieTableMetaClient metaClient;
@Before
@BeforeEach
public void init() throws Exception {
initDFS();
initPath();
@@ -67,7 +67,7 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
metaClient = HoodieTestUtils.init(hadoopConf, basePath);
}
@After
@AfterEach
public void clean() throws IOException {
cleanupDFS();
cleanupSparkContexts();
@@ -137,7 +137,7 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
assertEquals("Loaded 6 commits and the count should match", 6, timeline.countInstants());
assertEquals(6, timeline.countInstants(), "Loaded 6 commits and the count should match");
HoodieTestUtils.createCleanFiles(metaClient, basePath, "100", dfs.getConf());
HoodieTestUtils.createCleanFiles(metaClient, basePath, "101", dfs.getConf());
@@ -151,7 +151,7 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
timeline = metaClient.getActiveTimeline().reload().getAllCommitsTimeline().filterCompletedInstants();
List<HoodieInstant> originalCommits = timeline.getInstants().collect(Collectors.toList());
assertEquals("Loaded 6 commits and the count should match", 12, timeline.countInstants());
assertEquals(12, timeline.countInstants(), "Loaded 6 commits and the count should match");
// verify in-flight instants before archive
verifyInflightInstants(metaClient, 2);
@@ -168,42 +168,42 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
// Check compaction instants
List<HoodieInstant> instants = metaClient.scanHoodieInstantsFromFileSystem(
new Path(metaClient.getMetaAuxiliaryPath()), HoodieActiveTimeline.VALID_EXTENSIONS_IN_ACTIVE_TIMELINE, false);
assertEquals("Should delete all compaction instants < 104", 4, instants.size());
assertFalse("Requested Compaction must be absent for 100",
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "100")));
assertFalse("Inflight Compaction must be absent for 100",
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "100")));
assertFalse("Requested Compaction must be absent for 101",
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "101")));
assertFalse("Inflight Compaction must be absent for 101",
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "101")));
assertFalse("Requested Compaction must be absent for 102",
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "102")));
assertFalse("Inflight Compaction must be absent for 102",
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "102")));
assertFalse("Requested Compaction must be absent for 103",
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "103")));
assertFalse("Inflight Compaction must be absent for 103",
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "103")));
assertTrue("Requested Compaction must be present for 104",
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "104")));
assertTrue("Inflight Compaction must be present for 104",
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "104")));
assertTrue("Requested Compaction must be present for 105",
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "105")));
assertTrue("Inflight Compaction must be present for 105",
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "105")));
assertEquals(4, instants.size(), "Should delete all compaction instants < 104");
assertFalse(instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "100")),
"Requested Compaction must be absent for 100");
assertFalse(instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "100")),
"Inflight Compaction must be absent for 100");
assertFalse(instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "101")),
"Requested Compaction must be absent for 101");
assertFalse(instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "101")),
"Inflight Compaction must be absent for 101");
assertFalse(instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "102")),
"Requested Compaction must be absent for 102");
assertFalse(instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "102")),
"Inflight Compaction must be absent for 102");
assertFalse(instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "103")),
"Requested Compaction must be absent for 103");
assertFalse(instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "103")),
"Inflight Compaction must be absent for 103");
assertTrue(instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "104")),
"Requested Compaction must be present for 104");
assertTrue(instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "104")),
"Inflight Compaction must be present for 104");
assertTrue(instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "105")),
"Requested Compaction must be present for 105");
assertTrue(instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "105")),
"Inflight Compaction must be present for 105");
// read the file
HoodieArchivedTimeline archivedTimeline = new HoodieArchivedTimeline(metaClient);
assertEquals("Total archived records and total read records are the same count",
24, archivedTimeline.countInstants());
assertEquals(24, archivedTimeline.countInstants(),
"Total archived records and total read records are the same count");
//make sure the archived commits are the same as the (originalcommits - commitsleft)
Set<String> readCommits =
archivedTimeline.getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toSet());
assertEquals("Read commits map should match the originalCommits - commitsLoadedFromArchival",
originalCommits.stream().map(HoodieInstant::getTimestamp).collect(Collectors.toSet()), readCommits);
archivedTimeline.getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toSet());
assertEquals(originalCommits.stream().map(HoodieInstant::getTimestamp).collect(Collectors.toSet()), readCommits,
"Read commits map should match the originalCommits - commitsLoadedFromArchival");
// verify in-flight instants after archive
verifyInflightInstants(metaClient, 2);
@@ -247,31 +247,31 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
HoodieTestDataGenerator.createCommitFile(basePath, "103", dfs.getConf());
HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
assertEquals("Loaded 4 commits and the count should match", 4, timeline.countInstants());
assertEquals(4, timeline.countInstants(), "Loaded 4 commits and the count should match");
boolean result = archiveLog.archiveIfRequired(jsc);
assertTrue(result);
timeline = metaClient.getActiveTimeline().reload().getCommitsTimeline().filterCompletedInstants();
assertEquals("Should not archive commits when maxCommitsToKeep is 5", 4, timeline.countInstants());
assertEquals(4, timeline.countInstants(), "Should not archive commits when maxCommitsToKeep is 5");
List<HoodieInstant> instants = metaClient.scanHoodieInstantsFromFileSystem(
new Path(metaClient.getMetaAuxiliaryPath()), HoodieActiveTimeline.VALID_EXTENSIONS_IN_ACTIVE_TIMELINE, false);
assertEquals("Should not delete any aux compaction files when maxCommitsToKeep is 5", 8, instants.size());
assertTrue("Requested Compaction must be present for 100",
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "100")));
assertTrue("Inflight Compaction must be present for 100",
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "100")));
assertTrue("Requested Compaction must be present for 101",
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "101")));
assertTrue("Inflight Compaction must be present for 101",
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "101")));
assertTrue("Requested Compaction must be present for 102",
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "102")));
assertTrue("Inflight Compaction must be present for 102",
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "102")));
assertTrue("Requested Compaction must be present for 103",
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "103")));
assertTrue("Inflight Compaction must be present for 103",
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "103")));
assertEquals(8, instants.size(), "Should not delete any aux compaction files when maxCommitsToKeep is 5");
assertTrue(instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "100")),
"Requested Compaction must be present for 100");
assertTrue(instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "100")),
"Inflight Compaction must be present for 100");
assertTrue(instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "101")),
"Requested Compaction must be present for 101");
assertTrue(instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "101")),
"Inflight Compaction must be present for 101");
assertTrue(instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "102")),
"Requested Compaction must be present for 102");
assertTrue(instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "102")),
"Inflight Compaction must be present for 102");
assertTrue(instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "103")),
"Requested Compaction must be present for 103");
assertTrue(instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "103")),
"Inflight Compaction must be present for 103");
}
@Test
@@ -290,14 +290,14 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
HoodieTestDataGenerator.createCommitFile(basePath, "105", dfs.getConf());
HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
assertEquals("Loaded 6 commits and the count should match", 6, timeline.countInstants());
assertEquals(6, timeline.countInstants(), "Loaded 6 commits and the count should match");
boolean result = archiveLog.archiveIfRequired(jsc);
assertTrue(result);
timeline = metaClient.getActiveTimeline().reload().getCommitsTimeline().filterCompletedInstants();
assertTrue("Archived commits should always be safe", timeline.containsOrBeforeTimelineStarts("100"));
assertTrue("Archived commits should always be safe", timeline.containsOrBeforeTimelineStarts("101"));
assertTrue("Archived commits should always be safe", timeline.containsOrBeforeTimelineStarts("102"));
assertTrue("Archived commits should always be safe", timeline.containsOrBeforeTimelineStarts("103"));
assertTrue(timeline.containsOrBeforeTimelineStarts("100"), "Archived commits should always be safe");
assertTrue(timeline.containsOrBeforeTimelineStarts("101"), "Archived commits should always be safe");
assertTrue(timeline.containsOrBeforeTimelineStarts("102"), "Archived commits should always be safe");
assertTrue(timeline.containsOrBeforeTimelineStarts("103"), "Archived commits should always be safe");
}
@Test
@@ -317,19 +317,18 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
HoodieTestDataGenerator.createCommitFile(basePath, "105", dfs.getConf());
HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
assertEquals("Loaded 6 commits and the count should match", 6, timeline.countInstants());
assertEquals(6, timeline.countInstants(), "Loaded 6 commits and the count should match");
boolean result = archiveLog.archiveIfRequired(jsc);
assertTrue(result);
timeline = metaClient.getActiveTimeline().reload().getCommitsTimeline().filterCompletedInstants();
assertEquals(
"Since we have a savepoint at 101, we should never archive any commit after 101 (we only archive 100)", 5,
timeline.countInstants());
assertTrue("Archived commits should always be safe",
timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "101")));
assertTrue("Archived commits should always be safe",
timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "102")));
assertTrue("Archived commits should always be safe",
timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "103")));
assertEquals(5, timeline.countInstants(),
"Since we have a savepoint at 101, we should never archive any commit after 101 (we only archive 100)");
assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "101")),
"Archived commits should always be safe");
assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "102")),
"Archived commits should always be safe");
assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "103")),
"Archived commits should always be safe");
}
@Test
@@ -354,28 +353,29 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
HoodieTestDataGenerator.createCommitFile(basePath, "107", dfs.getConf());
HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsAndCompactionTimeline();
assertEquals("Loaded 6 commits and the count should match", 8, timeline.countInstants());
assertEquals(8, timeline.countInstants(), "Loaded 6 commits and the count should match");
boolean result = archiveLog.archiveIfRequired(jsc);
assertTrue(result);
timeline = metaClient.getActiveTimeline().reload().getCommitsAndCompactionTimeline();
assertFalse("Instants before oldest pending compaction can be removed",
timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "100")));
assertEquals("Since we have a pending compaction at 101, we should never archive any commit "
+ "after 101 (we only archive 100)", 7, timeline.countInstants());
assertTrue("Requested Compaction must still be present",
timeline.containsInstant(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "101")));
assertTrue("Instants greater than oldest pending compaction must be present",
timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "102")));
assertTrue("Instants greater than oldest pending compaction must be present",
timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "103")));
assertTrue("Instants greater than oldest pending compaction must be present",
timeline.containsInstant(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "104")));
assertTrue("Instants greater than oldest pending compaction must be present",
timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "105")));
assertTrue("Instants greater than oldest pending compaction must be present",
timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "106")));
assertTrue("Instants greater than oldest pending compaction must be present",
timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "107")));
assertFalse(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "100")),
"Instants before oldest pending compaction can be removed");
assertEquals(7, timeline.countInstants(),
"Since we have a pending compaction at 101, we should never archive any commit "
+ "after 101 (we only archive 100)");
assertTrue(timeline.containsInstant(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "101")),
"Requested Compaction must still be present");
assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "102")),
"Instants greater than oldest pending compaction must be present");
assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "103")),
"Instants greater than oldest pending compaction must be present");
assertTrue(timeline.containsInstant(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "104")),
"Instants greater than oldest pending compaction must be present");
assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "105")),
"Instants greater than oldest pending compaction must be present");
assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "106")),
"Instants greater than oldest pending compaction must be present");
assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "107")),
"Instants greater than oldest pending compaction must be present");
}
@Test
@@ -412,8 +412,8 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
private void verifyInflightInstants(HoodieTableMetaClient metaClient, int expectedTotalInstants) {
HoodieTimeline timeline = metaClient.getActiveTimeline().reload()
.getTimelineOfActions(Collections.singleton(HoodieTimeline.CLEAN_ACTION)).filterInflights();
assertEquals("Loaded inflight clean actions and the count should match", expectedTotalInstants,
timeline.countInstants());
assertEquals(expectedTotalInstants, timeline.countInstants(),
"Loaded inflight clean actions and the count should match");
}
@Test

View File

@@ -39,22 +39,23 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
@SuppressWarnings("unchecked")
public class TestHoodieMergeHandle extends HoodieClientTestHarness {
@Before
@BeforeEach
public void setUp() throws Exception {
initSparkContexts("TestHoodieMergeHandle");
initPath();
@@ -63,7 +64,7 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
initMetaClient();
}
@After
@AfterEach
public void tearDown() throws Exception {
cleanupFileSystem();
cleanupTestDataGenerator();
@@ -110,11 +111,12 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
// verify that there is a commit
metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieTimeline timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
assertEquals("Expecting a single commit.", 1,
timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants());
Assert.assertEquals("Latest commit should be 001", newCommitTime, timeline.lastInstant().get().getTimestamp());
assertEquals("Must contain 44 records", records.size(),
HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count());
assertEquals(1, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants(),
"Expecting a single commit.");
assertEquals(newCommitTime, timeline.lastInstant().get().getTimestamp(), "Latest commit should be 001");
assertEquals(records.size(),
HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count(),
"Must contain 44 records");
/**
* Write 2 (insert) This will do a bulk insert of 1 record with the same row_key as record1 in the previous insert
@@ -135,10 +137,10 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
// verify that there are 2 commits
metaClient = HoodieTableMetaClient.reload(metaClient);
timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
assertEquals("Expecting two commits.", 2, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants());
Assert.assertEquals("Latest commit should be 002", newCommitTime, timeline.lastInstant().get().getTimestamp());
assertEquals(2, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants(), "Expecting two commits.");
assertEquals(newCommitTime, timeline.lastInstant().get().getTimestamp(), "Latest commit should be 002");
Dataset<Row> dataSet = getRecords();
assertEquals("Must contain 45 records", 45, dataSet.count());
assertEquals(45, dataSet.count(), "Must contain 45 records");
/**
* Write 3 (insert) This will bulk insert 2 new completely new records. At this point, we will have 2 files with
@@ -155,10 +157,10 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
// verify that there are now 3 commits
metaClient = HoodieTableMetaClient.reload(metaClient);
timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
assertEquals("Expecting three commits.", 3, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants());
Assert.assertEquals("Latest commit should be 003", newCommitTime, timeline.lastInstant().get().getTimestamp());
assertEquals(3, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants(), "Expecting three commits.");
assertEquals(newCommitTime, timeline.lastInstant().get().getTimestamp(), "Latest commit should be 003");
dataSet = getRecords();
assertEquals("Must contain 47 records", 47, dataSet.count());
assertEquals(47, dataSet.count(), "Must contain 47 records");
/**
* Write 4 (updates) This will generate 2 upsert records with id1 and id2. The rider and driver names in the
@@ -185,12 +187,12 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
// verify there are now 4 commits
timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
assertEquals("Expecting four commits.", 4, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants());
Assert.assertEquals("Latest commit should be 004", timeline.lastInstant().get().getTimestamp(), newCommitTime);
assertEquals(4, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants(), "Expecting four commits.");
assertEquals(timeline.lastInstant().get().getTimestamp(), newCommitTime, "Latest commit should be 004");
// Check the entire dataset has 47 records still
dataSet = getRecords();
assertEquals("Must contain 47 records", 47, dataSet.count());
assertEquals(47, dataSet.count(), "Must contain 47 records");
Row[] rows = (Row[]) dataSet.collect();
int record1Count = 0;
int record2Count = 0;
@@ -233,19 +235,18 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
List<WriteStatus> statuses = writeClient.insert(recordsRDD, newCommitTime).collect();
// All records should be inserts into new parquet
Assert.assertTrue(statuses.stream()
assertTrue(statuses.stream()
.filter(status -> status.getStat().getPrevCommit() != HoodieWriteStat.NULL_COMMIT).count() > 0);
// Num writes should be equal to the number of records inserted
Assert.assertEquals(
(long) statuses.stream().map(status -> status.getStat().getNumWrites()).reduce((a, b) -> a + b).get(), 100);
assertEquals(100,
(long) statuses.stream().map(status -> status.getStat().getNumWrites()).reduce((a, b) -> a + b).get());
// Num update writes should be equal to the number of records updated
Assert.assertEquals(
(long) statuses.stream().map(status -> status.getStat().getNumUpdateWrites()).reduce((a, b) -> a + b).get(),
0);
assertEquals(0,
(long) statuses.stream().map(status -> status.getStat().getNumUpdateWrites()).reduce((a, b) -> a + b).get());
// Num update writes should be equal to the number of insert records converted to updates as part of small file
// handling
Assert.assertEquals(
(long) statuses.stream().map(status -> status.getStat().getNumInserts()).reduce((a, b) -> a + b).get(), 100);
assertEquals(100,
(long) statuses.stream().map(status -> status.getStat().getNumInserts()).reduce((a, b) -> a + b).get());
// Update all the 100 records
metaClient = HoodieTableMetaClient.reload(metaClient);
@@ -258,20 +259,18 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
statuses = writeClient.upsert(updatedRecordsRDD, newCommitTime).collect();
// All records should be upserts into existing parquet
Assert.assertEquals(
statuses.stream().filter(status -> status.getStat().getPrevCommit() == HoodieWriteStat.NULL_COMMIT).count(),
0);
assertEquals(0,
statuses.stream().filter(status -> status.getStat().getPrevCommit() == HoodieWriteStat.NULL_COMMIT).count());
// Num writes should be equal to the number of records inserted
Assert.assertEquals(
(long) statuses.stream().map(status -> status.getStat().getNumWrites()).reduce((a, b) -> a + b).get(), 100);
assertEquals(100,
(long) statuses.stream().map(status -> status.getStat().getNumWrites()).reduce((a, b) -> a + b).get());
// Num update writes should be equal to the number of records updated
Assert.assertEquals(
(long) statuses.stream().map(status -> status.getStat().getNumUpdateWrites()).reduce((a, b) -> a + b).get(),
100);
assertEquals(100,
(long) statuses.stream().map(status -> status.getStat().getNumUpdateWrites()).reduce((a, b) -> a + b).get());
// Num update writes should be equal to the number of insert records converted to updates as part of small file
// handling
Assert.assertEquals(
(long) statuses.stream().map(status -> status.getStat().getNumInserts()).reduce((a, b) -> a + b).get(), 0);
assertEquals(0,
(long) statuses.stream().map(status -> status.getStat().getNumInserts()).reduce((a, b) -> a + b).get());
newCommitTime = "102";
writeClient.startCommitWithTime(newCommitTime);
@@ -282,24 +281,23 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
statuses = writeClient.upsert(allRecordsRDD, newCommitTime).collect();
// All records should be upserts into existing parquet (with inserts as updates small file handled)
Assert.assertEquals((long) statuses.stream()
.filter(status -> status.getStat().getPrevCommit() == HoodieWriteStat.NULL_COMMIT).count(), 0);
assertEquals(0, (long) statuses.stream()
.filter(status -> status.getStat().getPrevCommit() == HoodieWriteStat.NULL_COMMIT).count());
// Num writes should be equal to the total number of records written
Assert.assertEquals(
(long) statuses.stream().map(status -> status.getStat().getNumWrites()).reduce((a, b) -> a + b).get(), 200);
assertEquals(200,
(long) statuses.stream().map(status -> status.getStat().getNumWrites()).reduce((a, b) -> a + b).get());
// Num update writes should be equal to the number of records updated (including inserts converted as updates)
Assert.assertEquals(
(long) statuses.stream().map(status -> status.getStat().getNumUpdateWrites()).reduce((a, b) -> a + b).get(),
100);
assertEquals(100,
(long) statuses.stream().map(status -> status.getStat().getNumUpdateWrites()).reduce((a, b) -> a + b).get());
// Num update writes should be equal to the number of insert records converted to updates as part of small file
// handling
Assert.assertEquals(
(long) statuses.stream().map(status -> status.getStat().getNumInserts()).reduce((a, b) -> a + b).get(), 100);
assertEquals(100,
(long) statuses.stream().map(status -> status.getStat().getNumInserts()).reduce((a, b) -> a + b).get());
// Verify all records have location set
statuses.forEach(writeStatus -> {
writeStatus.getWrittenRecords().forEach(r -> {
// Ensure New Location is set
Assert.assertTrue(r.getNewLocation().isPresent());
assertTrue(r.getNewLocation().isPresent());
});
});
}
@@ -309,7 +307,7 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
// Check the entire dataset has 8 records still
String[] fullPartitionPaths = new String[dataGen.getPartitionPaths().length];
for (int i = 0; i < fullPartitionPaths.length; i++) {
fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]);
fullPartitionPaths[i] = Paths.get(basePath, dataGen.getPartitionPaths()[i], "*").toString();
}
Dataset<Row> dataSet = HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths);
return dataSet;
@@ -323,7 +321,7 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
// Fails the calling test if any WriteStatus in the given list reports errors; the failure message
// names the file id of the offending status.
// NOTE(review): the assertFalse call appears twice below, once message-first and once message-last.
// This looks like the before/after pair of a JUnit 4 -> JUnit 5 diff rendered without +/- markers;
// confirm that only one form exists in the real source.
void assertNoWriteErrors(List<WriteStatus> statuses) {
// Verify there are no errors
for (WriteStatus status : statuses) {
assertFalse("Errors found in write of " + status.getFileId(), status.hasErrors());
assertFalse(status.hasErrors(), "Errors found in write of " + status.getFileId());
}
}

View File

@@ -26,12 +26,12 @@ import org.apache.hudi.table.HoodieTable;
import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.fs.Path;
import org.junit.Assert;
import org.junit.Test;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import static org.junit.Assert.fail;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
/**
* Tests for {@link HoodieStorageWriterFactory}.
@@ -48,17 +48,14 @@ public class TestHoodieStorageWriterFactory extends TestHoodieClientBase {
SparkTaskContextSupplier supplier = new SparkTaskContextSupplier();
HoodieStorageWriter<IndexedRecord> parquetWriter = HoodieStorageWriterFactory.getStorageWriter(instantTime,
parquetPath, table, cfg, HoodieTestDataGenerator.AVRO_SCHEMA, supplier);
Assert.assertTrue(parquetWriter instanceof HoodieParquetWriter);
assertTrue(parquetWriter instanceof HoodieParquetWriter);
// other file format exception.
final Path logPath = new Path(basePath + "/partition/path/f.b51192a8-574b-4a85-b246-bcfec03ac8bf_100.log.2_1-0-1");
try {
final Throwable thrown = assertThrows(UnsupportedOperationException.class, () -> {
HoodieStorageWriter<IndexedRecord> logWriter = HoodieStorageWriterFactory.getStorageWriter(instantTime, logPath,
table, cfg, HoodieTestDataGenerator.AVRO_SCHEMA, supplier);
fail("should fail since log storage writer is not supported yet.");
} catch (Exception e) {
Assert.assertTrue(e instanceof UnsupportedOperationException);
Assert.assertTrue(e.getMessage().contains("format not supported yet."));
}
}, "should fail since log storage writer is not supported yet.");
assertTrue(thrown.getMessage().contains("format not supported yet."));
}
}

View File

@@ -60,8 +60,7 @@ import org.apache.hadoop.fs.RemoteIterator;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaRDD;
import org.junit.Assert;
import org.junit.Test;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
@@ -82,9 +81,9 @@ import java.util.stream.Stream;
import scala.Tuple3;
import static org.apache.hudi.common.model.HoodieTestUtils.DEFAULT_PARTITION_PATHS;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
/**
* Test Cleaning related logic.
@@ -123,15 +122,16 @@ public class TestCleaner extends TestHoodieClientBase {
// verify that there is a commit
metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieTimeline timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
assertEquals("Expecting a single commit.", 1, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants());
assertEquals(1, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants(), "Expecting a single commit.");
// Should have 100 records in table (check using Index), all in locations marked at commit
HoodieTable table = HoodieTable.create(metaClient, client.getConfig(), jsc);
assertFalse(table.getCompletedCommitsTimeline().empty());
String instantTime = table.getCompletedCommitsTimeline().getInstants().findFirst().get().getTimestamp();
assertFalse(table.getCompletedCleanTimeline().empty());
assertEquals("The clean instant should be the same as the commit instant", instantTime,
table.getCompletedCleanTimeline().getInstants().findFirst().get().getTimestamp());
assertEquals(instantTime,
table.getCompletedCleanTimeline().getInstants().findFirst().get().getTimestamp(),
"The clean instant should be the same as the commit instant");
HoodieIndex index = HoodieIndex.createIndex(cfg, jsc);
List<HoodieRecord> taggedRecords = index.tagLocation(jsc.parallelize(records, 1), jsc, table).collect();
@@ -272,22 +272,22 @@ public class TestCleaner extends TestHoodieClientBase {
return compactionFileIdToLatestFileSlice.get(fileGroup.getFileGroupId()).getBaseInstantTime()
.equals(df.getCommitTime());
}).findAny());
Assert.assertTrue("Data File selected for compaction is retained",
dataFileForCompactionPresent.isPresent());
assertTrue(dataFileForCompactionPresent.isPresent(),
"Data File selected for compaction is retained");
} else {
// file has no more than max versions
String fileId = fileGroup.getFileGroupId().getFileId();
List<HoodieBaseFile> dataFiles = fileGroup.getAllBaseFiles().collect(Collectors.toList());
assertTrue("fileId " + fileId + " has more than " + maxVersions + " versions",
dataFiles.size() <= maxVersions);
assertTrue(dataFiles.size() <= maxVersions,
"fileId " + fileId + " has more than " + maxVersions + " versions");
// Each file, has the latest N versions (i.e cleaning gets rid of older versions)
List<String> commitedVersions = new ArrayList<>(fileIdToVersions.get(fileId));
for (int i = 0; i < dataFiles.size(); i++) {
assertEquals("File " + fileId + " does not have latest versions on commits" + commitedVersions,
(dataFiles.get(i)).getCommitTime(),
commitedVersions.get(commitedVersions.size() - 1 - i));
assertEquals((dataFiles.get(i)).getCommitTime(),
commitedVersions.get(commitedVersions.size() - 1 - i),
"File " + fileId + " does not have latest versions on commits" + commitedVersions);
}
}
}
@@ -395,8 +395,8 @@ public class TestCleaner extends TestHoodieClientBase {
LOG.debug("Data File - " + value);
commitTimes.add(value.getCommitTime());
});
assertEquals("Only contain acceptable versions of file should be present",
acceptableCommits.stream().map(HoodieInstant::getTimestamp).collect(Collectors.toSet()), commitTimes);
assertEquals(acceptableCommits.stream().map(HoodieInstant::getTimestamp).collect(Collectors.toSet()), commitTimes,
"Only contain acceptable versions of file should be present");
}
}
} catch (IOException ioe) {
@@ -432,17 +432,17 @@ public class TestCleaner extends TestHoodieClientBase {
HoodieInstant completedCleanInstant = new HoodieInstant(State.COMPLETED, HoodieTimeline.CLEAN_ACTION, cleanInstantTs);
metaClient.reloadActiveTimeline().revertToInflight(completedCleanInstant);
HoodieCleanMetadata cleanMetadata2 = writeClient.clean(getNextInstant());
Assert.assertEquals(cleanMetadata1.getEarliestCommitToRetain(), cleanMetadata2.getEarliestCommitToRetain());
Assert.assertEquals(new Integer(0), cleanMetadata2.getTotalFilesDeleted());
Assert.assertEquals(cleanMetadata1.getPartitionMetadata().keySet(), cleanMetadata2.getPartitionMetadata().keySet());
assertEquals(cleanMetadata1.getEarliestCommitToRetain(), cleanMetadata2.getEarliestCommitToRetain());
assertEquals(new Integer(0), cleanMetadata2.getTotalFilesDeleted());
assertEquals(cleanMetadata1.getPartitionMetadata().keySet(), cleanMetadata2.getPartitionMetadata().keySet());
final HoodieCleanMetadata retriedCleanMetadata = CleanerUtils.getCleanerMetadata(HoodieTableMetaClient.reload(metaClient), completedCleanInstant);
cleanMetadata1.getPartitionMetadata().keySet().forEach(k -> {
HoodieCleanPartitionMetadata p1 = cleanMetadata1.getPartitionMetadata().get(k);
HoodieCleanPartitionMetadata p2 = retriedCleanMetadata.getPartitionMetadata().get(k);
Assert.assertEquals(p1.getDeletePathPatterns(), p2.getDeletePathPatterns());
Assert.assertEquals(p1.getSuccessDeleteFiles(), p2.getFailedDeleteFiles());
Assert.assertEquals(p1.getPartitionPath(), p2.getPartitionPath());
Assert.assertEquals(k, p1.getPartitionPath());
assertEquals(p1.getDeletePathPatterns(), p2.getDeletePathPatterns());
assertEquals(p1.getSuccessDeleteFiles(), p2.getFailedDeleteFiles());
assertEquals(p1.getPartitionPath(), p2.getPartitionPath());
assertEquals(k, p1.getPartitionPath());
});
}
@@ -478,12 +478,12 @@ public class TestCleaner extends TestHoodieClientBase {
metaClient = HoodieTableMetaClient.reload(metaClient);
List<HoodieCleanStat> hoodieCleanStatsOne = runCleaner(config);
assertEquals("Must not clean any files", 0,
assertEquals(0,
getCleanStat(hoodieCleanStatsOne, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles()
.size());
assertEquals("Must not clean any files", 0,
.size(), "Must not clean any files");
assertEquals(0,
getCleanStat(hoodieCleanStatsOne, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).getSuccessDeleteFiles()
.size());
.size(), "Must not clean any files");
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
file1P0C0));
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "000",
@@ -501,12 +501,12 @@ public class TestCleaner extends TestHoodieClientBase {
HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001", file1P1C0); // update
List<HoodieCleanStat> hoodieCleanStatsTwo = runCleaner(config);
assertEquals("Must clean 1 file", 1,
assertEquals(1,
getCleanStat(hoodieCleanStatsTwo, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles()
.size());
assertEquals("Must clean 1 file", 1,
.size(), "Must clean 1 file");
assertEquals(1,
getCleanStat(hoodieCleanStatsTwo, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).getSuccessDeleteFiles()
.size());
.size(), "Must clean 1 file");
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
file2P0C1));
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001",
@@ -526,9 +526,9 @@ public class TestCleaner extends TestHoodieClientBase {
HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002");
List<HoodieCleanStat> hoodieCleanStatsThree = runCleaner(config);
assertEquals("Must clean two files", 2,
assertEquals(2,
getCleanStat(hoodieCleanStatsThree, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH)
.getSuccessDeleteFiles().size());
.getSuccessDeleteFiles().size(), "Must clean two files");
assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
file1P0C0));
assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
@@ -539,9 +539,9 @@ public class TestCleaner extends TestHoodieClientBase {
// No cleaning on partially written file, with no commit.
HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "003", file3P0C2); // update
List<HoodieCleanStat> hoodieCleanStatsFour = runCleaner(config);
assertEquals("Must not clean any files", 0,
assertEquals(0,
getCleanStat(hoodieCleanStatsFour, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles()
.size());
.size(), "Must not clean any files");
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002",
file3P0C2));
}
@@ -578,9 +578,9 @@ public class TestCleaner extends TestHoodieClientBase {
HoodieTestUtils.createCompactionCommitFiles(fs, basePath, "001");
List<HoodieCleanStat> hoodieCleanStats = runCleaner(config);
assertEquals("Must clean three files, one parquet and 2 log files", 3,
assertEquals(3,
getCleanStat(hoodieCleanStats, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles()
.size());
.size(), "Must clean three files, one parquet and 2 log files");
assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
file1P0));
assertFalse(HoodieTestUtils.doesLogFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
@@ -646,37 +646,37 @@ public class TestCleaner extends TestHoodieClientBase {
CleanMetadataMigrator migrator = new CleanMetadataMigrator(metaClient);
HoodieCleanMetadata oldMetadata =
migrator.migrateToVersion(metadata, metadata.getVersion(), CleanerUtils.CLEAN_METADATA_VERSION_1);
Assert.assertEquals(CleanerUtils.CLEAN_METADATA_VERSION_1, oldMetadata.getVersion());
assertEquals(CleanerUtils.CLEAN_METADATA_VERSION_1, oldMetadata.getVersion());
testCleanMetadataEquality(metadata, oldMetadata);
testCleanMetadataPathEquality(oldMetadata, oldExpected);
HoodieCleanMetadata newMetadata = migrator.upgradeToLatest(oldMetadata, oldMetadata.getVersion());
Assert.assertEquals(CleanerUtils.LATEST_CLEAN_METADATA_VERSION, newMetadata.getVersion());
assertEquals(CleanerUtils.LATEST_CLEAN_METADATA_VERSION, newMetadata.getVersion());
testCleanMetadataEquality(oldMetadata, newMetadata);
testCleanMetadataPathEquality(newMetadata, newExpected);
testCleanMetadataPathEquality(oldMetadata, oldExpected);
}
/**
 * Asserts that two clean-metadata objects carry the same summary fields and per-partition values.
 *
 * <p>Compared fields: earliest commit to retain, start clean time, time taken in millis, total files
 * deleted, the key set of the partition-metadata map, and the partition path and policy of every
 * partition entry.
 *
 * <p>NOTE(review): every check appears twice, as {@code Assert.assertEquals(...)} followed by the
 * statically imported {@code assertEquals(...)}. This looks like the before/after pair of a rendered
 * diff without +/- markers; confirm that only one form exists in the real source.
 *
 * @param input1 first clean metadata to compare
 * @param input2 second clean metadata to compare
 */
public void testCleanMetadataEquality(HoodieCleanMetadata input1, HoodieCleanMetadata input2) {
Assert.assertEquals(input1.getEarliestCommitToRetain(), input2.getEarliestCommitToRetain());
Assert.assertEquals(input1.getStartCleanTime(), input2.getStartCleanTime());
Assert.assertEquals(input1.getTimeTakenInMillis(), input2.getTimeTakenInMillis());
Assert.assertEquals(input1.getTotalFilesDeleted(), input2.getTotalFilesDeleted());
assertEquals(input1.getEarliestCommitToRetain(), input2.getEarliestCommitToRetain());
assertEquals(input1.getStartCleanTime(), input2.getStartCleanTime());
assertEquals(input1.getTimeTakenInMillis(), input2.getTimeTakenInMillis());
assertEquals(input1.getTotalFilesDeleted(), input2.getTotalFilesDeleted());
Map<String, HoodieCleanPartitionMetadata> map1 = input1.getPartitionMetadata();
Map<String, HoodieCleanPartitionMetadata> map2 = input2.getPartitionMetadata();
// Both inputs must describe exactly the same set of partitions.
Assert.assertEquals(map1.keySet(), map2.keySet());
assertEquals(map1.keySet(), map2.keySet());
// The lists below are built from map.values() in each map's own iteration order and compared as
// lists, so the check is order-sensitive.
// NOTE(review): presumably both maps iterate in the same order; verify against the map type used.
List<String> partitions1 = map1.values().stream().map(HoodieCleanPartitionMetadata::getPartitionPath).collect(
Collectors.toList());
List<String> partitions2 = map2.values().stream().map(HoodieCleanPartitionMetadata::getPartitionPath).collect(
Collectors.toList());
Assert.assertEquals(partitions1, partitions2);
assertEquals(partitions1, partitions2);
List<String> policies1 = map1.values().stream().map(HoodieCleanPartitionMetadata::getPolicy).collect(Collectors.toList());
List<String> policies2 = map2.values().stream().map(HoodieCleanPartitionMetadata::getPolicy).collect(Collectors.toList());
Assert.assertEquals(policies1, policies2);
assertEquals(policies1, policies2);
}
private void testCleanMetadataPathEquality(HoodieCleanMetadata metadata, Map<String, Tuple3> expected) {
@@ -687,9 +687,9 @@ public class TestCleaner extends TestHoodieClientBase {
String partitionPath = entry.getKey();
HoodieCleanPartitionMetadata partitionMetadata = entry.getValue();
Assert.assertEquals(expected.get(partitionPath)._1(), partitionMetadata.getDeletePathPatterns());
Assert.assertEquals(expected.get(partitionPath)._2(), partitionMetadata.getSuccessDeleteFiles());
Assert.assertEquals(expected.get(partitionPath)._3(), partitionMetadata.getFailedDeleteFiles());
assertEquals(expected.get(partitionPath)._1(), partitionMetadata.getDeletePathPatterns());
assertEquals(expected.get(partitionPath)._2(), partitionMetadata.getSuccessDeleteFiles());
assertEquals(expected.get(partitionPath)._3(), partitionMetadata.getFailedDeleteFiles());
}
}
@@ -751,12 +751,12 @@ public class TestCleaner extends TestHoodieClientBase {
metaClient = HoodieTableMetaClient.reload(metaClient);
List<HoodieCleanStat> hoodieCleanStatsOne = runCleaner(config, simulateFailureRetry);
assertEquals("Must not clean any files", 0,
assertEquals(0,
getCleanStat(hoodieCleanStatsOne, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles()
.size());
assertEquals("Must not clean any files", 0,
.size(), "Must not clean any files");
assertEquals(0,
getCleanStat(hoodieCleanStatsOne, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).getSuccessDeleteFiles()
.size());
.size(), "Must not clean any files");
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
file1P0C0));
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "000",
@@ -786,12 +786,12 @@ public class TestCleaner extends TestHoodieClientBase {
new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "001"),
Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
List<HoodieCleanStat> hoodieCleanStatsTwo = runCleaner(config, simulateFailureRetry);
assertEquals("Must not clean any files", 0,
assertEquals(0,
getCleanStat(hoodieCleanStatsTwo, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles()
.size());
assertEquals("Must not clean any files", 0,
.size(), "Must not clean any files");
assertEquals(0,
getCleanStat(hoodieCleanStatsTwo, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).getSuccessDeleteFiles()
.size());
.size(), "Must not clean any files");
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
file2P0C1));
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001",
@@ -820,9 +820,10 @@ public class TestCleaner extends TestHoodieClientBase {
Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
List<HoodieCleanStat> hoodieCleanStatsThree = runCleaner(config, simulateFailureRetry);
assertEquals("Must not clean any file. We have to keep 1 version before the latest commit time to keep", 0,
assertEquals(0,
getCleanStat(hoodieCleanStatsThree, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH)
.getSuccessDeleteFiles().size());
.getSuccessDeleteFiles().size(),
"Must not clean any file. We have to keep 1 version before the latest commit time to keep");
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
file1P0C0));
@@ -844,9 +845,9 @@ public class TestCleaner extends TestHoodieClientBase {
Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
List<HoodieCleanStat> hoodieCleanStatsFour = runCleaner(config, simulateFailureRetry);
assertEquals("Must not clean one old file", 1,
assertEquals(1,
getCleanStat(hoodieCleanStatsFour, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles()
.size());
.size(), "Must not clean one old file");
assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
file1P0C0));
@@ -867,7 +868,7 @@ public class TestCleaner extends TestHoodieClientBase {
HoodieTestUtils
.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "004", file3P0C2); // update
commitMetadata = generateCommitMetadata(CollectionUtils.createImmutableMap(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH,
CollectionUtils.createImmutableList(file3P0C2)));
CollectionUtils.createImmutableList(file3P0C2)));
metaClient.getActiveTimeline().createNewInstant(
new HoodieInstant(State.REQUESTED, HoodieTimeline.COMMIT_ACTION, "004"));
metaClient.getActiveTimeline().transitionRequestedToInflight(
@@ -875,8 +876,8 @@ public class TestCleaner extends TestHoodieClientBase {
Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
List<HoodieCleanStat> hoodieCleanStatsFive = runCleaner(config, simulateFailureRetry);
HoodieCleanStat cleanStat = getCleanStat(hoodieCleanStatsFive, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH);
assertEquals("Must not clean any files", 0,
cleanStat != null ? cleanStat.getSuccessDeleteFiles().size() : 0);
assertEquals(0,
cleanStat != null ? cleanStat.getSuccessDeleteFiles().size() : 0, "Must not clean any files");
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
file1P0C0));
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
@@ -889,8 +890,8 @@ public class TestCleaner extends TestHoodieClientBase {
@Test
public void testCleanMarkerDataFilesOnRollback() throws IOException {
List<String> markerFiles = createMarkerFiles("000", 10);
assertEquals("Some marker files are created.", 10, markerFiles.size());
assertEquals("Some marker files are created.", markerFiles.size(), getTotalTempFiles());
assertEquals(10, markerFiles.size(), "Some marker files are created.");
assertEquals(markerFiles.size(), getTotalTempFiles(), "Some marker files are created.");
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).build();
metaClient = HoodieTableMetaClient.reload(metaClient);
@@ -901,7 +902,7 @@ public class TestCleaner extends TestHoodieClientBase {
new HoodieInstant(State.REQUESTED, HoodieTimeline.COMMIT_ACTION, "000"), Option.empty());
metaClient.reloadActiveTimeline();
table.rollback(jsc, "001", new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "000"), true);
assertEquals("All temp files are deleted.", 0, getTotalTempFiles());
assertEquals(0, getTotalTempFiles(), "All temp files are deleted.");
}
/**
@@ -922,7 +923,7 @@ public class TestCleaner extends TestHoodieClientBase {
metaClient = HoodieTableMetaClient.reload(metaClient);
List<HoodieCleanStat> hoodieCleanStatsOne = runCleaner(config);
assertTrue("HoodieCleanStats should be empty for a table with empty partitionPaths", hoodieCleanStatsOne.isEmpty());
assertTrue(hoodieCleanStatsOne.isEmpty(), "HoodieCleanStats should be empty for a table with empty partitionPaths");
}
/**
@@ -998,7 +999,7 @@ public class TestCleaner extends TestHoodieClientBase {
metaClient = HoodieTableMetaClient.reload(metaClient);
List<HoodieCleanStat> cleanStats = runCleaner(config);
assertEquals("Must not clean any files", 0, cleanStats.size());
assertEquals(0, cleanStats.size(), "Must not clean any files");
}
/**
@@ -1088,11 +1089,11 @@ public class TestCleaner extends TestHoodieClientBase {
String baseInstantForCompaction = fileIdToLatestInstantBeforeCompaction.get(fileId);
Option<FileSlice> fileSliceForCompaction = Option.fromJavaOptional(hoodieTable.getSliceView()
.getLatestFileSlicesBeforeOrOn(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, baseInstantForCompaction,
true)
true)
.filter(fs -> fs.getFileId().equals(fileId)).findFirst());
Assert.assertTrue("Base Instant for Compaction must be preserved", fileSliceForCompaction.isPresent());
Assert.assertTrue("FileSlice has data-file", fileSliceForCompaction.get().getBaseFile().isPresent());
Assert.assertEquals("FileSlice has log-files", 2, fileSliceForCompaction.get().getLogFiles().count());
assertTrue(fileSliceForCompaction.isPresent(), "Base Instant for Compaction must be preserved");
assertTrue(fileSliceForCompaction.get().getBaseFile().isPresent(), "FileSlice has data-file");
assertEquals(2, fileSliceForCompaction.get().getLogFiles().count(), "FileSlice has log-files");
});
// Test for progress (Did we clean some files ?)
@@ -1100,10 +1101,10 @@ public class TestCleaner extends TestHoodieClientBase {
.flatMap(cleanStat -> convertPathToFileIdWithCommitTime(newMetaClient, cleanStat.getDeletePathPatterns())
.map(fileIdWithCommitTime -> {
if (expFileIdToPendingCompaction.containsKey(fileIdWithCommitTime.getKey())) {
Assert.assertTrue("Deleted instant time must be less than pending compaction",
HoodieTimeline.compareTimestamps(
fileIdToLatestInstantBeforeCompaction.get(fileIdWithCommitTime.getKey()),
fileIdWithCommitTime.getValue(), HoodieTimeline.GREATER));
assertTrue(HoodieTimeline.compareTimestamps(
fileIdToLatestInstantBeforeCompaction.get(fileIdWithCommitTime.getKey()),
fileIdWithCommitTime.getValue(), HoodieTimeline.GREATER),
"Deleted instant time must be less than pending compaction");
return true;
}
return false;
@@ -1111,9 +1112,9 @@ public class TestCleaner extends TestHoodieClientBase {
long numDeleted =
hoodieCleanStats.stream().mapToLong(cleanStat -> cleanStat.getDeletePathPatterns().size()).sum();
// Tighter check for regression
Assert.assertEquals("Correct number of files deleted", expNumFilesDeleted, numDeleted);
Assert.assertEquals("Correct number of files under compaction deleted", expNumFilesUnderCompactionDeleted,
numFilesUnderCompactionDeleted);
assertEquals(expNumFilesDeleted, numDeleted, "Correct number of files deleted");
assertEquals(expNumFilesUnderCompactionDeleted, numFilesUnderCompactionDeleted,
"Correct number of files under compaction deleted");
}
/**

View File

@@ -25,22 +25,24 @@ import org.apache.hudi.common.fs.ConsistencyGuardConfig;
import org.apache.hudi.common.fs.FailSafeConsistencyGuard;
import org.apache.hadoop.fs.Path;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.util.Arrays;
import java.util.concurrent.TimeoutException;
import static org.junit.jupiter.api.Assertions.assertThrows;
public class TestConsistencyGuard extends HoodieClientTestHarness {
// Per-test fixture setup. The two annotation lines are the two sides of the JUnit migration:
// @Before is the JUnit 4 form (org.junit.Before) and @BeforeEach is the JUnit 5 form
// (org.junit.jupiter.api.BeforeEach).
@Before
@BeforeEach
public void setup() {
// NOTE(review): both helpers are presumably inherited from HoodieClientTestHarness and
// populate the basePath / fs fields used by the tests below - confirm against the harness.
initPath();
initFileSystemWithDefaultConfiguration();
}
// Per-test fixture teardown. @After is the JUnit 4 annotation and @AfterEach is its JUnit 5
// counterpart; the method delegates to cleanupFileSystem().
@After
@AfterEach
public void tearDown() throws Exception {
cleanupFileSystem();
}
@@ -65,35 +67,43 @@ public class TestConsistencyGuard extends HoodieClientTestHarness {
.asList(basePath + "/partition/path/f1_1-0-1_000.parquet", basePath + "/partition/path/f2_1-0-1_000.parquet"));
}
// Negative test: waitTillAllFilesAppear is expected to throw TimeoutException for the
// listed "1-0-2" parquet paths.
// The JUnit 4 form declares the expectation on the annotation (@Test(expected = ...)) and
// calls the guard directly; the JUnit 5 form uses a plain @Test and wraps the same call in
// assertThrows. Both forms appear below.
@Test(expected = TimeoutException.class)
@Test
public void testCheckFailingAppear() throws Exception {
// NOTE(review): presumably creates a data file for file id "f1" at commit "000" under
// partition/path, whose name differs from the "1-0-2" names waited on below - confirm
// against HoodieClientTestUtils.fakeDataFile.
HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f1");
ConsistencyGuard passing = new FailSafeConsistencyGuard(fs, getConsistencyGuardConfig());
// JUnit 4 form: bare call, exception expected via the annotation.
passing.waitTillAllFilesAppear(basePath + "/partition/path", Arrays
.asList(basePath + "/partition/path/f1_1-0-2_000.parquet", basePath + "/partition/path/f2_1-0-2_000.parquet"));
// JUnit 5 form: the same call, with the expected exception asserted explicitly.
assertThrows(TimeoutException.class, () -> {
passing.waitTillAllFilesAppear(basePath + "/partition/path", Arrays
.asList(basePath + "/partition/path/f1_1-0-2_000.parquet", basePath + "/partition/path/f2_1-0-2_000.parquet"));
});
}
// Negative test for the single-file variant: waitTillFileAppears is expected to throw
// TimeoutException for the "f1_1-0-2_000.parquet" path.
// Both the JUnit 4 form (@Test(expected = ...) with a bare call) and the JUnit 5 form
// (@Test with assertThrows) appear below.
@Test(expected = TimeoutException.class)
@Test
public void testCheckFailingAppears() throws Exception {
HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f1");
ConsistencyGuard passing = new FailSafeConsistencyGuard(fs, getConsistencyGuardConfig());
// JUnit 4 form: bare call, exception expected via the annotation.
passing.waitTillFileAppears(new Path(basePath + "/partition/path/f1_1-0-2_000.parquet"));
// JUnit 5 form: the same call, with the expected exception asserted explicitly.
assertThrows(TimeoutException.class, () -> {
passing.waitTillFileAppears(new Path(basePath + "/partition/path/f1_1-0-2_000.parquet"));
});
}
// Negative test: waitTillAllFilesDisappear is expected to throw TimeoutException for a
// list containing "f1_1-0-1_000.parquet" and "f2_1-0-2_000.parquet".
// NOTE(review): presumably the f1 "1-0-1" path is the file fakeDataFile just created, so
// it never disappears - confirm against HoodieClientTestUtils.fakeDataFile.
// Both the JUnit 4 form (@Test(expected = ...) with a bare call) and the JUnit 5 form
// (@Test with assertThrows) appear below.
@Test(expected = TimeoutException.class)
@Test
public void testCheckFailingDisappear() throws Exception {
HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f1");
ConsistencyGuard passing = new FailSafeConsistencyGuard(fs, getConsistencyGuardConfig());
// JUnit 4 form: bare call, exception expected via the annotation.
passing.waitTillAllFilesDisappear(basePath + "/partition/path", Arrays
.asList(basePath + "/partition/path/f1_1-0-1_000.parquet", basePath + "/partition/path/f2_1-0-2_000.parquet"));
// JUnit 5 form: the same call, with the expected exception asserted explicitly.
assertThrows(TimeoutException.class, () -> {
passing.waitTillAllFilesDisappear(basePath + "/partition/path", Arrays
.asList(basePath + "/partition/path/f1_1-0-1_000.parquet", basePath + "/partition/path/f2_1-0-2_000.parquet"));
});
}
// Negative test for the single-file variant: waitTillFileDisappears is expected to throw
// TimeoutException for the "f1_1-0-1_000.parquet" path.
// Both the JUnit 4 form (@Test(expected = ...) with a bare call) and the JUnit 5 form
// (@Test with assertThrows) appear below.
@Test(expected = TimeoutException.class)
@Test
public void testCheckFailingDisappears() throws Exception {
// NOTE(review): the fakeDataFile line appears twice with identical text; this looks like
// the old/new pair of a whitespace-only change rather than two intended calls - confirm.
HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f1");
HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f1");
ConsistencyGuard passing = new FailSafeConsistencyGuard(fs, getConsistencyGuardConfig());
// JUnit 4 form: bare call, exception expected via the annotation.
passing.waitTillFileDisappears(new Path(basePath + "/partition/path/f1_1-0-1_000.parquet"));
// JUnit 5 form: the same call, with the expected exception asserted explicitly.
assertThrows(TimeoutException.class, () -> {
passing.waitTillFileDisappears(new Path(basePath + "/partition/path/f1_1-0-1_000.parquet"));
});
}
private ConsistencyGuardConfig getConsistencyGuardConfig() {

View File

@@ -18,8 +18,6 @@
package org.apache.hudi.table;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hudi.client.HoodieReadClient;
import org.apache.hudi.client.HoodieWriteClient;
import org.apache.hudi.client.WriteStatus;
@@ -57,23 +55,27 @@ import org.apache.hudi.hadoop.HoodieParquetInputFormat;
import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat;
import org.apache.hudi.index.HoodieIndex;
import org.apache.hudi.index.HoodieIndex.IndexType;
import org.apache.hudi.table.action.deltacommit.DeleteDeltaCommitActionExecutor;
import org.apache.hudi.table.action.deltacommit.DeltaCommitActionExecutor;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.table.action.deltacommit.DeltaCommitActionExecutor;
import org.apache.hudi.table.action.deltacommit.DeleteDeltaCommitActionExecutor;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.spark.api.java.JavaRDD;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -82,9 +84,9 @@ import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.apache.hudi.common.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
public class TestMergeOnReadTable extends HoodieClientTestHarness {
@@ -94,7 +96,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
private HoodieParquetRealtimeInputFormat rtInputFormat;
private JobConf rtJobConf;
@Before
@BeforeEach
public void init() throws IOException {
initDFS();
initSparkContexts("TestHoodieMergeOnReadTable");
@@ -114,7 +116,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
rtInputFormat.setConf(rtJobConf);
}
@After
@AfterEach
public void clean() throws IOException {
cleanupDFS();
cleanupSparkContexts();
@@ -159,13 +161,13 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
// verify that there is a commit
metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieTimeline timeline = metaClient.getCommitTimeline().filterCompletedInstants();
assertEquals("Expecting a single commit.", 1,
timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants());
assertEquals(1, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants(),
"Expecting a single commit.");
String latestCompactionCommitTime = timeline.lastInstant().get().getTimestamp();
assertTrue(HoodieTimeline.compareTimestamps("000", latestCompactionCommitTime, HoodieTimeline.LESSER));
assertEquals("Must contain 200 records", 200,
HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, "000").count());
assertEquals(200, HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, "000").count(),
"Must contain 200 records");
}
}
@@ -310,7 +312,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
Option<HoodieInstant> deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().firstInstant();
assertTrue(deltaCommit.isPresent());
assertEquals("Delta commit should be 001", "001", deltaCommit.get().getTimestamp());
assertEquals("001", deltaCommit.get().getTimestamp(), "Delta commit should be 001");
Option<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant();
assertFalse(commit.isPresent());
@@ -323,8 +325,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles);
dataFilesToRead = roView.getLatestBaseFiles();
assertTrue("should list the parquet files we wrote in the delta commit",
dataFilesToRead.findAny().isPresent());
assertTrue(dataFilesToRead.findAny().isPresent(),
"should list the parquet files we wrote in the delta commit");
/**
* Write 2 (only updates, written to .log file)
@@ -352,7 +354,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
metaClient = HoodieTableMetaClient.reload(metaClient);
deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().lastInstant();
assertTrue(deltaCommit.isPresent());
assertEquals("Latest Delta commit should be 004", "004", deltaCommit.get().getTimestamp());
assertEquals("004", deltaCommit.get().getTimestamp(), "Latest Delta commit should be 004");
commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant();
assertFalse(commit.isPresent());
@@ -365,7 +367,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
List<String> dataFiles = roView.getLatestBaseFiles().map(HoodieBaseFile::getPath).collect(Collectors.toList());
List<GenericRecord> recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath);
// Wrote 20 records and deleted 20 records, so remaining 20-20 = 0
assertEquals("Must contain 0 records", 0, recordsRead.size());
assertEquals(0, recordsRead.size(), "Must contain 0 records");
}
}
@@ -394,7 +396,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
Option<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant();
assertTrue(commit.isPresent());
assertEquals("commit should be 001", "001", commit.get().getTimestamp());
assertEquals("001", commit.get().getTimestamp(), "commit should be 001");
/**
* Write 2 (updates)
@@ -451,7 +453,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
Option<HoodieInstant> deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().firstInstant();
assertTrue(deltaCommit.isPresent());
assertEquals("Delta commit should be 001", "001", deltaCommit.get().getTimestamp());
assertEquals("001", deltaCommit.get().getTimestamp(), "Delta commit should be 001");
Option<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant();
assertFalse(commit.isPresent());
@@ -464,8 +466,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles);
dataFilesToRead = roView.getLatestBaseFiles();
assertTrue("should list the parquet files we wrote in the delta commit",
dataFilesToRead.findAny().isPresent());
assertTrue(dataFilesToRead.findAny().isPresent(),
"should list the parquet files we wrote in the delta commit");
/**
* Write 2 (inserts + updates - testing failed delta commit)
@@ -491,11 +493,11 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
secondClient.rollback(commitTime1);
allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath());
// After rollback, there should be no parquet file with the failed commit time
Assert.assertEquals(Arrays.stream(allFiles)
.filter(file -> file.getPath().getName().contains(commitTime1)).count(), 0);
assertEquals(0, Arrays.stream(allFiles)
.filter(file -> file.getPath().getName().contains(commitTime1)).count());
dataFiles = roView.getLatestBaseFiles().map(HoodieBaseFile::getPath).collect(Collectors.toList());
recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath);
assertEquals(recordsRead.size(), 200);
assertEquals(200, recordsRead.size());
}
/**
@@ -511,7 +513,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
List<String> dataFiles = roView.getLatestBaseFiles().map(HoodieBaseFile::getPath).collect(Collectors.toList());
List<GenericRecord> recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath);
assertEquals(recordsRead.size(), 200);
assertEquals(200, recordsRead.size());
writeRecords = jsc.parallelize(copyOfRecords, 1);
writeStatusJavaRDD = thirdClient.upsert(writeRecords, commitTime2);
@@ -524,8 +526,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
thirdClient.rollback(commitTime2);
allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath());
// After rollback, there should be no parquet file with the failed commit time
Assert.assertEquals(Arrays.stream(allFiles)
.filter(file -> file.getPath().getName().contains(commitTime2)).count(), 0);
assertEquals(0, Arrays.stream(allFiles)
.filter(file -> file.getPath().getName().contains(commitTime2)).count());
metaClient = HoodieTableMetaClient.reload(metaClient);
hoodieTable = HoodieTable.create(metaClient, cfg, jsc);
@@ -533,7 +535,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
dataFiles = roView.getLatestBaseFiles().map(HoodieBaseFile::getPath).collect(Collectors.toList());
recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath);
// check that the number of records read is still correct after rollback operation
assertEquals(recordsRead.size(), 200);
assertEquals(200, recordsRead.size());
// Test compaction commit rollback
/**
@@ -598,7 +600,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
Option<HoodieInstant> deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().firstInstant();
assertTrue(deltaCommit.isPresent());
assertEquals("Delta commit should be 001", "001", deltaCommit.get().getTimestamp());
assertEquals("001", deltaCommit.get().getTimestamp(), "Delta commit should be 001");
Option<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant();
assertFalse(commit.isPresent());
@@ -611,8 +613,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles);
dataFilesToRead = roView.getLatestBaseFiles();
assertTrue("Should list the parquet files we wrote in the delta commit",
dataFilesToRead.findAny().isPresent());
assertTrue(dataFilesToRead.findAny().isPresent(),
"Should list the parquet files we wrote in the delta commit");
/**
* Write 2 (inserts + updates)
@@ -628,7 +630,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
List<String> dataFiles = roView.getLatestBaseFiles().map(hf -> hf.getPath()).collect(Collectors.toList());
List<GenericRecord> recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath);
assertEquals(recordsRead.size(), 200);
assertEquals(200, recordsRead.size());
statuses = nClient.upsert(jsc.parallelize(copyOfRecords, 1), newCommitTime).collect();
// Verify there are no errors
@@ -761,7 +763,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
Option<HoodieInstant> deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().firstInstant();
assertTrue(deltaCommit.isPresent());
assertEquals("Delta commit should be 001", "001", deltaCommit.get().getTimestamp());
assertEquals("001", deltaCommit.get().getTimestamp(), "Delta commit should be 001");
Option<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant();
assertFalse(commit.isPresent());
@@ -776,8 +778,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles);
dataFilesToRead = roView.getLatestBaseFiles();
List<HoodieBaseFile> dataFilesList = dataFilesToRead.collect(Collectors.toList());
assertTrue("Should list the parquet files we wrote in the delta commit",
dataFilesList.size() > 0);
assertTrue(dataFilesList.size() > 0,
"Should list the parquet files we wrote in the delta commit");
/**
* Write 2 (only updates + inserts, written to .log file + correction of existing parquet file size)
@@ -795,7 +797,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
metaClient = HoodieTableMetaClient.reload(metaClient);
deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().lastInstant();
assertTrue(deltaCommit.isPresent());
assertEquals("Latest Delta commit should be 002", "002", deltaCommit.get().getTimestamp());
assertEquals("002", deltaCommit.get().getTimestamp(), "Latest Delta commit should be 002");
commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant();
assertFalse(commit.isPresent());
@@ -813,7 +815,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
List<String> dataFiles = roView.getLatestBaseFiles().map(HoodieBaseFile::getPath).collect(Collectors.toList());
List<GenericRecord> recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath);
// Wrote 20 records in 2 batches
assertEquals("Must contain 40 records", 40, recordsRead.size());
assertEquals(40, recordsRead.size(), "Must contain 40 records");
}
}
@@ -855,7 +857,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
List<FileSlice> groupedLogFiles =
table.getSliceView().getLatestFileSlices(partitionPath).collect(Collectors.toList());
for (FileSlice fileSlice : groupedLogFiles) {
assertEquals("There should be 1 log file written for every data file", 1, fileSlice.getLogFiles().count());
assertEquals(1, fileSlice.getLogFiles().count(), "There should be 1 log file written for every data file");
}
}
@@ -874,14 +876,15 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
table = HoodieTable.create(metaClient, config, jsc);
HoodieActiveTimeline timeline = metaClient.getActiveTimeline();
assertTrue("Compaction commit should be > than last insert", HoodieTimeline
.compareTimestamps(timeline.lastInstant().get().getTimestamp(), newCommitTime, HoodieTimeline.GREATER));
assertTrue(HoodieTimeline
.compareTimestamps(timeline.lastInstant().get().getTimestamp(), newCommitTime, HoodieTimeline.GREATER),
"Compaction commit should be > than last insert");
for (String partitionPath : dataGen.getPartitionPaths()) {
List<FileSlice> groupedLogFiles =
table.getSliceView().getLatestFileSlices(partitionPath).collect(Collectors.toList());
for (FileSlice slice : groupedLogFiles) {
assertEquals("After compaction there should be no log files visible on a full view", 0, slice.getLogFiles().count());
assertEquals(0, slice.getLogFiles().count(), "After compaction there should be no log files visible on a full view");
}
List<WriteStatus> writeStatuses = result.collect();
assertTrue(writeStatuses.stream().anyMatch(writeStatus -> writeStatus.getStat().getPartitionPath().contentEquals(partitionPath)));
@@ -911,23 +914,23 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
for (String partitionPath : dataGen.getPartitionPaths()) {
assertEquals(0, tableRTFileSystemView.getLatestFileSlices(partitionPath)
.filter(fileSlice -> fileSlice.getBaseFile().isPresent()).count());
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).anyMatch(fileSlice -> fileSlice.getLogFiles().count() > 0));
assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).anyMatch(fileSlice -> fileSlice.getLogFiles().count() > 0));
numLogFiles += tableRTFileSystemView.getLatestFileSlices(partitionPath)
.filter(fileSlice -> fileSlice.getLogFiles().count() > 0).count();
}
Assert.assertTrue(numLogFiles > 0);
assertTrue(numLogFiles > 0);
// Do a compaction
String instantTime = writeClient.scheduleCompaction(Option.empty()).get().toString();
statuses = writeClient.compact(instantTime);
assertEquals(statuses.map(status -> status.getStat().getPath().contains("parquet")).count(), numLogFiles);
Assert.assertEquals(statuses.count(), numLogFiles);
assertEquals(statuses.count(), numLogFiles);
writeClient.commitCompaction(instantTime, statuses, Option.empty());
}
}
@Test
public void testInsertsGeneratedIntoLogFilesRollback() throws Exception {
public void testInsertsGeneratedIntoLogFilesRollback(@TempDir java.nio.file.Path tempFolder) throws Exception {
// insert 100 records
// Setting IndexType to be InMemory to simulate Global Index nature
HoodieWriteConfig config = getConfigBuilder(false, IndexType.INMEMORY).build();
@@ -942,14 +945,14 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
List<WriteStatus> writeStatuses = statuses.collect();
// Ensure that inserts are written to only log files
Assert.assertEquals(
writeStatuses.stream().filter(writeStatus -> !writeStatus.getStat().getPath().contains("log")).count(), 0);
Assert.assertTrue(
assertEquals(0,
writeStatuses.stream().filter(writeStatus -> !writeStatus.getStat().getPath().contains("log")).count());
assertTrue(
writeStatuses.stream().anyMatch(writeStatus -> writeStatus.getStat().getPath().contains("log")));
// rollback a failed commit
boolean rollback = writeClient.rollback(newCommitTime);
Assert.assertTrue(rollback);
assertTrue(rollback);
newCommitTime = "101";
writeClient.startCommitWithTime(newCommitTime);
@@ -972,9 +975,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
// Save the .commit file to local directory.
// Rollback will be called twice to test the case where rollback failed first time and retried.
// We got the "BaseCommitTime cannot be null" exception before the fix
TemporaryFolder folder = new TemporaryFolder();
folder.create();
File file = folder.newFile();
File file = Files.createTempFile(tempFolder, null, null).toFile();
metaClient.getFs().copyToLocalFile(new Path(metaClient.getMetaPath(), fileName),
new Path(file.getAbsolutePath()));
writeClient.rollback(newCommitTime);
@@ -985,8 +986,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
long numLogFiles = 0;
for (String partitionPath : dataGen.getPartitionPaths()) {
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).noneMatch(fileSlice -> fileSlice.getBaseFile().isPresent()));
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).noneMatch(fileSlice -> fileSlice.getLogFiles().count() > 0));
assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).noneMatch(fileSlice -> fileSlice.getBaseFile().isPresent()));
assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).noneMatch(fileSlice -> fileSlice.getLogFiles().count() > 0));
numLogFiles += tableRTFileSystemView.getLatestFileSlices(partitionPath)
.filter(fileSlice -> fileSlice.getLogFiles().count() > 0).count();
}
@@ -996,7 +997,6 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
Thread.sleep(1000);
// Rollback again to pretend the first rollback failed partially. This should not error out
writeClient.rollback(newCommitTime);
folder.delete();
}
}
@@ -1022,19 +1022,19 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
long numLogFiles = 0;
for (String partitionPath : dataGen.getPartitionPaths()) {
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).noneMatch(fileSlice -> fileSlice.getBaseFile().isPresent()));
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).anyMatch(fileSlice -> fileSlice.getLogFiles().count() > 0));
assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).noneMatch(fileSlice -> fileSlice.getBaseFile().isPresent()));
assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).anyMatch(fileSlice -> fileSlice.getLogFiles().count() > 0));
numLogFiles += tableRTFileSystemView.getLatestFileSlices(partitionPath)
.filter(fileSlice -> fileSlice.getLogFiles().count() > 0).count();
}
Assert.assertTrue(numLogFiles > 0);
assertTrue(numLogFiles > 0);
// Do a compaction
newCommitTime = writeClient.scheduleCompaction(Option.empty()).get().toString();
statuses = writeClient.compact(newCommitTime);
// Ensure all log files have been compacted into parquet files
assertEquals(statuses.map(status -> status.getStat().getPath().contains("parquet")).count(), numLogFiles);
Assert.assertEquals(statuses.count(), numLogFiles);
assertEquals(statuses.count(), numLogFiles);
writeClient.commitCompaction(newCommitTime, statuses, Option.empty());
// Trigger a rollback of compaction
writeClient.rollback(newCommitTime);
@@ -1044,8 +1044,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
Option<HoodieInstant> lastInstant = ((SyncableFileSystemView) tableRTFileSystemView).getLastInstant();
System.out.println("Last Instant =" + lastInstant);
for (String partitionPath : dataGen.getPartitionPaths()) {
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).noneMatch(fileSlice -> fileSlice.getBaseFile().isPresent()));
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).anyMatch(fileSlice -> fileSlice.getLogFiles().count() > 0));
assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).noneMatch(fileSlice -> fileSlice.getBaseFile().isPresent()));
assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).anyMatch(fileSlice -> fileSlice.getLogFiles().count() > 0));
}
}
}
@@ -1077,7 +1077,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(records, 1);
JavaRDD<WriteStatus> statuses = client.insert(writeRecords, instantTime);
assertTrue("Commit should succeed", client.commit(instantTime, statuses));
assertTrue(client.commit(instantTime, statuses), "Commit should succeed");
// Read from commit file
table = HoodieTable.create(cfg, jsc);
@@ -1094,14 +1094,14 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
inserts += stat.getValue().getInserts();
}
}
Assert.assertEquals(inserts, 200);
assertEquals(200, inserts);
instantTime = "002";
client.startCommitWithTime(instantTime);
records = dataGen.generateUpdates(instantTime, records);
writeRecords = jsc.parallelize(records, 1);
statuses = client.upsert(writeRecords, instantTime);
assertTrue("Commit should succeed", client.commit(instantTime, statuses));
assertTrue(client.commit(instantTime, statuses), "Commit should succeed");
// Read from commit file
table = HoodieTable.create(cfg, jsc);
@@ -1122,8 +1122,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
}
}
Assert.assertEquals(inserts, 200);
Assert.assertEquals(upserts, 200);
assertEquals(200, inserts);
assertEquals(200, upserts);
client.rollback(instantTime);
@@ -1145,8 +1145,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
upserts += stat.getValue().getUpserts();
}
}
Assert.assertEquals(inserts, 200);
Assert.assertEquals(upserts, 0);
assertEquals(200, inserts);
assertEquals(0, upserts);
}
}
@@ -1168,7 +1168,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(records, 1);
JavaRDD<WriteStatus> statuses = client.insert(writeRecords, instantTime);
assertTrue("Commit should succeed", client.commit(instantTime, statuses));
assertTrue(client.commit(instantTime, statuses), "Commit should succeed");
// Read from commit file
HoodieTable table = HoodieTable.create(cfg, jsc);
@@ -1188,7 +1188,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
fileIdToUpsertsMap.put(stat.getKey(), stat.getValue().getUpserts());
}
}
Assert.assertEquals(inserts, 200);
assertEquals(200, inserts);
instantTime = "001";
client.startCommitWithTime(instantTime);
@@ -1197,7 +1197,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
records.addAll(dataGen.generateInserts(instantTime, 200));
writeRecords = jsc.parallelize(records, 1);
statuses = client.upsert(writeRecords, instantTime);
assertTrue("Commit should succeed", client.commit(instantTime, statuses));
assertTrue(client.commit(instantTime, statuses), "Commit should succeed");
// Read from commit file
table = HoodieTable.create(cfg, jsc);
@@ -1221,8 +1221,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
}
}
Assert.assertEquals(inserts, 400);
Assert.assertEquals(upserts, 200);
assertEquals(400, inserts);
assertEquals(200, upserts);
// Test small file handling after compaction
instantTime = "002";
@@ -1243,8 +1243,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
// Ensure that the rolling stats from the extra metadata of delta commits is copied over to the compaction commit
for (Map.Entry<String, Map<String, HoodieRollingStat>> entry : rollingStatMetadata.getPartitionToRollingStats()
.entrySet()) {
Assert.assertTrue(rollingStatMetadata1.getPartitionToRollingStats().containsKey(entry.getKey()));
Assert.assertEquals(rollingStatMetadata1.getPartitionToRollingStats().get(entry.getKey()).size(),
assertTrue(rollingStatMetadata1.getPartitionToRollingStats().containsKey(entry.getKey()));
assertEquals(rollingStatMetadata1.getPartitionToRollingStats().get(entry.getKey()).size(),
entry.getValue().size());
}
@@ -1256,7 +1256,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
records.addAll(dataGen.generateInserts(instantTime, 200));
writeRecords = jsc.parallelize(records, 1);
statuses = client.upsert(writeRecords, instantTime);
assertTrue("Commit should succeed", client.commit(instantTime, statuses));
assertTrue(client.commit(instantTime, statuses), "Commit should succeed");
// Read from commit file
table = HoodieTable.create(cfg, jsc);
@@ -1279,8 +1279,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
}
}
Assert.assertEquals(inserts, 600);
Assert.assertEquals(upserts, 600);
assertEquals(600, inserts);
assertEquals(600, upserts);
}
}
@@ -1309,21 +1309,21 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
Option<HoodieInstant> deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().firstInstant();
assertTrue(deltaCommit.isPresent());
assertEquals("Delta commit should be 001", "001", deltaCommit.get().getTimestamp());
assertEquals("001", deltaCommit.get().getTimestamp(), "Delta commit should be 001");
Option<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant();
assertFalse(commit.isPresent());
FileStatus[] allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath());
BaseFileOnlyView roView =
new HoodieTableFileSystemView(metaClient, metaClient.getCommitTimeline().filterCompletedInstants(), allFiles);
new HoodieTableFileSystemView(metaClient, metaClient.getCommitTimeline().filterCompletedInstants(), allFiles);
Stream<HoodieBaseFile> dataFilesToRead = roView.getLatestBaseFiles();
assertFalse(dataFilesToRead.findAny().isPresent());
roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles);
dataFilesToRead = roView.getLatestBaseFiles();
assertTrue("should list the parquet files we wrote in the delta commit",
dataFilesToRead.findAny().isPresent());
assertTrue(dataFilesToRead.findAny().isPresent(),
"should list the parquet files we wrote in the delta commit");
/**
* Write 2 (only updates, written to .log file)
@@ -1386,7 +1386,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
/**
 * Asserts that none of the given write statuses reports errors.
 *
 * @param statuses write statuses to check; each one must return false from hasErrors()
 */
private void assertNoWriteErrors(List<WriteStatus> statuses) {
// Verify there are no errors
for (WriteStatus status : statuses) {
// The two assertFalse lines are the same check in both argument orders: JUnit 4 takes the
// failure message first, JUnit 5 takes it last.
assertFalse("Errors found in write of " + status.getFileId(), status.hasErrors());
assertFalse(status.hasErrors(), "Errors found in write of " + status.getFileId());
}
}
@@ -1402,21 +1402,21 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
Option<HoodieInstant> deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().lastInstant();
assertTrue(deltaCommit.isPresent());
Assert.assertEquals("Delta commit should be specified value", commitTime, deltaCommit.get().getTimestamp());
assertEquals(commitTime, deltaCommit.get().getTimestamp(), "Delta commit should be specified value");
Option<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().lastInstant();
assertFalse(commit.isPresent());
FileStatus[] allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath());
BaseFileOnlyView roView =
new HoodieTableFileSystemView(metaClient, metaClient.getCommitTimeline().filterCompletedInstants(), allFiles);
new HoodieTableFileSystemView(metaClient, metaClient.getCommitTimeline().filterCompletedInstants(), allFiles);
Stream<HoodieBaseFile> dataFilesToRead = roView.getLatestBaseFiles();
assertTrue(!dataFilesToRead.findAny().isPresent());
roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles);
dataFilesToRead = roView.getLatestBaseFiles();
assertTrue("should list the parquet files we wrote in the delta commit",
dataFilesToRead.findAny().isPresent());
assertTrue(dataFilesToRead.findAny().isPresent(),
"should list the parquet files we wrote in the delta commit");
return allFiles;
}
@@ -1435,8 +1435,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
metaClient = HoodieTableMetaClient.reload(metaClient);
Option<HoodieInstant> deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().lastInstant();
assertTrue(deltaCommit.isPresent());
assertEquals("Latest Delta commit should match specified time",
commitTime, deltaCommit.get().getTimestamp());
assertEquals(commitTime, deltaCommit.get().getTimestamp(),
"Latest Delta commit should match specified time");
Option<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant();
assertFalse(commit.isPresent());
@@ -1452,7 +1452,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
throws Exception {
HoodieTestUtils.init(jsc.hadoopConfiguration(), basePath, HoodieTableType.MERGE_ON_READ);
setupIncremental(roJobConf, startCommitTime, numCommitsToPull, stopAtCompaction);
FileInputFormat.setInputPaths(roJobConf, basePath + "/" + partitionPath);
FileInputFormat.setInputPaths(roJobConf, Paths.get(basePath, partitionPath).toString());
return roInputFormat.listStatus(roJobConf);
}
@@ -1465,7 +1465,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
throws Exception {
HoodieTestUtils.init(jsc.hadoopConfiguration(), basePath, HoodieTableType.MERGE_ON_READ);
setupIncremental(rtJobConf, startCommitTime, numCommitsToPull, false);
FileInputFormat.setInputPaths(rtJobConf, basePath + "/" + partitionPath);
FileInputFormat.setInputPaths(rtJobConf, Paths.get(basePath, partitionPath).toString());
return rtInputFormat.listStatus(rtJobConf);
}
@@ -1492,9 +1492,9 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
JobConf jobConf, int expectedRecords, String... expectedCommits) {
assertEquals(expectedNumFiles, files.length);
Set<String> expectedCommitsSet = Arrays.asList(expectedCommits).stream().collect(Collectors.toSet());
Set<String> expectedCommitsSet = Arrays.stream(expectedCommits).collect(Collectors.toSet());
List<GenericRecord> records = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(
Arrays.asList(basePath + "/" + partitionPath), basePath, jobConf, inputFormat);
Collections.singletonList(Paths.get(basePath, partitionPath).toString()), basePath, jobConf, inputFormat);
assertEquals(expectedRecords, records.size());
Set<String> actualCommits = records.stream().map(r ->
r.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString()).collect(Collectors.toSet());

View File

@@ -47,26 +47,25 @@ import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.parquet.avro.AvroReadSupport;
import org.apache.parquet.hadoop.ParquetReader;
import org.apache.spark.TaskContext;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.File;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
@@ -74,7 +73,7 @@ public class TestCopyOnWriteActionExecutor extends HoodieClientTestHarness {
private static final Logger LOG = LogManager.getLogger(TestCopyOnWriteActionExecutor.class);
@Before
@BeforeEach
public void setUp() throws Exception {
initSparkContexts("TestCopyOnWriteActionExecutor");
initPath();
@@ -83,7 +82,7 @@ public class TestCopyOnWriteActionExecutor extends HoodieClientTestHarness {
initFileSystem();
}
@After
@AfterEach
public void tearDown() throws Exception {
cleanupSparkContexts();
cleanupMetaClient();
@@ -110,8 +109,8 @@ public class TestCopyOnWriteActionExecutor extends HoodieClientTestHarness {
return Pair.of(io.makeNewPath(record.getPartitionPath()), writeToken);
}).collect().get(0);
Assert.assertEquals(newPathWithWriteToken.getKey().toString(), this.basePath + "/" + partitionPath + "/"
+ FSUtils.makeDataFileName(instantTime, newPathWithWriteToken.getRight(), fileName));
assertEquals(newPathWithWriteToken.getKey().toString(), Paths.get(this.basePath, partitionPath,
FSUtils.makeDataFileName(instantTime, newPathWithWriteToken.getRight(), fileName)).toString());
}
private HoodieWriteConfig makeHoodieClientConfig() throws Exception {
@@ -134,7 +133,7 @@ public class TestCopyOnWriteActionExecutor extends HoodieClientTestHarness {
writeClient.startCommitWithTime(firstCommitTime);
metaClient = HoodieTableMetaClient.reload(metaClient);
String partitionPath = "/2016/01/31";
String partitionPath = "2016/01/31";
HoodieCopyOnWriteTable table = (HoodieCopyOnWriteTable) HoodieTable.create(metaClient, config, jsc);
// Get some records belong to the same partition (2016/01/31)
@@ -227,7 +226,7 @@ public class TestCopyOnWriteActionExecutor extends HoodieClientTestHarness {
updatedReader.close();
// Also check the numRecordsWritten
WriteStatus writeStatus = statuses.get(0);
assertEquals("Should be only one file generated", 1, statuses.size());
assertEquals(1, statuses.size(), "Should be only one file generated");
assertEquals(4, writeStatus.getStat().getNumWrites());// 3 rewritten records + 1 new record
}
@@ -239,7 +238,7 @@ public class TestCopyOnWriteActionExecutor extends HoodieClientTestHarness {
hoodieInputFormat.setConf(jobConf);
HoodieTestUtils.init(jsc.hadoopConfiguration(), basePath, HoodieTableType.COPY_ON_WRITE);
setupIncremental(jobConf, startCommitTime, numCommitsToPull);
FileInputFormat.setInputPaths(jobConf, basePath + partitionPath);
FileInputFormat.setInputPaths(jobConf, Paths.get(basePath, partitionPath).toString());
return hoodieInputFormat.listStatus(jobConf);
}
@@ -390,13 +389,13 @@ public class TestCopyOnWriteActionExecutor extends HoodieClientTestHarness {
// Check the updated file
int counts = 0;
for (File file : new File(basePath + "/2016/01/31").listFiles()) {
for (File file : Paths.get(basePath, "2016/01/31").toFile().listFiles()) {
if (file.getName().endsWith(".parquet") && FSUtils.getCommitTime(file.getName()).equals(instantTime)) {
LOG.info(file.getName() + "-" + file.length());
counts++;
}
}
assertEquals("If the number of records are more than 1150, then there should be a new file", 3, counts);
assertEquals(3, counts, "If the number of records are more than 1150, then there should be a new file");
}
@Test
@@ -416,7 +415,7 @@ public class TestCopyOnWriteActionExecutor extends HoodieClientTestHarness {
WriteStatus writeStatus = ws.get(0).get(0);
String fileId = writeStatus.getFileId();
metaClient.getFs().create(new Path(basePath + "/.hoodie/000.commit")).close();
metaClient.getFs().create(new Path(Paths.get(basePath, ".hoodie", "000.commit").toString())).close();
final List<HoodieRecord> updates = dataGen.generateUpdatesWithHoodieAvroPayload(instantTime, inserts);
String partitionPath = updates.get(0).getPartitionPath();
@@ -429,11 +428,8 @@ public class TestCopyOnWriteActionExecutor extends HoodieClientTestHarness {
assertEquals(updates.size() - numRecordsInPartition, updateStatus.get(0).get(0).getTotalErrorRecords());
}
@After
@AfterEach
public void cleanup() {
if (basePath != null) {
new File(basePath).delete();
}
if (jsc != null) {
jsc.stop();
}

View File

@@ -18,10 +18,6 @@
package org.apache.hudi.table.action.commit;
import static org.junit.Assert.assertEquals;
import java.util.ArrayList;
import java.util.List;
import org.apache.hudi.common.HoodieClientTestHarness;
import org.apache.hudi.common.HoodieClientTestUtils;
import org.apache.hudi.common.HoodieTestDataGenerator;
@@ -36,18 +32,25 @@ import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieCopyOnWriteTable;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.WorkloadProfile;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.util.ArrayList;
import java.util.List;
import scala.Tuple2;
import static org.junit.jupiter.api.Assertions.assertEquals;
public class TestUpsertPartitioner extends HoodieClientTestHarness {
private static final Logger LOG = LogManager.getLogger(TestUpsertPartitioner.class);
@Before
@BeforeEach
public void setUp() throws Exception {
initSparkContexts("TestUpsertPartitioner");
initPath();
@@ -56,7 +59,7 @@ public class TestUpsertPartitioner extends HoodieClientTestHarness {
initFileSystem();
}
@After
@AfterEach
public void tearDown() throws Exception {
cleanupSparkContexts();
cleanupMetaClient();
@@ -89,8 +92,9 @@ public class TestUpsertPartitioner extends HoodieClientTestHarness {
records.addAll(updateRecords);
WorkloadProfile profile = new WorkloadProfile(jsc.parallelize(records));
UpsertPartitioner partitioner = new UpsertPartitioner(profile, jsc, table, config);
assertEquals("Update record should have gone to the 1 update partition", 0, partitioner.getPartition(
new Tuple2<>(updateRecords.get(0).getKey(), Option.ofNullable(updateRecords.get(0).getCurrentLocation()))));
assertEquals(0, partitioner.getPartition(
new Tuple2<>(updateRecords.get(0).getKey(), Option.ofNullable(updateRecords.get(0).getCurrentLocation()))),
"Update record should have gone to the 1 update partition");
return partitioner;
}
@@ -100,7 +104,7 @@ public class TestUpsertPartitioner extends HoodieClientTestHarness {
// Inserts + Updates... Check all updates go together & inserts subsplit
UpsertPartitioner partitioner = getUpsertPartitioner(0, 200, 100, 1024, testPartitionPath, false);
List<InsertBucket> insertBuckets = partitioner.getInsertBuckets(testPartitionPath);
assertEquals("Total of 2 insert buckets", 2, insertBuckets.size());
assertEquals(2, insertBuckets.size(), "Total of 2 insert buckets");
}
@Test
@@ -111,33 +115,33 @@ public class TestUpsertPartitioner extends HoodieClientTestHarness {
UpsertPartitioner partitioner = getUpsertPartitioner(1000 * 1024, 400, 100, 800 * 1024, testPartitionPath, false);
List<InsertBucket> insertBuckets = partitioner.getInsertBuckets(testPartitionPath);
assertEquals("Should have 3 partitions", 3, partitioner.numPartitions());
assertEquals("Bucket 0 is UPDATE", BucketType.UPDATE,
partitioner.getBucketInfo(0).bucketType);
assertEquals("Bucket 1 is INSERT", BucketType.INSERT,
partitioner.getBucketInfo(1).bucketType);
assertEquals("Bucket 2 is INSERT", BucketType.INSERT,
partitioner.getBucketInfo(2).bucketType);
assertEquals("Total of 3 insert buckets", 3, insertBuckets.size());
assertEquals("First insert bucket must be same as update bucket", 0, insertBuckets.get(0).bucketNumber);
assertEquals("First insert bucket should have weight 0.5", 0.5, insertBuckets.get(0).weight, 0.01);
assertEquals(3, partitioner.numPartitions(), "Should have 3 partitions");
assertEquals(BucketType.UPDATE, partitioner.getBucketInfo(0).bucketType,
"Bucket 0 is UPDATE");
assertEquals(BucketType.INSERT, partitioner.getBucketInfo(1).bucketType,
"Bucket 1 is INSERT");
assertEquals(BucketType.INSERT, partitioner.getBucketInfo(2).bucketType,
"Bucket 2 is INSERT");
assertEquals(3, insertBuckets.size(), "Total of 3 insert buckets");
assertEquals(0, insertBuckets.get(0).bucketNumber, "First insert bucket must be same as update bucket");
assertEquals(0.5, insertBuckets.get(0).weight, 0.01, "First insert bucket should have weight 0.5");
// Now with insert split size auto tuned
partitioner = getUpsertPartitioner(1000 * 1024, 2400, 100, 800 * 1024, testPartitionPath, true);
insertBuckets = partitioner.getInsertBuckets(testPartitionPath);
assertEquals("Should have 4 partitions", 4, partitioner.numPartitions());
assertEquals("Bucket 0 is UPDATE", BucketType.UPDATE,
partitioner.getBucketInfo(0).bucketType);
assertEquals("Bucket 1 is INSERT", BucketType.INSERT,
partitioner.getBucketInfo(1).bucketType);
assertEquals("Bucket 2 is INSERT", BucketType.INSERT,
partitioner.getBucketInfo(2).bucketType);
assertEquals("Bucket 3 is INSERT", BucketType.INSERT,
partitioner.getBucketInfo(3).bucketType);
assertEquals("Total of 4 insert buckets", 4, insertBuckets.size());
assertEquals("First insert bucket must be same as update bucket", 0, insertBuckets.get(0).bucketNumber);
assertEquals("First insert bucket should have weight 0.5", 200.0 / 2400, insertBuckets.get(0).weight, 0.01);
assertEquals(4, partitioner.numPartitions(), "Should have 4 partitions");
assertEquals(BucketType.UPDATE, partitioner.getBucketInfo(0).bucketType,
"Bucket 0 is UPDATE");
assertEquals(BucketType.INSERT, partitioner.getBucketInfo(1).bucketType,
"Bucket 1 is INSERT");
assertEquals(BucketType.INSERT, partitioner.getBucketInfo(2).bucketType,
"Bucket 2 is INSERT");
assertEquals(BucketType.INSERT, partitioner.getBucketInfo(3).bucketType,
"Bucket 3 is INSERT");
assertEquals(4, insertBuckets.size(), "Total of 4 insert buckets");
assertEquals(0, insertBuckets.get(0).bucketNumber, "First insert bucket must be same as update bucket");
assertEquals(200.0 / 2400, insertBuckets.get(0).weight, 0.01, "First insert bucket should have weight 0.5");
}
private HoodieWriteConfig.Builder makeHoodieClientConfigBuilder() throws Exception {

View File

@@ -52,8 +52,7 @@ import org.apache.hudi.table.HoodieTable;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.spark.api.java.JavaRDD;
import org.junit.Assert;
import org.junit.Test;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.util.ArrayList;
@@ -63,9 +62,10 @@ import java.util.Map;
import java.util.stream.Collectors;
import static org.apache.hudi.common.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
/**
* Test Cases for Async Compaction and Ingestion interaction.
@@ -111,9 +111,9 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
HoodieInstant pendingCompactionInstant =
metaClient.getActiveTimeline().filterPendingCompactionTimeline().firstInstant().get();
assertEquals("Pending Compaction instant has expected instant time", pendingCompactionInstant.getTimestamp(),
compactionInstantTime);
assertEquals("Pending Compaction instant has expected state", pendingCompactionInstant.getState(), State.REQUESTED);
assertEquals(compactionInstantTime, pendingCompactionInstant.getTimestamp(),
"Pending Compaction instant has expected instant time");
assertEquals(State.REQUESTED, pendingCompactionInstant.getState(), "Pending Compaction instant has expected state");
moveCompactionFromRequestedToInflight(compactionInstantTime, cfg);
@@ -169,11 +169,11 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
HoodieInstant pendingCompactionInstant =
metaClient.getActiveTimeline().filterPendingCompactionTimeline().firstInstant().get();
assertEquals("Pending Compaction instant has expected instant time", pendingCompactionInstant.getTimestamp(),
compactionInstantTime);
assertEquals(compactionInstantTime, pendingCompactionInstant.getTimestamp(),
"Pending Compaction instant has expected instant time");
HoodieInstant inflightInstant =
metaClient.getActiveTimeline().filterPendingExcludingCompaction().firstInstant().get();
assertEquals("inflight instant has expected instant time", inflightInstant.getTimestamp(), inflightInstantTime);
assertEquals(inflightInstantTime, inflightInstant.getTimestamp(), "inflight instant has expected instant time");
// This should rollback
client.startCommitWithTime(nextInflightInstantTime);
@@ -181,13 +181,14 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
// Validate
metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
inflightInstant = metaClient.getActiveTimeline().filterPendingExcludingCompaction().firstInstant().get();
assertEquals("inflight instant has expected instant time", inflightInstant.getTimestamp(), nextInflightInstantTime);
assertEquals("Expect only one inflight instant", 1, metaClient.getActiveTimeline()
.filterPendingExcludingCompaction().getInstants().count());
assertEquals(inflightInstant.getTimestamp(), nextInflightInstantTime, "inflight instant has expected instant time");
assertEquals(1, metaClient.getActiveTimeline()
.filterPendingExcludingCompaction().getInstants().count(),
"Expect only one inflight instant");
// Expect pending Compaction to be present
pendingCompactionInstant = metaClient.getActiveTimeline().filterPendingCompactionTimeline().firstInstant().get();
assertEquals("Pending Compaction instant has expected instant time", pendingCompactionInstant.getTimestamp(),
compactionInstantTime);
assertEquals(compactionInstantTime, pendingCompactionInstant.getTimestamp(),
"Pending Compaction instant has expected instant time");
}
}
@@ -237,8 +238,8 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
String compactionInstantTime = "006";
int numRecs = 2000;
List<HoodieRecord> records = dataGen.generateInserts(firstInstantTime, numRecs);
records = runNextDeltaCommits(client, readClient, Arrays.asList(firstInstantTime, secondInstantTime), records, cfg, true,
final List<HoodieRecord> initalRecords = dataGen.generateInserts(firstInstantTime, numRecs);
final List<HoodieRecord> records = runNextDeltaCommits(client, readClient, Arrays.asList(firstInstantTime, secondInstantTime), initalRecords, cfg, true,
new ArrayList<>());
// Schedule compaction but do not run them
@@ -246,17 +247,12 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
HoodieInstant pendingCompactionInstant =
metaClient.getActiveTimeline().filterPendingCompactionTimeline().firstInstant().get();
assertEquals("Pending Compaction instant has expected instant time", pendingCompactionInstant.getTimestamp(), compactionInstantTime);
assertEquals(compactionInstantTime, pendingCompactionInstant.getTimestamp(), "Pending Compaction instant has expected instant time");
boolean gotException = false;
try {
assertThrows(IllegalArgumentException.class, () -> {
runNextDeltaCommits(client, readClient, Arrays.asList(failedInstantTime), records, cfg, false,
Arrays.asList(compactionInstantTime));
} catch (IllegalArgumentException iex) {
// Latest pending compaction instant time must be earlier than this instant time. Should fail here
gotException = true;
}
assertTrue("Latest pending compaction instant time must be earlier than this instant time", gotException);
}, "Latest pending compaction instant time must be earlier than this instant time");
}
@Test
@@ -283,17 +279,12 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
HoodieInstant inflightInstant =
metaClient.getActiveTimeline().filterPendingExcludingCompaction().firstInstant().get();
assertEquals("inflight instant has expected instant time", inflightInstant.getTimestamp(), inflightInstantTime);
assertEquals(inflightInstantTime, inflightInstant.getTimestamp(), "inflight instant has expected instant time");
boolean gotException = false;
try {
assertThrows(IllegalArgumentException.class, () -> {
// Schedule compaction but do not run them
scheduleCompaction(compactionInstantTime, client, cfg);
} catch (IllegalArgumentException iex) {
// Earliest ingestion inflight instant time must be later than compaction time. Should fail here
gotException = true;
}
assertTrue("Earliest ingestion inflight instant time must be later than compaction time", gotException);
}, "Earliest ingestion inflight instant time must be later than compaction time");
}
@Test
@@ -304,44 +295,32 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
HoodieWriteClient client = getHoodieWriteClient(cfg, true);
HoodieReadClient readClient = getHoodieReadClient(cfg.getBasePath());
String firstInstantTime = "001";
String secondInstantTime = "004";
String compactionInstantTime = "002";
final String firstInstantTime = "001";
final String secondInstantTime = "004";
final String compactionInstantTime = "002";
int numRecs = 2000;
List<HoodieRecord> records = dataGen.generateInserts(firstInstantTime, numRecs);
runNextDeltaCommits(client, readClient, Arrays.asList(firstInstantTime, secondInstantTime), records, cfg, true,
new ArrayList<>());
boolean gotException = false;
try {
assertThrows(IllegalArgumentException.class, () -> {
// Schedule compaction but do not run them
scheduleCompaction(compactionInstantTime, client, cfg);
} catch (IllegalArgumentException iex) {
gotException = true;
}
assertTrue("Compaction Instant to be scheduled cannot have older timestamp", gotException);
}, "Compaction Instant to be scheduled cannot have older timestamp");
// Schedule with timestamp same as that of committed instant
gotException = false;
try {
assertThrows(IllegalArgumentException.class, () -> {
// Schedule compaction but do not run them
scheduleCompaction(secondInstantTime, client, cfg);
} catch (IllegalArgumentException iex) {
gotException = true;
}
assertTrue("Compaction Instant to be scheduled cannot have same timestamp as committed instant", gotException);
}, "Compaction Instant to be scheduled cannot have same timestamp as committed instant");
compactionInstantTime = "006";
scheduleCompaction(compactionInstantTime, client, cfg);
gotException = false;
try {
final String compactionInstantTime2 = "006";
scheduleCompaction(compactionInstantTime2, client, cfg);
assertThrows(IllegalArgumentException.class, () -> {
// Schedule compaction with the same times as a pending compaction
scheduleCompaction(secondInstantTime, client, cfg);
} catch (IllegalArgumentException iex) {
gotException = true;
}
assertTrue("Compaction Instant to be scheduled cannot have same timestamp as a pending compaction", gotException);
}, "Compaction Instant to be scheduled cannot have same timestamp as a pending compaction");
}
@Test
@@ -406,13 +385,13 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
fileSliceList.forEach(fileSlice -> {
Pair<String, HoodieCompactionOperation> opPair = fgIdToCompactionOperation.get(fileSlice.getFileGroupId());
if (opPair != null) {
assertEquals("Expect baseInstant to match compaction Instant", fileSlice.getBaseInstantTime(), opPair.getKey());
assertTrue("Expect atleast one log file to be present where the latest delta commit was written",
fileSlice.getLogFiles().count() > 0);
assertFalse("Expect no data-file to be present", fileSlice.getBaseFile().isPresent());
assertEquals(fileSlice.getBaseInstantTime(), opPair.getKey(), "Expect baseInstant to match compaction Instant");
assertTrue(fileSlice.getLogFiles().count() > 0,
"Expect atleast one log file to be present where the latest delta commit was written");
assertFalse(fileSlice.getBaseFile().isPresent(), "Expect no data-file to be present");
} else {
assertTrue("Expect baseInstant to be less than or equal to latestDeltaCommit",
fileSlice.getBaseInstantTime().compareTo(latestDeltaCommit) <= 0);
assertTrue(fileSlice.getBaseInstantTime().compareTo(latestDeltaCommit) <= 0,
"Expect baseInstant to be less than or equal to latestDeltaCommit");
}
});
}
@@ -446,8 +425,8 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
HoodieTable hoodieTable = getHoodieTable(metaClient, cfg);
List<HoodieBaseFile> dataFilesToRead = getCurrentLatestDataFiles(hoodieTable, cfg);
assertTrue("should list the parquet files we wrote in the delta commit",
dataFilesToRead.stream().findAny().isPresent());
assertTrue(dataFilesToRead.stream().findAny().isPresent(),
"should list the parquet files we wrote in the delta commit");
validateDeltaCommit(firstInstant, fgIdToCompactionOperation, cfg);
}
@@ -467,7 +446,7 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
metaClient.getActiveTimeline().transitionCompactionRequestedToInflight(compactionInstant);
HoodieInstant instant = metaClient.getActiveTimeline().reload().filterPendingCompactionTimeline().getInstants()
.filter(in -> in.getTimestamp().equals(compactionInstantTime)).findAny().get();
assertTrue("Instant must be marked inflight", instant.isInflight());
assertTrue(instant.isInflight(), "Instant must be marked inflight");
}
private void scheduleCompaction(String compactionInstantTime, HoodieWriteClient client, HoodieWriteConfig cfg)
@@ -475,7 +454,7 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
client.scheduleCompactionAtInstant(compactionInstantTime, Option.empty());
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
HoodieInstant instant = metaClient.getActiveTimeline().filterPendingCompactionTimeline().lastInstant().get();
assertEquals("Last compaction instant must be the one set", instant.getTimestamp(), compactionInstantTime);
assertEquals(compactionInstantTime, instant.getTimestamp(), "Last compaction instant must be the one set");
}
private void scheduleAndExecuteCompaction(String compactionInstantTime, HoodieWriteClient client, HoodieTable table,
@@ -489,28 +468,30 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
client.compact(compactionInstantTime);
List<FileSlice> fileSliceList = getCurrentLatestFileSlices(table);
assertTrue("Ensure latest file-slices are not empty", fileSliceList.stream().findAny().isPresent());
assertFalse("Verify all file-slices have base-instant same as compaction instant", fileSliceList.stream()
.anyMatch(fs -> !fs.getBaseInstantTime().equals(compactionInstantTime)));
assertFalse("Verify all file-slices have data-files",
fileSliceList.stream().anyMatch(fs -> !fs.getBaseFile().isPresent()));
assertTrue(fileSliceList.stream().findAny().isPresent(), "Ensure latest file-slices are not empty");
assertFalse(fileSliceList.stream()
.anyMatch(fs -> !fs.getBaseInstantTime().equals(compactionInstantTime)),
"Verify all file-slices have base-instant same as compaction instant");
assertFalse(fileSliceList.stream().anyMatch(fs -> !fs.getBaseFile().isPresent()),
"Verify all file-slices have data-files");
if (hasDeltaCommitAfterPendingCompaction) {
assertFalse("Verify all file-slices have atleast one log-file",
fileSliceList.stream().anyMatch(fs -> fs.getLogFiles().count() == 0));
assertFalse(fileSliceList.stream().anyMatch(fs -> fs.getLogFiles().count() == 0),
"Verify all file-slices have atleast one log-file");
} else {
assertFalse("Verify all file-slices have no log-files",
fileSliceList.stream().anyMatch(fs -> fs.getLogFiles().count() > 0));
assertFalse(fileSliceList.stream().anyMatch(fs -> fs.getLogFiles().count() > 0),
"Verify all file-slices have no log-files");
}
// verify that there is a commit
table = getHoodieTable(new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath(), true), cfg);
HoodieTimeline timeline = table.getMetaClient().getCommitTimeline().filterCompletedInstants();
String latestCompactionCommitTime = timeline.lastInstant().get().getTimestamp();
assertEquals("Expect compaction instant time to be the latest commit time", latestCompactionCommitTime,
compactionInstantTime);
Assert.assertEquals("Must contain expected records", expectedNumRecs,
HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, "000").count());
assertEquals(latestCompactionCommitTime, compactionInstantTime,
"Expect compaction instant time to be the latest commit time");
assertEquals(expectedNumRecs,
HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, "000").count(),
"Must contain expected records");
}
@@ -530,11 +511,11 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
Option<HoodieInstant> deltaCommit =
metaClient.getActiveTimeline().reload().getDeltaCommitTimeline().filterCompletedInstants().lastInstant();
if (skipCommit && !cfg.shouldAutoCommit()) {
assertTrue("Delta commit should not be latest instant",
deltaCommit.get().getTimestamp().compareTo(instantTime) < 0);
assertTrue(deltaCommit.get().getTimestamp().compareTo(instantTime) < 0,
"Delta commit should not be latest instant");
} else {
assertTrue(deltaCommit.isPresent());
assertEquals("Delta commit should be latest instant", instantTime, deltaCommit.get().getTimestamp());
assertEquals(instantTime, deltaCommit.get().getTimestamp(), "Delta commit should be latest instant");
}
return statusList;
}

View File

@@ -43,23 +43,24 @@ import org.apache.hudi.table.HoodieTable;
import org.apache.hadoop.conf.Configuration;
import org.apache.spark.api.java.JavaRDD;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.util.List;
import java.util.stream.Collectors;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.assertFalse;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
public class TestHoodieCompactor extends HoodieClientTestHarness {
private Configuration hadoopConf;
private HoodieTableMetaClient metaClient;
@Before
@BeforeEach
public void setUp() throws Exception {
// Initialize a local spark env
initSparkContexts("TestHoodieCompactor");
@@ -72,7 +73,7 @@ public class TestHoodieCompactor extends HoodieClientTestHarness {
initTestDataGenerator();
}
@After
@AfterEach
public void tearDown() throws Exception {
cleanupFileSystem();
cleanupTestDataGenerator();
@@ -100,13 +101,15 @@ public class TestHoodieCompactor extends HoodieClientTestHarness {
.withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build());
}
// Test: scheduling and running compaction on a COPY_ON_WRITE table is expected to throw
// HoodieNotSupportedException.
// NOTE: this is a diff rendering without +/- markers, so removed and added lines of the hunk appear
// together; the comments below mark which side each line belongs to.
// Removed side: the expected exception was declared on the annotation.
@Test(expected = HoodieNotSupportedException.class)
// Added side: plain annotation; the expectation is expressed by assertThrows in the body.
@Test
public void testCompactionOnCopyOnWriteFail() throws Exception {
// Initialize the table under basePath as COPY_ON_WRITE.
metaClient = HoodieTestUtils.init(hadoopConf, basePath, HoodieTableType.COPY_ON_WRITE);
HoodieTable<?> table = HoodieTable.create(metaClient, getConfig(), jsc);
String compactionInstantTime = HoodieActiveTimeline.createNewInstantTime();
// Removed side: the two calls ran directly in the test body.
table.scheduleCompaction(jsc, compactionInstantTime, Option.empty());
table.compact(jsc, compactionInstantTime);
// Added side: the same two calls wrapped in assertThrows, which fails the test unless a
// HoodieNotSupportedException is thrown from the lambda.
assertThrows(HoodieNotSupportedException.class, () -> {
table.scheduleCompaction(jsc, compactionInstantTime, Option.empty());
table.compact(jsc, compactionInstantTime);
});
}
@Test
@@ -123,7 +126,7 @@ public class TestHoodieCompactor extends HoodieClientTestHarness {
String compactionInstantTime = HoodieActiveTimeline.createNewInstantTime();
Option<HoodieCompactionPlan> plan = table.scheduleCompaction(jsc, compactionInstantTime, Option.empty());
assertFalse("If there is nothing to compact, result will be empty", plan.isPresent());
assertFalse(plan.isPresent(), "If there is nothing to compact, result will be empty");
}
}
@@ -159,7 +162,7 @@ public class TestHoodieCompactor extends HoodieClientTestHarness {
List<FileSlice> groupedLogFiles =
table.getSliceView().getLatestFileSlices(partitionPath).collect(Collectors.toList());
for (FileSlice fileSlice : groupedLogFiles) {
assertEquals("There should be 1 log file written for every data file", 1, fileSlice.getLogFiles().count());
assertEquals(1, fileSlice.getLogFiles().count(), "There should be 1 log file written for every data file");
}
}
HoodieTestUtils.createDeltaCommitFiles(basePath, newCommitTime);