[HUDI-810] Migrate ClientTestHarness to JUnit 5 (#1553)
This commit is contained in:
@@ -35,16 +35,16 @@ import org.apache.hudi.index.HoodieIndex;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.junit.Test;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.Assert.fail;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
/**
|
||||
* Test Cases for rollback of snapshots and commits.
|
||||
@@ -105,12 +105,12 @@ public class TestClientRollback extends TestHoodieClientBase {
|
||||
List<HoodieBaseFile> dataFiles = partitionPaths.stream().flatMap(s -> {
|
||||
return view1.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("003"));
|
||||
}).collect(Collectors.toList());
|
||||
assertEquals("The data files for commit 003 should be present", 3, dataFiles.size());
|
||||
assertEquals(3, dataFiles.size(), "The data files for commit 003 should be present");
|
||||
|
||||
dataFiles = partitionPaths.stream().flatMap(s -> {
|
||||
return view1.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("002"));
|
||||
}).collect(Collectors.toList());
|
||||
assertEquals("The data files for commit 002 should be present", 3, dataFiles.size());
|
||||
assertEquals(3, dataFiles.size(), "The data files for commit 002 should be present");
|
||||
|
||||
/**
|
||||
* Write 4 (updates)
|
||||
@@ -128,15 +128,12 @@ public class TestClientRollback extends TestHoodieClientBase {
|
||||
final BaseFileOnlyView view2 = table.getBaseFileOnlyView();
|
||||
|
||||
dataFiles = partitionPaths.stream().flatMap(s -> view2.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("004"))).collect(Collectors.toList());
|
||||
assertEquals("The data files for commit 004 should be present", 3, dataFiles.size());
|
||||
assertEquals(3, dataFiles.size(), "The data files for commit 004 should be present");
|
||||
|
||||
// rolling back to a non existent savepoint must not succeed
|
||||
try {
|
||||
assertThrows(HoodieRollbackException.class, () -> {
|
||||
client.restoreToSavepoint("001");
|
||||
fail("Rolling back to non-existent savepoint should not be allowed");
|
||||
} catch (HoodieRollbackException e) {
|
||||
// this is good
|
||||
}
|
||||
}, "Rolling back to non-existent savepoint should not be allowed");
|
||||
|
||||
// rollback to savepoint 002
|
||||
HoodieInstant savepoint = table.getCompletedSavepointTimeline().getInstants().findFirst().get();
|
||||
@@ -146,13 +143,13 @@ public class TestClientRollback extends TestHoodieClientBase {
|
||||
table = HoodieTable.create(metaClient, getConfig(), jsc);
|
||||
final BaseFileOnlyView view3 = table.getBaseFileOnlyView();
|
||||
dataFiles = partitionPaths.stream().flatMap(s -> view3.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("002"))).collect(Collectors.toList());
|
||||
assertEquals("The data files for commit 002 be available", 3, dataFiles.size());
|
||||
assertEquals(3, dataFiles.size(), "The data files for commit 002 be available");
|
||||
|
||||
dataFiles = partitionPaths.stream().flatMap(s -> view3.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("003"))).collect(Collectors.toList());
|
||||
assertEquals("The data files for commit 003 should be rolled back", 0, dataFiles.size());
|
||||
assertEquals(0, dataFiles.size(), "The data files for commit 003 should be rolled back");
|
||||
|
||||
dataFiles = partitionPaths.stream().flatMap(s -> view3.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("004"))).collect(Collectors.toList());
|
||||
assertEquals("The data files for commit 004 should be rolled back", 0, dataFiles.size());
|
||||
assertEquals(0, dataFiles.size(), "The data files for commit 004 should be rolled back");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -195,12 +192,9 @@ public class TestClientRollback extends TestHoodieClientBase {
|
||||
try (HoodieWriteClient client = getHoodieWriteClient(config, false);) {
|
||||
|
||||
// Rollback commit 1 (this should fail, since commit2 is still around)
|
||||
try {
|
||||
assertThrows(HoodieRollbackException.class, () -> {
|
||||
client.rollback(commitTime1);
|
||||
fail("Should have thrown an exception ");
|
||||
} catch (HoodieRollbackException hrbe) {
|
||||
// should get here
|
||||
}
|
||||
}, "Should have thrown an exception ");
|
||||
|
||||
// Rollback commit3
|
||||
client.rollback(commitTime3);
|
||||
|
||||
@@ -36,10 +36,9 @@ import org.apache.hudi.table.action.compact.OperationResult;
|
||||
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.junit.After;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashSet;
|
||||
@@ -52,6 +51,9 @@ import java.util.stream.Stream;
|
||||
import static org.apache.hudi.client.CompactionAdminClient.getRenamingActionsToAlignWithCompactionOperation;
|
||||
import static org.apache.hudi.client.CompactionAdminClient.renameLogFile;
|
||||
import static org.apache.hudi.common.model.HoodieTableType.MERGE_ON_READ;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
public class TestCompactionAdminClient extends TestHoodieClientBase {
|
||||
|
||||
@@ -60,7 +62,7 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
|
||||
private HoodieTableMetaClient metaClient;
|
||||
private CompactionAdminClient client;
|
||||
|
||||
@Before
|
||||
@BeforeEach
|
||||
public void setUp() throws Exception {
|
||||
initPath();
|
||||
initSparkContexts();
|
||||
@@ -68,7 +70,7 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
|
||||
client = new CompactionAdminClient(jsc, basePath);
|
||||
}
|
||||
|
||||
@After
|
||||
@AfterEach
|
||||
public void tearDown() {
|
||||
client.close();
|
||||
metaClient = null;
|
||||
@@ -137,7 +139,7 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
|
||||
metaClient = new HoodieTableMetaClient(metaClient.getHadoopConf(), basePath, true);
|
||||
List<ValidationOpResult> result = client.validateCompactionPlan(metaClient, compactionInstant, 1);
|
||||
if (expNumRepairs > 0) {
|
||||
Assert.assertTrue("Expect some failures in validation", result.stream().anyMatch(r -> !r.isSuccess()));
|
||||
assertTrue(result.stream().anyMatch(r -> !r.isSuccess()), "Expect some failures in validation");
|
||||
}
|
||||
// Now repair
|
||||
List<Pair<HoodieLogFile, HoodieLogFile>> undoFiles =
|
||||
@@ -155,18 +157,18 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
|
||||
Map<String, String> expRenameFiles = renameFiles.stream()
|
||||
.collect(Collectors.toMap(p -> p.getLeft().getPath().toString(), x -> x.getRight().getPath().toString()));
|
||||
if (expNumRepairs > 0) {
|
||||
Assert.assertFalse("Rename Files must be non-empty", renameFiles.isEmpty());
|
||||
assertFalse(renameFiles.isEmpty(), "Rename Files must be non-empty");
|
||||
} else {
|
||||
Assert.assertTrue("Rename Files must be empty", renameFiles.isEmpty());
|
||||
assertTrue(renameFiles.isEmpty(), "Rename Files must be empty");
|
||||
}
|
||||
expRenameFiles.forEach((key, value) -> LOG.info("Key :" + key + " renamed to " + value + " rolled back to "
|
||||
+ renameFilesFromUndo.get(key)));
|
||||
|
||||
Assert.assertEquals("Undo must completely rollback renames", expRenameFiles, renameFilesFromUndo);
|
||||
assertEquals(expRenameFiles, renameFilesFromUndo, "Undo must completely rollback renames");
|
||||
// Now expect validation to succeed
|
||||
result = client.validateCompactionPlan(metaClient, compactionInstant, 1);
|
||||
Assert.assertTrue("Expect no failures in validation", result.stream().allMatch(OperationResult::isSuccess));
|
||||
Assert.assertEquals("Expected Num Repairs", expNumRepairs, undoFiles.size());
|
||||
assertTrue(result.stream().allMatch(OperationResult::isSuccess), "Expect no failures in validation");
|
||||
assertEquals(expNumRepairs, undoFiles.size(), "Expected Num Repairs");
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -178,8 +180,8 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
|
||||
metaClient = new HoodieTableMetaClient(metaClient.getHadoopConf(), basePath, true);
|
||||
// Ensure compaction-plan is good to begin with
|
||||
List<ValidationOpResult> validationResults = client.validateCompactionPlan(metaClient, compactionInstant, 1);
|
||||
Assert.assertFalse("Some validations failed",
|
||||
validationResults.stream().anyMatch(v -> !v.isSuccess()));
|
||||
assertFalse(validationResults.stream().anyMatch(v -> !v.isSuccess()),
|
||||
"Some validations failed");
|
||||
}
|
||||
|
||||
private void validateRenameFiles(List<Pair<HoodieLogFile, HoodieLogFile>> renameFiles, String ingestionInstant,
|
||||
@@ -189,8 +191,8 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
|
||||
Set<HoodieLogFile> uniqOldLogFiles = new HashSet<>();
|
||||
|
||||
renameFiles.forEach(lfPair -> {
|
||||
Assert.assertFalse("Old Log File Names do not collide", uniqOldLogFiles.contains(lfPair.getKey()));
|
||||
Assert.assertFalse("New Log File Names do not collide", uniqNewLogFiles.contains(lfPair.getValue()));
|
||||
assertFalse(uniqOldLogFiles.contains(lfPair.getKey()), "Old Log File Names do not collide");
|
||||
assertFalse(uniqNewLogFiles.contains(lfPair.getValue()), "New Log File Names do not collide");
|
||||
uniqOldLogFiles.add(lfPair.getKey());
|
||||
uniqNewLogFiles.add(lfPair.getValue());
|
||||
});
|
||||
@@ -198,17 +200,17 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
|
||||
renameFiles.forEach(lfPair -> {
|
||||
HoodieLogFile oldLogFile = lfPair.getLeft();
|
||||
HoodieLogFile newLogFile = lfPair.getValue();
|
||||
Assert.assertEquals("Base Commit time is expected", ingestionInstant, newLogFile.getBaseCommitTime());
|
||||
Assert.assertEquals("Base Commit time is expected", compactionInstant, oldLogFile.getBaseCommitTime());
|
||||
Assert.assertEquals("File Id is expected", oldLogFile.getFileId(), newLogFile.getFileId());
|
||||
assertEquals(ingestionInstant, newLogFile.getBaseCommitTime(), "Base Commit time is expected");
|
||||
assertEquals(compactionInstant, oldLogFile.getBaseCommitTime(), "Base Commit time is expected");
|
||||
assertEquals(oldLogFile.getFileId(), newLogFile.getFileId(), "File Id is expected");
|
||||
HoodieLogFile lastLogFileBeforeCompaction =
|
||||
fsView.getLatestMergedFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], ingestionInstant)
|
||||
.filter(fs -> fs.getFileId().equals(oldLogFile.getFileId())).map(fs -> fs.getLogFiles().findFirst().get())
|
||||
.findFirst().get();
|
||||
Assert.assertEquals("Log Version expected",
|
||||
lastLogFileBeforeCompaction.getLogVersion() + oldLogFile.getLogVersion(), newLogFile.getLogVersion());
|
||||
Assert.assertTrue("Log version does not collide",
|
||||
newLogFile.getLogVersion() > lastLogFileBeforeCompaction.getLogVersion());
|
||||
assertEquals(lastLogFileBeforeCompaction.getLogVersion() + oldLogFile.getLogVersion(),
|
||||
newLogFile.getLogVersion(), "Log Version expected");
|
||||
assertTrue(newLogFile.getLogVersion() > lastLogFileBeforeCompaction.getLogVersion(),
|
||||
"Log version does not collide");
|
||||
});
|
||||
}
|
||||
|
||||
@@ -243,8 +245,8 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
|
||||
Set<HoodieLogFile> expLogFilesToBeRenamed = fsView.getLatestFileSlices(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0])
|
||||
.filter(fs -> fs.getBaseInstantTime().equals(compactionInstant)).flatMap(FileSlice::getLogFiles)
|
||||
.collect(Collectors.toSet());
|
||||
Assert.assertEquals("Log files belonging to file-slices created because of compaction request must be renamed",
|
||||
expLogFilesToBeRenamed, gotLogFilesToBeRenamed);
|
||||
assertEquals(expLogFilesToBeRenamed, gotLogFilesToBeRenamed,
|
||||
"Log files belonging to file-slices created because of compaction request must be renamed");
|
||||
|
||||
if (skipUnSchedule) {
|
||||
// Do the renaming only but do not touch the compaction plan - Needed for repair tests
|
||||
@@ -274,9 +276,10 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
|
||||
new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
|
||||
// Expect all file-slice whose base-commit is same as compaction commit to contain no new Log files
|
||||
newFsView.getLatestFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], compactionInstant, true)
|
||||
.filter(fs -> fs.getBaseInstantTime().equals(compactionInstant)).forEach(fs -> {
|
||||
Assert.assertFalse("No Data file must be present", fs.getBaseFile().isPresent());
|
||||
Assert.assertEquals("No Log Files", 0, fs.getLogFiles().count());
|
||||
.filter(fs -> fs.getBaseInstantTime().equals(compactionInstant))
|
||||
.forEach(fs -> {
|
||||
assertFalse(fs.getBaseFile().isPresent(), "No Data file must be present");
|
||||
assertEquals(0, fs.getLogFiles().count(), "No Log Files");
|
||||
});
|
||||
|
||||
// Ensure same number of log-files before and after renaming per fileId
|
||||
@@ -286,10 +289,10 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
|
||||
.map(fs -> Pair.of(fs.getFileId(), fs.getLogFiles().count()))
|
||||
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
|
||||
|
||||
Assert.assertEquals("Each File Id has same number of log-files", fileIdToCountsBeforeRenaming,
|
||||
fileIdToCountsAfterRenaming);
|
||||
Assert.assertEquals("Not Empty", numEntriesPerInstant, fileIdToCountsAfterRenaming.size());
|
||||
Assert.assertEquals("Expected number of renames", expNumRenames, renameFiles.size());
|
||||
assertEquals(fileIdToCountsBeforeRenaming, fileIdToCountsAfterRenaming,
|
||||
"Each File Id has same number of log-files");
|
||||
assertEquals(numEntriesPerInstant, fileIdToCountsAfterRenaming.size(), "Not Empty");
|
||||
assertEquals(expNumRenames, renameFiles.size(), "Expected number of renames");
|
||||
return renameFiles;
|
||||
}
|
||||
|
||||
@@ -315,8 +318,8 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
|
||||
.filter(fs -> fs.getBaseInstantTime().equals(compactionInstant))
|
||||
.filter(fs -> fs.getFileId().equals(op.getFileId())).flatMap(FileSlice::getLogFiles)
|
||||
.collect(Collectors.toSet());
|
||||
Assert.assertEquals("Log files belonging to file-slices created because of compaction request must be renamed",
|
||||
expLogFilesToBeRenamed, gotLogFilesToBeRenamed);
|
||||
assertEquals(expLogFilesToBeRenamed, gotLogFilesToBeRenamed,
|
||||
"Log files belonging to file-slices created because of compaction request must be renamed");
|
||||
validateRenameFiles(renameFiles, ingestionInstant, compactionInstant, fsView);
|
||||
|
||||
Map<String, Long> fileIdToCountsBeforeRenaming =
|
||||
@@ -335,9 +338,10 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
|
||||
// Expect all file-slice whose base-commit is same as compaction commit to contain no new Log files
|
||||
newFsView.getLatestFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], compactionInstant, true)
|
||||
.filter(fs -> fs.getBaseInstantTime().equals(compactionInstant))
|
||||
.filter(fs -> fs.getFileId().equals(op.getFileId())).forEach(fs -> {
|
||||
Assert.assertFalse("No Data file must be present", fs.getBaseFile().isPresent());
|
||||
Assert.assertEquals("No Log Files", 0, fs.getLogFiles().count());
|
||||
.filter(fs -> fs.getFileId().equals(op.getFileId()))
|
||||
.forEach(fs -> {
|
||||
assertFalse(fs.getBaseFile().isPresent(), "No Data file must be present");
|
||||
assertEquals(0, fs.getLogFiles().count(), "No Log Files");
|
||||
});
|
||||
|
||||
// Ensure same number of log-files before and after renaming per fileId
|
||||
@@ -348,9 +352,9 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
|
||||
.map(fs -> Pair.of(fs.getFileId(), fs.getLogFiles().count()))
|
||||
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
|
||||
|
||||
Assert.assertEquals("Each File Id has same number of log-files", fileIdToCountsBeforeRenaming,
|
||||
fileIdToCountsAfterRenaming);
|
||||
Assert.assertEquals("Not Empty", 1, fileIdToCountsAfterRenaming.size());
|
||||
Assert.assertEquals("Expected number of renames", expNumRenames, renameFiles.size());
|
||||
assertEquals(fileIdToCountsBeforeRenaming, fileIdToCountsAfterRenaming,
|
||||
"Each File Id has same number of log-files");
|
||||
assertEquals(1, fileIdToCountsAfterRenaming.size(), "Not Empty");
|
||||
assertEquals(expNumRenames, renameFiles.size(), "Expected number of renames");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -51,9 +51,8 @@ import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.sql.SQLContext;
|
||||
import org.junit.After;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Before;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
@@ -64,9 +63,9 @@ import java.util.Set;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
/**
|
||||
* Base Class providing setup/cleanup and utility methods for testing Hoodie Client facing tests.
|
||||
@@ -75,12 +74,12 @@ public class TestHoodieClientBase extends HoodieClientTestHarness {
|
||||
|
||||
private static final Logger LOG = LogManager.getLogger(TestHoodieClientBase.class);
|
||||
|
||||
@Before
|
||||
@BeforeEach
|
||||
public void setUp() throws Exception {
|
||||
initResources();
|
||||
}
|
||||
|
||||
@After
|
||||
@AfterEach
|
||||
public void tearDown() throws Exception {
|
||||
cleanupResources();
|
||||
}
|
||||
@@ -170,7 +169,7 @@ public class TestHoodieClientBase extends HoodieClientTestHarness {
|
||||
public static void assertNoWriteErrors(List<WriteStatus> statuses) {
|
||||
// Verify there are no errors
|
||||
for (WriteStatus status : statuses) {
|
||||
assertFalse("Errors found in write of " + status.getFileId(), status.hasErrors());
|
||||
assertFalse(status.hasErrors(), "Errors found in write of " + status.getFileId());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -200,7 +199,7 @@ public class TestHoodieClientBase extends HoodieClientTestHarness {
|
||||
assertTrue(HoodiePartitionMetadata.hasPartitionMetadata(fs, new Path(basePath, partitionPath)));
|
||||
HoodiePartitionMetadata pmeta = new HoodiePartitionMetadata(fs, new Path(basePath, partitionPath));
|
||||
pmeta.readFromFS();
|
||||
Assert.assertEquals(HoodieTestDataGenerator.DEFAULT_PARTITION_DEPTH, pmeta.getPartitionDepth());
|
||||
assertEquals(HoodieTestDataGenerator.DEFAULT_PARTITION_DEPTH, pmeta.getPartitionDepth());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -212,9 +211,9 @@ public class TestHoodieClientBase extends HoodieClientTestHarness {
|
||||
*/
|
||||
protected void checkTaggedRecords(List<HoodieRecord> taggedRecords, String instantTime) {
|
||||
for (HoodieRecord rec : taggedRecords) {
|
||||
assertTrue("Record " + rec + " found with no location.", rec.isCurrentLocationKnown());
|
||||
assertEquals("All records should have commit time " + instantTime + ", since updates were made",
|
||||
rec.getCurrentLocation().getInstantTime(), instantTime);
|
||||
assertTrue(rec.isCurrentLocationKnown(), "Record " + rec + " found with no location.");
|
||||
assertEquals(rec.getCurrentLocation().getInstantTime(), instantTime,
|
||||
"All records should have commit time " + instantTime + ", since updates were made");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -231,7 +230,7 @@ public class TestHoodieClientBase extends HoodieClientTestHarness {
|
||||
if (!partitionToKeys.containsKey(partitionPath)) {
|
||||
partitionToKeys.put(partitionPath, new HashSet<>());
|
||||
}
|
||||
assertFalse("key " + key + " is duplicate within partition " + partitionPath, partitionToKeys.get(partitionPath).contains(key));
|
||||
assertFalse(partitionToKeys.get(partitionPath).contains(key), "key " + key + " is duplicate within partition " + partitionPath);
|
||||
partitionToKeys.get(partitionPath).add(key);
|
||||
}
|
||||
}
|
||||
@@ -472,30 +471,30 @@ public class TestHoodieClientBase extends HoodieClientTestHarness {
|
||||
HoodieTimeline timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
|
||||
|
||||
if (assertForCommit) {
|
||||
assertEquals("Expecting " + expTotalCommits + " commits.", expTotalCommits,
|
||||
timeline.findInstantsAfter(initCommitTime, Integer.MAX_VALUE).countInstants());
|
||||
Assert.assertEquals("Latest commit should be " + newCommitTime, newCommitTime,
|
||||
timeline.lastInstant().get().getTimestamp());
|
||||
assertEquals("Must contain " + expRecordsInThisCommit + " records", expRecordsInThisCommit,
|
||||
HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count());
|
||||
assertEquals(expTotalCommits, timeline.findInstantsAfter(initCommitTime, Integer.MAX_VALUE).countInstants(),
|
||||
"Expecting " + expTotalCommits + " commits.");
|
||||
assertEquals(newCommitTime, timeline.lastInstant().get().getTimestamp(),
|
||||
"Latest commit should be " + newCommitTime);
|
||||
assertEquals(expRecordsInThisCommit, HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count(),
|
||||
"Must contain " + expRecordsInThisCommit + " records");
|
||||
|
||||
// Check the entire dataset has all records still
|
||||
String[] fullPartitionPaths = new String[dataGen.getPartitionPaths().length];
|
||||
for (int i = 0; i < fullPartitionPaths.length; i++) {
|
||||
fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]);
|
||||
}
|
||||
assertEquals("Must contain " + expTotalRecords + " records", expTotalRecords,
|
||||
HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count());
|
||||
assertEquals(expTotalRecords, HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count(),
|
||||
"Must contain " + expTotalRecords + " records");
|
||||
|
||||
// Check that the incremental consumption from prevCommitTime
|
||||
assertEquals("Incremental consumption from " + prevCommitTime + " should give all records in latest commit",
|
||||
HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count(),
|
||||
HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, prevCommitTime).count());
|
||||
assertEquals(HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count(),
|
||||
HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, prevCommitTime).count(),
|
||||
"Incremental consumption from " + prevCommitTime + " should give all records in latest commit");
|
||||
if (commitTimesBetweenPrevAndNew.isPresent()) {
|
||||
commitTimesBetweenPrevAndNew.get().forEach(ct -> {
|
||||
assertEquals("Incremental consumption from " + ct + " should give all records in latest commit",
|
||||
HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count(),
|
||||
HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, ct).count());
|
||||
assertEquals(HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count(),
|
||||
HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, ct).count(),
|
||||
"Incremental consumption from " + ct + " should give all records in latest commit");
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -540,26 +539,26 @@ public class TestHoodieClientBase extends HoodieClientTestHarness {
|
||||
HoodieTimeline timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
|
||||
|
||||
if (assertForCommit) {
|
||||
assertEquals("Expecting 3 commits.", 3,
|
||||
timeline.findInstantsAfter(initCommitTime, Integer.MAX_VALUE).countInstants());
|
||||
Assert.assertEquals("Latest commit should be " + newCommitTime, newCommitTime,
|
||||
timeline.lastInstant().get().getTimestamp());
|
||||
assertEquals("Must contain " + expRecordsInThisCommit + " records", expRecordsInThisCommit,
|
||||
HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count());
|
||||
assertEquals(3, timeline.findInstantsAfter(initCommitTime, Integer.MAX_VALUE).countInstants(),
|
||||
"Expecting 3 commits.");
|
||||
assertEquals(newCommitTime, timeline.lastInstant().get().getTimestamp(),
|
||||
"Latest commit should be " + newCommitTime);
|
||||
assertEquals(expRecordsInThisCommit, HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count(),
|
||||
"Must contain " + expRecordsInThisCommit + " records");
|
||||
|
||||
// Check the entire dataset has all records still
|
||||
String[] fullPartitionPaths = new String[dataGen.getPartitionPaths().length];
|
||||
for (int i = 0; i < fullPartitionPaths.length; i++) {
|
||||
fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]);
|
||||
}
|
||||
assertEquals("Must contain " + expTotalRecords + " records", expTotalRecords,
|
||||
HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count());
|
||||
assertEquals(expTotalRecords, HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count(),
|
||||
"Must contain " + expTotalRecords + " records");
|
||||
|
||||
// Check that the incremental consumption from prevCommitTime
|
||||
assertEquals("Incremental consumption from " + prevCommitTime + " should give no records in latest commit,"
|
||||
+ " since it is a delete operation",
|
||||
HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count(),
|
||||
HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, prevCommitTime).count());
|
||||
assertEquals(HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count(),
|
||||
HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, prevCommitTime).count(),
|
||||
"Incremental consumption from " + prevCommitTime + " should give no records in latest commit,"
|
||||
+ " since it is a delete operation");
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -18,7 +18,6 @@
|
||||
|
||||
package org.apache.hudi.client;
|
||||
|
||||
import java.util.HashSet;
|
||||
import org.apache.hudi.common.HoodieClientTestUtils;
|
||||
import org.apache.hudi.common.HoodieTestDataGenerator;
|
||||
import org.apache.hudi.common.TestRawTripPayload;
|
||||
@@ -49,15 +48,14 @@ import org.apache.hudi.exception.HoodieIOException;
|
||||
import org.apache.hudi.index.HoodieIndex;
|
||||
import org.apache.hudi.index.HoodieIndex.IndexType;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hudi.table.action.commit.WriteHelper;
|
||||
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hudi.table.action.commit.WriteHelper;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
@@ -65,6 +63,7 @@ import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
@@ -75,10 +74,10 @@ import static org.apache.hudi.common.HoodieTestDataGenerator.NULL_SCHEMA;
|
||||
import static org.apache.hudi.common.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA;
|
||||
import static org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion.VERSION_0;
|
||||
import static org.apache.hudi.common.util.ParquetUtils.readRowKeysFromParquet;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.Assert.fail;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
import static org.mockito.Mockito.mock;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
@@ -154,11 +153,11 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
||||
JavaRDD<WriteStatus> result = insertFirstBatch(cfg, client, newCommitTime, prevCommitTime, numRecords, writeFn,
|
||||
isPrepped, false, numRecords);
|
||||
|
||||
assertFalse("If Autocommit is false, then commit should not be made automatically",
|
||||
HoodieTestUtils.doesCommitExist(basePath, newCommitTime));
|
||||
assertTrue("Commit should succeed", client.commit(newCommitTime, result));
|
||||
assertTrue("After explicit commit, commit file should be created",
|
||||
HoodieTestUtils.doesCommitExist(basePath, newCommitTime));
|
||||
assertFalse(HoodieTestUtils.doesCommitExist(basePath, newCommitTime),
|
||||
"If Autocommit is false, then commit should not be made automatically");
|
||||
assertTrue(client.commit(newCommitTime, result), "Commit should succeed");
|
||||
assertTrue(HoodieTestUtils.doesCommitExist(basePath, newCommitTime),
|
||||
"After explicit commit, commit file should be created");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -251,7 +250,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
||||
if (!partitionToKeys.containsKey(partitionPath)) {
|
||||
partitionToKeys.put(partitionPath, new HashSet<>());
|
||||
}
|
||||
assertFalse("key " + key + " is duplicate within partition " + partitionPath, partitionToKeys.get(partitionPath).contains(key));
|
||||
assertFalse(partitionToKeys.get(partitionPath).contains(key), "key " + key + " is duplicate within partition " + partitionPath);
|
||||
partitionToKeys.get(partitionPath).add(key);
|
||||
}
|
||||
}
|
||||
@@ -326,8 +325,8 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
||||
for (int i = 0; i < fullPartitionPaths.length; i++) {
|
||||
fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]);
|
||||
}
|
||||
assertEquals("Must contain " + 200 + " records", 200,
|
||||
HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count());
|
||||
assertEquals(200, HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count(),
|
||||
"Must contain " + 200 + " records");
|
||||
|
||||
// Perform Delete again on upgraded dataset.
|
||||
prevCommitTime = newCommitTime;
|
||||
@@ -340,17 +339,17 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
||||
|
||||
HoodieActiveTimeline activeTimeline = new HoodieActiveTimeline(metaClient, false);
|
||||
List<HoodieInstant> instants = activeTimeline.getCommitTimeline().getInstants().collect(Collectors.toList());
|
||||
Assert.assertEquals(5, instants.size());
|
||||
Assert.assertEquals(new HoodieInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "001"),
|
||||
assertEquals(5, instants.size());
|
||||
assertEquals(new HoodieInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "001"),
|
||||
instants.get(0));
|
||||
Assert.assertEquals(new HoodieInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "004"),
|
||||
assertEquals(new HoodieInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "004"),
|
||||
instants.get(1));
|
||||
// New Format should have all states of instants
|
||||
Assert.assertEquals(new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.COMMIT_ACTION, "006"),
|
||||
assertEquals(new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.COMMIT_ACTION, "006"),
|
||||
instants.get(2));
|
||||
Assert.assertEquals(new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "006"),
|
||||
assertEquals(new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "006"),
|
||||
instants.get(3));
|
||||
Assert.assertEquals(new HoodieInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "006"),
|
||||
assertEquals(new HoodieInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "006"),
|
||||
instants.get(4));
|
||||
}
|
||||
|
||||
@@ -425,8 +424,8 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
||||
for (int i = 0; i < fullPartitionPaths.length; i++) {
|
||||
fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]);
|
||||
}
|
||||
assertEquals("Must contain 100 records", 100,
|
||||
HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count());
|
||||
assertEquals(100, HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count(),
|
||||
"Must contain 100 records");
|
||||
|
||||
/**
|
||||
* Write 2. Updates with different partition
|
||||
@@ -448,8 +447,8 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
||||
for (int i = 0; i < fullPartitionPaths.length; i++) {
|
||||
fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]);
|
||||
}
|
||||
assertEquals("Must contain 100 records", 100,
|
||||
HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count());
|
||||
assertEquals(100, HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count(),
|
||||
"Must contain 100 records");
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -476,12 +475,11 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
||||
|
||||
assertNoWriteErrors(statuses);
|
||||
|
||||
assertEquals("Just 1 file needs to be added.", 1, statuses.size());
|
||||
assertEquals(1, statuses.size(), "Just 1 file needs to be added.");
|
||||
String file1 = statuses.get(0).getFileId();
|
||||
Assert.assertEquals("file should contain 100 records",
|
||||
assertEquals(100,
|
||||
readRowKeysFromParquet(jsc.hadoopConfiguration(), new Path(basePath, statuses.get(0).getStat().getPath()))
|
||||
.size(),
|
||||
100);
|
||||
.size(), "file should contain 100 records");
|
||||
|
||||
// Update + Inserts such that they just expand file1
|
||||
String commitTime2 = "002";
|
||||
@@ -496,18 +494,18 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
||||
statuses = client.upsert(insertAndUpdatesRDD2, commitTime2).collect();
|
||||
assertNoWriteErrors(statuses);
|
||||
|
||||
assertEquals("Just 1 file needs to be updated.", 1, statuses.size());
|
||||
assertEquals("Existing file should be expanded", file1, statuses.get(0).getFileId());
|
||||
assertEquals("Existing file should be expanded", commitTime1, statuses.get(0).getStat().getPrevCommit());
|
||||
assertEquals(1, statuses.size(), "Just 1 file needs to be updated.");
|
||||
assertEquals(file1, statuses.get(0).getFileId(), "Existing file should be expanded");
|
||||
assertEquals(commitTime1, statuses.get(0).getStat().getPrevCommit(), "Existing file should be expanded");
|
||||
Path newFile = new Path(basePath, statuses.get(0).getStat().getPath());
|
||||
assertEquals("file should contain 140 records", readRowKeysFromParquet(jsc.hadoopConfiguration(), newFile).size(),
|
||||
140);
|
||||
assertEquals(140, readRowKeysFromParquet(jsc.hadoopConfiguration(), newFile).size(),
|
||||
"file should contain 140 records");
|
||||
|
||||
List<GenericRecord> records = ParquetUtils.readAvroRecords(jsc.hadoopConfiguration(), newFile);
|
||||
for (GenericRecord record : records) {
|
||||
String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
|
||||
assertEquals("only expect commit2", commitTime2, record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString());
|
||||
assertTrue("key expected to be part of commit2", keys2.contains(recordKey) || keys1.contains(recordKey));
|
||||
assertEquals(commitTime2, record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString(), "only expect commit2");
|
||||
assertTrue(keys2.contains(recordKey) || keys1.contains(recordKey), "key expected to be part of commit2");
|
||||
}
|
||||
|
||||
// update + inserts such that file1 is updated and expanded, a new file2 is created.
|
||||
@@ -522,7 +520,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
||||
statuses = client.upsert(insertAndUpdatesRDD3, commitTime3).collect();
|
||||
assertNoWriteErrors(statuses);
|
||||
|
||||
assertEquals("2 files needs to be committed.", 2, statuses.size());
|
||||
assertEquals(2, statuses.size(), "2 files needs to be committed.");
|
||||
HoodieTableMetaClient metadata = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
|
||||
|
||||
HoodieTable table = getHoodieTable(metadata, config);
|
||||
@@ -533,7 +531,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
||||
int numTotalUpdatesInCommit3 = 0;
|
||||
for (HoodieBaseFile file : files) {
|
||||
if (file.getFileName().contains(file1)) {
|
||||
assertEquals("Existing file should be expanded", commitTime3, file.getCommitTime());
|
||||
assertEquals(commitTime3, file.getCommitTime(), "Existing file should be expanded");
|
||||
records = ParquetUtils.readAvroRecords(jsc.hadoopConfiguration(), new Path(file.getPath()));
|
||||
for (GenericRecord record : records) {
|
||||
String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
|
||||
@@ -547,21 +545,21 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
||||
}
|
||||
}
|
||||
}
|
||||
assertEquals("All keys added in commit 2 must be updated in commit3 correctly", 0, keys2.size());
|
||||
assertEquals(0, keys2.size(), "All keys added in commit 2 must be updated in commit3 correctly");
|
||||
} else {
|
||||
assertEquals("New file must be written for commit 3", commitTime3, file.getCommitTime());
|
||||
assertEquals(commitTime3, file.getCommitTime(), "New file must be written for commit 3");
|
||||
records = ParquetUtils.readAvroRecords(jsc.hadoopConfiguration(), new Path(file.getPath()));
|
||||
for (GenericRecord record : records) {
|
||||
String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
|
||||
assertEquals("only expect commit3", commitTime3,
|
||||
record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString());
|
||||
assertTrue("key expected to be part of commit3", keys3.contains(recordKey));
|
||||
assertEquals(commitTime3, record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString(),
|
||||
"only expect commit3");
|
||||
assertTrue(keys3.contains(recordKey), "key expected to be part of commit3");
|
||||
}
|
||||
numTotalInsertsInCommit3 += records.size();
|
||||
}
|
||||
}
|
||||
assertEquals("Total updates in commit3 must add up", inserts2.size(), numTotalUpdatesInCommit3);
|
||||
assertEquals("Total inserts in commit3 must add up", keys3.size(), numTotalInsertsInCommit3);
|
||||
assertEquals(numTotalUpdatesInCommit3, inserts2.size(), "Total updates in commit3 must add up");
|
||||
assertEquals(numTotalInsertsInCommit3, keys3.size(), "Total inserts in commit3 must add up");
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -588,12 +586,11 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
||||
assertNoWriteErrors(statuses);
|
||||
assertPartitionMetadata(new String[] {testPartitionPath}, fs);
|
||||
|
||||
assertEquals("Just 1 file needs to be added.", 1, statuses.size());
|
||||
assertEquals(1, statuses.size(), "Just 1 file needs to be added.");
|
||||
String file1 = statuses.get(0).getFileId();
|
||||
assertEquals("file should contain 100 records",
|
||||
assertEquals(100,
|
||||
readRowKeysFromParquet(jsc.hadoopConfiguration(), new Path(basePath, statuses.get(0).getStat().getPath()))
|
||||
.size(),
|
||||
100);
|
||||
.size(), "file should contain 100 records");
|
||||
|
||||
// Second, set of Inserts should just expand file1
|
||||
String commitTime2 = "002";
|
||||
@@ -604,21 +601,21 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
||||
statuses = client.insert(insertRecordsRDD2, commitTime2).collect();
|
||||
assertNoWriteErrors(statuses);
|
||||
|
||||
assertEquals("Just 1 file needs to be updated.", 1, statuses.size());
|
||||
assertEquals("Existing file should be expanded", file1, statuses.get(0).getFileId());
|
||||
assertEquals("Existing file should be expanded", commitTime1, statuses.get(0).getStat().getPrevCommit());
|
||||
assertEquals(1, statuses.size(), "Just 1 file needs to be updated.");
|
||||
assertEquals(file1, statuses.get(0).getFileId(), "Existing file should be expanded");
|
||||
assertEquals(commitTime1, statuses.get(0).getStat().getPrevCommit(), "Existing file should be expanded");
|
||||
Path newFile = new Path(basePath, statuses.get(0).getStat().getPath());
|
||||
assertEquals("file should contain 140 records", readRowKeysFromParquet(jsc.hadoopConfiguration(), newFile).size(),
|
||||
140);
|
||||
assertEquals(140, readRowKeysFromParquet(jsc.hadoopConfiguration(), newFile).size(),
|
||||
"file should contain 140 records");
|
||||
|
||||
List<GenericRecord> records = ParquetUtils.readAvroRecords(jsc.hadoopConfiguration(), newFile);
|
||||
for (GenericRecord record : records) {
|
||||
String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
|
||||
String recCommitTime = record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString();
|
||||
assertTrue("Record expected to be part of commit 1 or commit2",
|
||||
commitTime1.equals(recCommitTime) || commitTime2.equals(recCommitTime));
|
||||
assertTrue("key expected to be part of commit 1 or commit2",
|
||||
keys2.contains(recordKey) || keys1.contains(recordKey));
|
||||
assertTrue(commitTime1.equals(recCommitTime) || commitTime2.equals(recCommitTime),
|
||||
"Record expected to be part of commit 1 or commit2");
|
||||
assertTrue(keys2.contains(recordKey) || keys1.contains(recordKey),
|
||||
"key expected to be part of commit 1 or commit2");
|
||||
}
|
||||
|
||||
// Lots of inserts such that file1 is updated and expanded, a new file2 is created.
|
||||
@@ -628,22 +625,22 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
||||
JavaRDD<HoodieRecord> insertRecordsRDD3 = jsc.parallelize(insert3, 1);
|
||||
statuses = client.insert(insertRecordsRDD3, commitTime3).collect();
|
||||
assertNoWriteErrors(statuses);
|
||||
assertEquals("2 files needs to be committed.", 2, statuses.size());
|
||||
assertEquals(2, statuses.size(), "2 files needs to be committed.");
|
||||
|
||||
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
|
||||
HoodieTable table = getHoodieTable(metaClient, config);
|
||||
List<HoodieBaseFile> files = table.getBaseFileOnlyView()
|
||||
.getLatestBaseFilesBeforeOrOn(testPartitionPath, commitTime3).collect(Collectors.toList());
|
||||
assertEquals("Total of 2 valid data files", 2, files.size());
|
||||
assertEquals(2, files.size(), "Total of 2 valid data files");
|
||||
|
||||
int totalInserts = 0;
|
||||
for (HoodieBaseFile file : files) {
|
||||
assertEquals("All files must be at commit 3", commitTime3, file.getCommitTime());
|
||||
assertEquals(commitTime3, file.getCommitTime(), "All files must be at commit 3");
|
||||
records = ParquetUtils.readAvroRecords(jsc.hadoopConfiguration(), new Path(file.getPath()));
|
||||
totalInserts += records.size();
|
||||
}
|
||||
assertEquals("Total number of records must add up", totalInserts,
|
||||
inserts1.size() + inserts2.size() + insert3.size());
|
||||
assertEquals(totalInserts, inserts1.size() + inserts2.size() + insert3.size(),
|
||||
"Total number of records must add up");
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -670,12 +667,11 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
||||
|
||||
assertNoWriteErrors(statuses);
|
||||
|
||||
assertEquals("Just 1 file needs to be added.", 1, statuses.size());
|
||||
assertEquals(1, statuses.size(), "Just 1 file needs to be added.");
|
||||
String file1 = statuses.get(0).getFileId();
|
||||
Assert.assertEquals("file should contain 100 records",
|
||||
assertEquals(100,
|
||||
readRowKeysFromParquet(jsc.hadoopConfiguration(), new Path(basePath, statuses.get(0).getStat().getPath()))
|
||||
.size(),
|
||||
100);
|
||||
.size(), "file should contain 100 records");
|
||||
|
||||
// Delete 20 among 100 inserted
|
||||
testDeletes(client, inserts1, 20, file1, "002", 80, keysSoFar);
|
||||
@@ -701,15 +697,16 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
||||
JavaRDD<HoodieKey> deleteKeys3 = jsc.parallelize(hoodieKeysToDelete3, 1);
|
||||
statuses = client.delete(deleteKeys3, commitTime6).collect();
|
||||
assertNoWriteErrors(statuses);
|
||||
assertEquals("Just 0 write status for delete.", 0, statuses.size());
|
||||
assertEquals(0, statuses.size(), "Just 0 write status for delete.");
|
||||
|
||||
// Check the entire dataset has all records still
|
||||
String[] fullPartitionPaths = new String[dataGen.getPartitionPaths().length];
|
||||
for (int i = 0; i < fullPartitionPaths.length; i++) {
|
||||
fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]);
|
||||
}
|
||||
assertEquals("Must contain " + 150 + " records", 150,
|
||||
HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count());
|
||||
assertEquals(150,
|
||||
HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count(),
|
||||
"Must contain " + 150 + " records");
|
||||
|
||||
// delete another batch. previous delete commit should have persisted the schema. If not,
|
||||
// this will throw exception
|
||||
@@ -735,8 +732,9 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
||||
for (int i = 0; i < fullPartitionPaths.length; i++) {
|
||||
fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]);
|
||||
}
|
||||
assertEquals("Must contain " + expectedTotalRecords + " records", expectedTotalRecords,
|
||||
HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count());
|
||||
assertEquals(expectedTotalRecords,
|
||||
HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count(),
|
||||
"Must contain " + expectedTotalRecords + " records");
|
||||
return Pair.of(keys, inserts);
|
||||
}
|
||||
|
||||
@@ -751,26 +749,28 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
||||
|
||||
assertNoWriteErrors(statuses);
|
||||
|
||||
assertEquals("Just 1 file needs to be added.", 1, statuses.size());
|
||||
assertEquals("Existing file should be expanded", existingFile, statuses.get(0).getFileId());
|
||||
assertEquals(1, statuses.size(), "Just 1 file needs to be added.");
|
||||
assertEquals(existingFile, statuses.get(0).getFileId(), "Existing file should be expanded");
|
||||
|
||||
// Check the entire dataset has all records still
|
||||
String[] fullPartitionPaths = new String[dataGen.getPartitionPaths().length];
|
||||
for (int i = 0; i < fullPartitionPaths.length; i++) {
|
||||
fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]);
|
||||
}
|
||||
assertEquals("Must contain " + exepctedRecords + " records", exepctedRecords,
|
||||
HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count());
|
||||
assertEquals(exepctedRecords,
|
||||
HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count(),
|
||||
"Must contain " + exepctedRecords + " records");
|
||||
|
||||
Path newFile = new Path(basePath, statuses.get(0).getStat().getPath());
|
||||
assertEquals("file should contain 110 records", readRowKeysFromParquet(jsc.hadoopConfiguration(), newFile).size(),
|
||||
exepctedRecords);
|
||||
assertEquals(exepctedRecords,
|
||||
readRowKeysFromParquet(jsc.hadoopConfiguration(), newFile).size(),
|
||||
"file should contain 110 records");
|
||||
|
||||
List<GenericRecord> records = ParquetUtils.readAvroRecords(jsc.hadoopConfiguration(), newFile);
|
||||
for (GenericRecord record : records) {
|
||||
String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
|
||||
assertTrue("key expected to be part of " + instantTime, keys.contains(recordKey));
|
||||
assertFalse("Key deleted", hoodieKeysToDelete.contains(recordKey));
|
||||
assertTrue(keys.contains(recordKey), "key expected to be part of " + instantTime);
|
||||
assertFalse(hoodieKeysToDelete.contains(recordKey), "Key deleted");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -795,12 +795,9 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
||||
List<HoodieKey> hoodieKeysToDelete = HoodieClientTestUtils
|
||||
.getKeysToDelete(HoodieClientTestUtils.getHoodieKeys(dummyInserts), 20);
|
||||
JavaRDD<HoodieKey> deleteKeys = jsc.parallelize(hoodieKeysToDelete, 1);
|
||||
try {
|
||||
assertThrows(HoodieIOException.class, () -> {
|
||||
client.delete(deleteKeys, commitTime1).collect();
|
||||
fail("Should have thrown Exception");
|
||||
} catch (HoodieIOException e) {
|
||||
// ignore
|
||||
}
|
||||
}, "Should have thrown Exception");
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -822,9 +819,9 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
||||
|
||||
JavaRDD<WriteStatus> result = client.bulkInsert(writeRecords, instantTime);
|
||||
|
||||
assertTrue("Commit should succeed", client.commit(instantTime, result));
|
||||
assertTrue("After explicit commit, commit file should be created",
|
||||
HoodieTestUtils.doesCommitExist(basePath, instantTime));
|
||||
assertTrue(client.commit(instantTime, result), "Commit should succeed");
|
||||
assertTrue(HoodieTestUtils.doesCommitExist(basePath, instantTime),
|
||||
"After explicit commit, commit file should be created");
|
||||
|
||||
// Get parquet file paths from commit metadata
|
||||
String actionType = metaClient.getCommitActionType();
|
||||
@@ -868,9 +865,9 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
||||
|
||||
JavaRDD<WriteStatus> result = client.bulkInsert(writeRecords, instantTime);
|
||||
|
||||
assertTrue("Commit should succeed", client.commit(instantTime, result));
|
||||
assertTrue("After explicit commit, commit file should be created",
|
||||
HoodieTestUtils.doesCommitExist(basePath, instantTime));
|
||||
assertTrue(client.commit(instantTime, result), "Commit should succeed");
|
||||
assertTrue(HoodieTestUtils.doesCommitExist(basePath, instantTime),
|
||||
"After explicit commit, commit file should be created");
|
||||
|
||||
// Read from commit file
|
||||
String filename = HoodieTestUtils.getCommitFilePath(basePath, instantTime);
|
||||
@@ -888,7 +885,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
||||
inserts += stat.getValue().getInserts();
|
||||
}
|
||||
}
|
||||
Assert.assertEquals(inserts, 200);
|
||||
assertEquals(200, inserts);
|
||||
|
||||
// Update + Inserts such that they just expand file1
|
||||
instantTime = "001";
|
||||
@@ -898,9 +895,9 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
||||
writeRecords = jsc.parallelize(records, 1);
|
||||
result = client.upsert(writeRecords, instantTime);
|
||||
|
||||
assertTrue("Commit should succeed", client.commit(instantTime, result));
|
||||
assertTrue("After explicit commit, commit file should be created",
|
||||
HoodieTestUtils.doesCommitExist(basePath, instantTime));
|
||||
assertTrue(client.commit(instantTime, result), "Commit should succeed");
|
||||
assertTrue(HoodieTestUtils.doesCommitExist(basePath, instantTime),
|
||||
"After explicit commit, commit file should be created");
|
||||
|
||||
// Read from commit file
|
||||
filename = HoodieTestUtils.getCommitFilePath(basePath, instantTime);
|
||||
@@ -919,8 +916,8 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
||||
upserts += stat.getValue().getUpserts();
|
||||
}
|
||||
}
|
||||
Assert.assertEquals(inserts, 200);
|
||||
Assert.assertEquals(upserts, 200);
|
||||
assertEquals(200, inserts);
|
||||
assertEquals(200, upserts);
|
||||
|
||||
}
|
||||
|
||||
@@ -937,9 +934,9 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
||||
|
||||
// Delete orphan marker and commit should succeed
|
||||
metaClient.getFs().delete(result.getKey(), false);
|
||||
assertTrue("Commit should succeed", client.commit(instantTime, result.getRight()));
|
||||
assertTrue("After explicit commit, commit file should be created",
|
||||
HoodieTestUtils.doesCommitExist(basePath, instantTime));
|
||||
assertTrue(client.commit(instantTime, result.getRight()), "Commit should succeed");
|
||||
assertTrue(HoodieTestUtils.doesCommitExist(basePath, instantTime),
|
||||
"After explicit commit, commit file should be created");
|
||||
// Marker directory must be removed
|
||||
assertFalse(metaClient.getFs().exists(new Path(metaClient.getMarkerFolderPath(instantTime))));
|
||||
}
|
||||
@@ -954,8 +951,8 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
||||
|
||||
// Rollback of this commit should succeed
|
||||
client.rollback(instantTime);
|
||||
assertFalse("After explicit rollback, commit file should not be present",
|
||||
HoodieTestUtils.doesCommitExist(basePath, instantTime));
|
||||
assertFalse(HoodieTestUtils.doesCommitExist(basePath, instantTime),
|
||||
"After explicit rollback, commit file should not be present");
|
||||
// Marker directory must be removed after rollback
|
||||
assertFalse(metaClient.getFs().exists(new Path(metaClient.getMarkerFolderPath(instantTime))));
|
||||
}
|
||||
@@ -984,12 +981,10 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
||||
metaClient.getFs().create(markerFilePath);
|
||||
LOG.info("Created a dummy marker path=" + markerFilePath);
|
||||
|
||||
try {
|
||||
Exception e = assertThrows(HoodieCommitException.class, () -> {
|
||||
client.commit(instantTime, result);
|
||||
fail("Commit should fail due to consistency check");
|
||||
} catch (HoodieCommitException cme) {
|
||||
assertTrue(cme.getCause() instanceof HoodieIOException);
|
||||
}
|
||||
}, "Commit should fail due to consistency check");
|
||||
assertTrue(e.getCause() instanceof HoodieIOException);
|
||||
return Pair.of(markerFilePath, result);
|
||||
}
|
||||
|
||||
|
||||
@@ -28,7 +28,7 @@ import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.sql.AnalysisException;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Row;
|
||||
import org.junit.Test;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
@@ -36,7 +36,8 @@ import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
/**
|
||||
@@ -79,11 +80,13 @@ public class TestHoodieReadClient extends TestHoodieClientBase {
|
||||
});
|
||||
}
|
||||
|
||||
@Test(expected = IllegalStateException.class)
|
||||
@Test
|
||||
public void testReadROViewFailsWithoutSqlContext() {
|
||||
HoodieReadClient readClient = new HoodieReadClient(jsc, getConfig());
|
||||
JavaRDD<HoodieKey> recordsRDD = jsc.parallelize(new ArrayList<>(), 1);
|
||||
readClient.readROView(recordsRDD, 1);
|
||||
assertThrows(IllegalStateException.class, () -> {
|
||||
readClient.readROView(recordsRDD, 1);
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -131,14 +134,11 @@ public class TestHoodieReadClient extends TestHoodieClientBase {
|
||||
assertEquals(75, rows.count());
|
||||
|
||||
JavaRDD<HoodieKey> keysWithoutPaths = keyToPathPair.filter(keyPath -> !keyPath._2.isPresent())
|
||||
.map(keyPath -> keyPath._1);
|
||||
.map(keyPath -> keyPath._1);
|
||||
|
||||
try {
|
||||
assertThrows(AnalysisException.class, () -> {
|
||||
anotherReadClient.readROView(keysWithoutPaths, 1);
|
||||
} catch (Exception e) {
|
||||
// data frame reader throws exception for empty records. ignore the error.
|
||||
assertEquals(e.getClass(), AnalysisException.class);
|
||||
}
|
||||
});
|
||||
|
||||
// Actual tests of getPendingCompactions method are in TestAsyncCompaction
|
||||
// This is just testing empty list
|
||||
|
||||
@@ -39,13 +39,13 @@ import org.apache.log4j.Logger;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Row;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
public class TestMultiFS extends HoodieClientTestHarness {
|
||||
|
||||
@@ -54,14 +54,14 @@ public class TestMultiFS extends HoodieClientTestHarness {
|
||||
protected String tableName = "hoodie_rt";
|
||||
private String tableType = HoodieTableType.COPY_ON_WRITE.name();
|
||||
|
||||
@Before
|
||||
@BeforeEach
|
||||
public void setUp() throws Exception {
|
||||
initSparkContexts();
|
||||
initDFS();
|
||||
initTestDataGenerator();
|
||||
}
|
||||
|
||||
@After
|
||||
@AfterEach
|
||||
public void tearDown() throws Exception {
|
||||
cleanupSparkContexts();
|
||||
cleanupDFS();
|
||||
@@ -103,7 +103,7 @@ public class TestMultiFS extends HoodieClientTestHarness {
|
||||
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs.getConf(), dfsBasePath);
|
||||
HoodieTimeline timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
|
||||
Dataset<Row> readRecords = HoodieClientTestUtils.readCommit(dfsBasePath, sqlContext, timeline, readCommitTime);
|
||||
assertEquals("Should contain 100 records", readRecords.count(), records.size());
|
||||
assertEquals(readRecords.count(), records.size(), "Should contain 100 records");
|
||||
|
||||
// Write to local
|
||||
HoodieTableMetaClient.initTableType(jsc.hadoopConfiguration(), tablePath, HoodieTableType.valueOf(tableType),
|
||||
@@ -122,7 +122,7 @@ public class TestMultiFS extends HoodieClientTestHarness {
|
||||
timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
|
||||
Dataset<Row> localReadRecords =
|
||||
HoodieClientTestUtils.readCommit(tablePath, sqlContext, timeline, writeCommitTime);
|
||||
assertEquals("Should contain 100 records", localReadRecords.count(), localRecords.size());
|
||||
assertEquals(localReadRecords.count(), localRecords.size(), "Should contain 100 records");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
|
||||
package org.apache.hudi.client;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.hudi.avro.HoodieAvroUtils;
|
||||
import org.apache.hudi.common.HoodieClientTestUtils;
|
||||
import org.apache.hudi.common.HoodieTestDataGenerator;
|
||||
@@ -36,9 +34,12 @@ import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.HoodieInsertException;
|
||||
import org.apache.hudi.exception.HoodieUpsertException;
|
||||
import org.apache.hudi.index.HoodieIndex.IndexType;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
@@ -51,10 +52,10 @@ import static org.apache.hudi.common.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA
|
||||
import static org.apache.hudi.common.HoodieTestDataGenerator.TRIP_SCHEMA_PREFIX;
|
||||
import static org.apache.hudi.common.HoodieTestDataGenerator.TRIP_SCHEMA_SUFFIX;
|
||||
import static org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion.VERSION_1;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.Assert.fail;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
import static org.junit.jupiter.api.Assertions.fail;
|
||||
|
||||
public class TestTableSchemaEvolution extends TestHoodieClientBase {
|
||||
private final String initCommitTime = "000";
|
||||
@@ -73,60 +74,60 @@ public class TestTableSchemaEvolution extends TestHoodieClientBase {
|
||||
public static final String TRIP_EXAMPLE_SCHEMA_DEVOLVED = TRIP_SCHEMA_PREFIX + MAP_TYPE_SCHEMA + FARE_NESTED_SCHEMA
|
||||
+ TRIP_SCHEMA_SUFFIX;
|
||||
|
||||
@Before
|
||||
@BeforeEach
|
||||
public void setUp() throws Exception {
|
||||
initResources();
|
||||
}
|
||||
|
||||
@After
|
||||
@AfterEach
|
||||
public void tearDown() {
|
||||
cleanupSparkContexts();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSchemaCompatibilityBasic() throws Exception {
|
||||
assertTrue("Same schema is compatible",
|
||||
TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, TRIP_EXAMPLE_SCHEMA));
|
||||
assertTrue(TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, TRIP_EXAMPLE_SCHEMA),
|
||||
"Same schema is compatible");
|
||||
|
||||
String reorderedSchema = TRIP_SCHEMA_PREFIX + TIP_NESTED_SCHEMA + FARE_NESTED_SCHEMA
|
||||
String reorderedSchema = TRIP_SCHEMA_PREFIX + TIP_NESTED_SCHEMA + FARE_NESTED_SCHEMA
|
||||
+ MAP_TYPE_SCHEMA + TRIP_SCHEMA_SUFFIX;
|
||||
assertTrue("Reordered fields are compatible",
|
||||
TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, reorderedSchema));
|
||||
assertTrue("Reordered fields are compatible",
|
||||
TableSchemaResolver.isSchemaCompatible(reorderedSchema, TRIP_EXAMPLE_SCHEMA));
|
||||
assertTrue(TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, reorderedSchema),
|
||||
"Reordered fields are compatible");
|
||||
assertTrue(TableSchemaResolver.isSchemaCompatible(reorderedSchema, TRIP_EXAMPLE_SCHEMA),
|
||||
"Reordered fields are compatible");
|
||||
|
||||
String renamedSchema = TRIP_EXAMPLE_SCHEMA.replace("tip_history", "tip_future");
|
||||
assertFalse("Renamed fields are not compatible",
|
||||
TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, renamedSchema));
|
||||
assertFalse(TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, renamedSchema),
|
||||
"Renamed fields are not compatible");
|
||||
|
||||
assertFalse("Deleted single field is not compatible",
|
||||
TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, TRIP_EXAMPLE_SCHEMA_DEVOLVED));
|
||||
String deletedMultipleFieldSchema = TRIP_SCHEMA_PREFIX + TIP_NESTED_SCHEMA + TRIP_SCHEMA_SUFFIX;
|
||||
assertFalse("Deleted multiple fields are not compatible",
|
||||
TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, deletedMultipleFieldSchema));
|
||||
assertFalse(TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, TRIP_EXAMPLE_SCHEMA_DEVOLVED),
|
||||
"Deleted single field is not compatible");
|
||||
String deletedMultipleFieldSchema = TRIP_SCHEMA_PREFIX + TIP_NESTED_SCHEMA + TRIP_SCHEMA_SUFFIX;
|
||||
assertFalse(TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, deletedMultipleFieldSchema),
|
||||
"Deleted multiple fields are not compatible");
|
||||
|
||||
String renamedRecordSchema = TRIP_EXAMPLE_SCHEMA.replace("triprec", "triprec_renamed");
|
||||
assertFalse("Renamed record name is not compatible",
|
||||
TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, renamedRecordSchema));
|
||||
assertFalse(TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, renamedRecordSchema),
|
||||
"Renamed record name is not compatible");
|
||||
|
||||
String swappedFieldSchema = TRIP_SCHEMA_PREFIX + MAP_TYPE_SCHEMA.replace("city_to_state", "fare")
|
||||
+ FARE_NESTED_SCHEMA.replace("fare", "city_to_state") + TIP_NESTED_SCHEMA + TRIP_SCHEMA_SUFFIX;
|
||||
assertFalse("Swapped fields are not compatible",
|
||||
TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, swappedFieldSchema));
|
||||
assertFalse(TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, swappedFieldSchema),
|
||||
"Swapped fields are not compatible");
|
||||
|
||||
String typeChangeSchema = TRIP_SCHEMA_PREFIX + MAP_TYPE_SCHEMA + FARE_NESTED_SCHEMA
|
||||
+ TIP_NESTED_SCHEMA.replace("string", "boolean") + TRIP_SCHEMA_SUFFIX;
|
||||
assertFalse("Field type change is not compatible",
|
||||
TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, typeChangeSchema));
|
||||
assertFalse(TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, typeChangeSchema),
|
||||
"Field type change is not compatible");
|
||||
|
||||
assertTrue("Added field with default is compatible (Evolved Schema)",
|
||||
TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, TRIP_EXAMPLE_SCHEMA_EVOLVED));
|
||||
assertTrue(TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, TRIP_EXAMPLE_SCHEMA_EVOLVED),
|
||||
"Added field with default is compatible (Evolved Schema)");
|
||||
|
||||
String multipleAddedFieldSchema = TRIP_SCHEMA_PREFIX + MAP_TYPE_SCHEMA + FARE_NESTED_SCHEMA
|
||||
+ TIP_NESTED_SCHEMA + EXTRA_FIELD_SCHEMA + EXTRA_FIELD_SCHEMA.replace("new_field", "new_new_field")
|
||||
+ TRIP_SCHEMA_SUFFIX;
|
||||
assertTrue("Multiple added fields with defauls are compatible",
|
||||
TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, multipleAddedFieldSchema));
|
||||
assertTrue(TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, multipleAddedFieldSchema),
|
||||
"Multiple added fields with defauls are compatible");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
||||
@@ -31,26 +31,26 @@ import org.apache.hudi.common.util.ParquetUtils;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.io.HoodieCreateHandle;
|
||||
import org.apache.hudi.io.HoodieMergeHandle;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.parquet.avro.AvroReadSupport;
|
||||
import org.junit.After;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import static org.junit.Assert.fail;
|
||||
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
public class TestUpdateSchemaEvolution extends HoodieClientTestHarness {
|
||||
|
||||
@Before
|
||||
@BeforeEach
|
||||
public void setUp() throws Exception {
|
||||
initPath();
|
||||
HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), basePath);
|
||||
@@ -58,7 +58,7 @@ public class TestUpdateSchemaEvolution extends HoodieClientTestHarness {
|
||||
initFileSystem();
|
||||
}
|
||||
|
||||
@After
|
||||
@AfterEach
|
||||
public void tearDown() {
|
||||
cleanupSparkContexts();
|
||||
}
|
||||
@@ -103,7 +103,7 @@ public class TestUpdateSchemaEvolution extends HoodieClientTestHarness {
|
||||
String fileId = insertResult.getFileId();
|
||||
|
||||
final HoodieTable table2 = HoodieTable.create(config2, jsc);
|
||||
Assert.assertEquals(1, jsc.parallelize(Arrays.asList(1)).map(x -> {
|
||||
assertEquals(1, jsc.parallelize(Arrays.asList(1)).map(x -> {
|
||||
// New content with values for the newly added field
|
||||
String recordStr1 = "{\"_row_key\":\"8eb5b87a-1feh-4edd-87b4-6ec96dc405a0\","
|
||||
+ "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12,\"added_field\":1}";
|
||||
@@ -116,9 +116,9 @@ public class TestUpdateSchemaEvolution extends HoodieClientTestHarness {
|
||||
record1.seal();
|
||||
updateRecords.add(record1);
|
||||
|
||||
try {
|
||||
assertDoesNotThrow(() -> {
|
||||
HoodieMergeHandle mergeHandle = new HoodieMergeHandle(config2, "101", table2,
|
||||
updateRecords.iterator(), record1.getPartitionPath(), fileId, supplier);
|
||||
updateRecords.iterator(), record1.getPartitionPath(), fileId, supplier);
|
||||
Configuration conf = new Configuration();
|
||||
AvroReadSupport.setAvroReadSchema(conf, mergeHandle.getWriterSchema());
|
||||
List<GenericRecord> oldRecords = ParquetUtils.readAvroRecords(conf,
|
||||
@@ -127,10 +127,9 @@ public class TestUpdateSchemaEvolution extends HoodieClientTestHarness {
|
||||
mergeHandle.write(rec);
|
||||
}
|
||||
mergeHandle.close();
|
||||
} catch (ClassCastException e) {
|
||||
fail("UpdateFunction could not read records written with exampleSchema.txt using the "
|
||||
+ "exampleEvolvedSchema.txt");
|
||||
}
|
||||
}, "UpdateFunction could not read records written with exampleSchema.txt using the "
|
||||
+ "exampleEvolvedSchema.txt");
|
||||
|
||||
return 1;
|
||||
}).collect().size());
|
||||
}
|
||||
|
||||
@@ -23,6 +23,7 @@ import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.minicluster.HdfsTestService;
|
||||
import org.apache.hudi.common.model.HoodieTestUtils;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.testutils.HoodieCommonTestHarnessJunit5;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
@@ -44,7 +45,7 @@ import java.util.concurrent.atomic.AtomicInteger;
|
||||
/**
|
||||
* The test harness for resource initialization and cleanup.
|
||||
*/
|
||||
public abstract class HoodieClientTestHarness extends HoodieCommonTestHarness implements Serializable {
|
||||
public abstract class HoodieClientTestHarness extends HoodieCommonTestHarnessJunit5 implements Serializable {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(HoodieClientTestHarness.class);
|
||||
|
||||
|
||||
@@ -28,16 +28,17 @@ import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.execution.LazyInsertIterable.HoodieInsertValueGenResult;
|
||||
|
||||
import org.apache.avro.generic.IndexedRecord;
|
||||
import org.junit.After;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import scala.Tuple2;
|
||||
|
||||
import static org.apache.hudi.execution.LazyInsertIterable.getTransformFunction;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.mockito.Mockito.mock;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
@@ -45,12 +46,12 @@ public class TestBoundedInMemoryExecutor extends HoodieClientTestHarness {
|
||||
|
||||
private final String instantTime = HoodieActiveTimeline.createNewInstantTime();
|
||||
|
||||
@Before
|
||||
@BeforeEach
|
||||
public void setUp() throws Exception {
|
||||
initTestDataGenerator();
|
||||
}
|
||||
|
||||
@After
|
||||
@AfterEach
|
||||
public void tearDown() throws Exception {
|
||||
cleanupTestDataGenerator();
|
||||
}
|
||||
@@ -73,7 +74,8 @@ public class TestBoundedInMemoryExecutor extends HoodieClientTestHarness {
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void finish() {}
|
||||
protected void finish() {
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Integer getResult() {
|
||||
@@ -87,9 +89,9 @@ public class TestBoundedInMemoryExecutor extends HoodieClientTestHarness {
|
||||
getTransformFunction(HoodieTestDataGenerator.AVRO_SCHEMA));
|
||||
int result = executor.execute();
|
||||
// It should buffer and write 100 records
|
||||
Assert.assertEquals(result, 100);
|
||||
assertEquals(100, result);
|
||||
// There should be no remaining records in the buffer
|
||||
Assert.assertFalse(executor.isRemaining());
|
||||
assertFalse(executor.isRemaining());
|
||||
} finally {
|
||||
if (executor != null) {
|
||||
executor.shutdownNow();
|
||||
|
||||
@@ -34,10 +34,10 @@ import org.apache.hudi.exception.HoodieException;
|
||||
import org.apache.hudi.execution.LazyInsertIterable.HoodieInsertValueGenResult;
|
||||
|
||||
import org.apache.avro.generic.IndexedRecord;
|
||||
import org.junit.After;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.Timeout;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
@@ -54,6 +54,9 @@ import java.util.stream.IntStream;
|
||||
import scala.Tuple2;
|
||||
|
||||
import static org.apache.hudi.execution.LazyInsertIterable.getTransformFunction;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
import static org.mockito.Mockito.mock;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
@@ -61,13 +64,13 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
|
||||
|
||||
private final String instantTime = HoodieActiveTimeline.createNewInstantTime();
|
||||
|
||||
@Before
|
||||
@BeforeEach
|
||||
public void setUp() throws Exception {
|
||||
initTestDataGenerator();
|
||||
initExecutorServiceWithFixedThreadPool(2);
|
||||
}
|
||||
|
||||
@After
|
||||
@AfterEach
|
||||
public void tearDown() throws Exception {
|
||||
cleanupTestDataGenerator();
|
||||
cleanupExecutorService();
|
||||
@@ -76,7 +79,8 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
|
||||
// Test to ensure that we are reading all records from queue iterator in the same order
|
||||
// without any exceptions.
|
||||
@SuppressWarnings("unchecked")
|
||||
@Test(timeout = 60000)
|
||||
@Test
|
||||
@Timeout(value = 60)
|
||||
public void testRecordReading() throws Exception {
|
||||
final int numRecords = 128;
|
||||
final List<HoodieRecord> hoodieRecords = dataGen.generateInserts(instantTime, numRecords);
|
||||
@@ -96,15 +100,15 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
|
||||
originalRecord.getData().getInsertValue(HoodieTestDataGenerator.AVRO_SCHEMA);
|
||||
final HoodieInsertValueGenResult<HoodieRecord> payload = queue.iterator().next();
|
||||
// Ensure that record ordering is guaranteed.
|
||||
Assert.assertEquals(originalRecord, payload.record);
|
||||
assertEquals(originalRecord, payload.record);
|
||||
// cached insert value matches the expected insert value.
|
||||
Assert.assertEquals(originalInsertValue,
|
||||
assertEquals(originalInsertValue,
|
||||
payload.record.getData().getInsertValue(HoodieTestDataGenerator.AVRO_SCHEMA));
|
||||
recordsRead++;
|
||||
}
|
||||
Assert.assertFalse(queue.iterator().hasNext() || originalRecordIterator.hasNext());
|
||||
assertFalse(queue.iterator().hasNext() || originalRecordIterator.hasNext());
|
||||
// all the records should be read successfully.
|
||||
Assert.assertEquals(numRecords, recordsRead);
|
||||
assertEquals(numRecords, recordsRead);
|
||||
// should not throw any exceptions.
|
||||
resFuture.get();
|
||||
}
|
||||
@@ -113,7 +117,8 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
|
||||
* Test to ensure that we are reading all records from queue iterator when we have multiple producers.
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
@Test(timeout = 60000)
|
||||
@Test
|
||||
@Timeout(value = 60)
|
||||
public void testCompositeProducerRecordReading() throws Exception {
|
||||
final int numRecords = 1000;
|
||||
final int numProducers = 40;
|
||||
@@ -129,7 +134,7 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
|
||||
List<HoodieRecord> pRecs = dataGen.generateInserts(instantTime, numRecords);
|
||||
int j = 0;
|
||||
for (HoodieRecord r : pRecs) {
|
||||
Assert.assertTrue(!keyToProducerAndIndexMap.containsKey(r.getRecordKey()));
|
||||
assertFalse(keyToProducerAndIndexMap.containsKey(r.getRecordKey()));
|
||||
keyToProducerAndIndexMap.put(r.getRecordKey(), new Tuple2<>(i, j));
|
||||
j++;
|
||||
}
|
||||
@@ -192,12 +197,12 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
|
||||
countMap.put(producerPos._1(), countMap.get(producerPos._1()) + 1);
|
||||
lastSeenMap.put(producerPos._1(), lastSeenPos + 1);
|
||||
// Ensure we are seeing the next record generated
|
||||
Assert.assertEquals(lastSeenPos + 1, producerPos._2().intValue());
|
||||
assertEquals(lastSeenPos + 1, producerPos._2().intValue());
|
||||
}
|
||||
|
||||
for (int i = 0; i < numProducers; i++) {
|
||||
// Ensure we have seen all the records for each producers
|
||||
Assert.assertEquals(Integer.valueOf(numRecords), countMap.get(i));
|
||||
assertEquals(Integer.valueOf(numRecords), countMap.get(i));
|
||||
}
|
||||
|
||||
// Ensure Close future is done
|
||||
@@ -206,7 +211,8 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
|
||||
|
||||
// Test to ensure that record queueing is throttled when we hit memory limit.
|
||||
@SuppressWarnings("unchecked")
|
||||
@Test(timeout = 60000)
|
||||
@Test
|
||||
@Timeout(value = 60)
|
||||
public void testMemoryLimitForBuffering() throws Exception {
|
||||
final int numRecords = 128;
|
||||
final List<HoodieRecord> hoodieRecords = dataGen.generateInserts(instantTime, numRecords);
|
||||
@@ -229,14 +235,14 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
|
||||
while (!isQueueFull(queue.rateLimiter)) {
|
||||
Thread.sleep(10);
|
||||
}
|
||||
Assert.assertEquals(0, queue.rateLimiter.availablePermits());
|
||||
Assert.assertEquals(recordLimit, queue.currentRateLimit);
|
||||
Assert.assertEquals(recordLimit, queue.size());
|
||||
Assert.assertEquals(recordLimit - 1, queue.samplingRecordCounter.get());
|
||||
assertEquals(0, queue.rateLimiter.availablePermits());
|
||||
assertEquals(recordLimit, queue.currentRateLimit);
|
||||
assertEquals(recordLimit, queue.size());
|
||||
assertEquals(recordLimit - 1, queue.samplingRecordCounter.get());
|
||||
|
||||
// try to read 2 records.
|
||||
Assert.assertEquals(hoodieRecords.get(0), queue.iterator().next().record);
|
||||
Assert.assertEquals(hoodieRecords.get(1), queue.iterator().next().record);
|
||||
assertEquals(hoodieRecords.get(0), queue.iterator().next().record);
|
||||
assertEquals(hoodieRecords.get(1), queue.iterator().next().record);
|
||||
|
||||
// waiting for permits to expire.
|
||||
while (!isQueueFull(queue.rateLimiter)) {
|
||||
@@ -245,17 +251,18 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
|
||||
// No change is expected in rate limit or number of queued records. We only expect
|
||||
// queueing thread to read
|
||||
// 2 more records into the queue.
|
||||
Assert.assertEquals(0, queue.rateLimiter.availablePermits());
|
||||
Assert.assertEquals(recordLimit, queue.currentRateLimit);
|
||||
Assert.assertEquals(recordLimit, queue.size());
|
||||
Assert.assertEquals(recordLimit - 1 + 2, queue.samplingRecordCounter.get());
|
||||
assertEquals(0, queue.rateLimiter.availablePermits());
|
||||
assertEquals(recordLimit, queue.currentRateLimit);
|
||||
assertEquals(recordLimit, queue.size());
|
||||
assertEquals(recordLimit - 1 + 2, queue.samplingRecordCounter.get());
|
||||
}
|
||||
|
||||
// Test to ensure that exception in either queueing thread or BufferedIterator-reader thread
|
||||
// is propagated to
|
||||
// another thread.
|
||||
@SuppressWarnings("unchecked")
|
||||
@Test(timeout = 60000)
|
||||
@Test
|
||||
@Timeout(value = 60)
|
||||
public void testException() throws Exception {
|
||||
final int numRecords = 256;
|
||||
final List<HoodieRecord> hoodieRecords = dataGen.generateInserts(instantTime, numRecords);
|
||||
@@ -285,13 +292,10 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
|
||||
// notify queueing thread of an exception and ensure that it exits.
|
||||
final Exception e = new Exception("Failing it :)");
|
||||
queue1.markAsFailed(e);
|
||||
try {
|
||||
resFuture.get();
|
||||
Assert.fail("exception is expected");
|
||||
} catch (ExecutionException e1) {
|
||||
Assert.assertEquals(HoodieException.class, e1.getCause().getClass());
|
||||
Assert.assertEquals(e, e1.getCause().getCause());
|
||||
}
|
||||
final Throwable thrown1 = assertThrows(ExecutionException.class, resFuture::get,
|
||||
"exception is expected");
|
||||
assertEquals(HoodieException.class, thrown1.getCause().getClass());
|
||||
assertEquals(e, thrown1.getCause().getCause());
|
||||
|
||||
// second let us raise an exception while doing record queueing. this exception should get
|
||||
// propagated to
|
||||
@@ -314,19 +318,14 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
|
||||
return true;
|
||||
});
|
||||
|
||||
try {
|
||||
final Throwable thrown2 = assertThrows(Exception.class, () -> {
|
||||
queue2.iterator().hasNext();
|
||||
Assert.fail("exception is expected");
|
||||
} catch (Exception e1) {
|
||||
Assert.assertEquals(expectedException, e1.getCause());
|
||||
}
|
||||
}, "exception is expected");
|
||||
assertEquals(expectedException, thrown2.getCause());
|
||||
// queueing thread should also have exited. make sure that it is not running.
|
||||
try {
|
||||
res.get();
|
||||
Assert.fail("exception is expected");
|
||||
} catch (ExecutionException e2) {
|
||||
Assert.assertEquals(expectedException, e2.getCause());
|
||||
}
|
||||
final Throwable thrown3 = assertThrows(ExecutionException.class, res::get,
|
||||
"exception is expected");
|
||||
assertEquals(expectedException, thrown3.getCause());
|
||||
}
|
||||
|
||||
private boolean isQueueFull(Semaphore rateLimiter) {
|
||||
|
||||
@@ -32,19 +32,20 @@ import org.apache.hudi.index.hbase.HBaseIndexQPSResourceAllocator;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||
import org.junit.After;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
public class TestHBaseQPSResourceAllocator extends HoodieClientTestHarness {
|
||||
|
||||
private static String tableName = "test_table";
|
||||
private static final String TABLE_NAME = "test_table";
|
||||
private static final String QPS_TEST_SUFFIX_PATH = "qps_test_suffix";
|
||||
private HBaseTestingUtility utility;
|
||||
private Configuration hbaseConfig;
|
||||
private static String QPS_TEST_SUFFIX_PATH = "qps_test_suffix";
|
||||
|
||||
@Before
|
||||
@BeforeEach
|
||||
public void setUp() throws Exception {
|
||||
utility = new HBaseTestingUtility();
|
||||
utility.startMiniCluster();
|
||||
@@ -52,12 +53,12 @@ public class TestHBaseQPSResourceAllocator extends HoodieClientTestHarness {
|
||||
initSparkContexts("TestQPSResourceAllocator");
|
||||
|
||||
initPath();
|
||||
basePath = folder.getRoot().getAbsolutePath() + QPS_TEST_SUFFIX_PATH;
|
||||
basePath = tempDir.resolve(QPS_TEST_SUFFIX_PATH).toAbsolutePath().toString();
|
||||
// Initialize table
|
||||
initMetaClient();
|
||||
}
|
||||
|
||||
@After
|
||||
@AfterEach
|
||||
public void tearDown() throws Exception {
|
||||
cleanupSparkContexts();
|
||||
cleanupMetaClient();
|
||||
@@ -71,9 +72,9 @@ public class TestHBaseQPSResourceAllocator extends HoodieClientTestHarness {
|
||||
HoodieWriteConfig config = getConfig(Option.empty());
|
||||
HBaseIndex index = new HBaseIndex(config);
|
||||
HBaseIndexQPSResourceAllocator hBaseIndexQPSResourceAllocator = index.createQPSResourceAllocator(config);
|
||||
Assert.assertEquals(hBaseIndexQPSResourceAllocator.getClass().getName(),
|
||||
assertEquals(hBaseIndexQPSResourceAllocator.getClass().getName(),
|
||||
DefaultHBaseQPSResourceAllocator.class.getName());
|
||||
Assert.assertEquals(config.getHbaseIndexQPSFraction(),
|
||||
assertEquals(config.getHbaseIndexQPSFraction(),
|
||||
hBaseIndexQPSResourceAllocator.acquireQPSResources(config.getHbaseIndexQPSFraction(), 100), 0.0f);
|
||||
}
|
||||
|
||||
@@ -82,9 +83,9 @@ public class TestHBaseQPSResourceAllocator extends HoodieClientTestHarness {
|
||||
HoodieWriteConfig config = getConfig(Option.of(HoodieHBaseIndexConfig.DEFAULT_HBASE_INDEX_QPS_ALLOCATOR_CLASS));
|
||||
HBaseIndex index = new HBaseIndex(config);
|
||||
HBaseIndexQPSResourceAllocator hBaseIndexQPSResourceAllocator = index.createQPSResourceAllocator(config);
|
||||
Assert.assertEquals(hBaseIndexQPSResourceAllocator.getClass().getName(),
|
||||
assertEquals(hBaseIndexQPSResourceAllocator.getClass().getName(),
|
||||
DefaultHBaseQPSResourceAllocator.class.getName());
|
||||
Assert.assertEquals(config.getHbaseIndexQPSFraction(),
|
||||
assertEquals(config.getHbaseIndexQPSFraction(),
|
||||
hBaseIndexQPSResourceAllocator.acquireQPSResources(config.getHbaseIndexQPSFraction(), 100), 0.0f);
|
||||
}
|
||||
|
||||
@@ -93,9 +94,9 @@ public class TestHBaseQPSResourceAllocator extends HoodieClientTestHarness {
|
||||
HoodieWriteConfig config = getConfig(Option.of("InvalidResourceAllocatorClassName"));
|
||||
HBaseIndex index = new HBaseIndex(config);
|
||||
HBaseIndexQPSResourceAllocator hBaseIndexQPSResourceAllocator = index.createQPSResourceAllocator(config);
|
||||
Assert.assertEquals(hBaseIndexQPSResourceAllocator.getClass().getName(),
|
||||
assertEquals(hBaseIndexQPSResourceAllocator.getClass().getName(),
|
||||
DefaultHBaseQPSResourceAllocator.class.getName());
|
||||
Assert.assertEquals(config.getHbaseIndexQPSFraction(),
|
||||
assertEquals(config.getHbaseIndexQPSFraction(),
|
||||
hBaseIndexQPSResourceAllocator.acquireQPSResources(config.getHbaseIndexQPSFraction(), 100), 0.0f);
|
||||
}
|
||||
|
||||
@@ -117,7 +118,7 @@ public class TestHBaseQPSResourceAllocator extends HoodieClientTestHarness {
|
||||
private HoodieHBaseIndexConfig getConfigWithResourceAllocator(Option<String> resourceAllocatorClass) {
|
||||
HoodieHBaseIndexConfig.Builder builder = new HoodieHBaseIndexConfig.Builder()
|
||||
.hbaseZkPort(Integer.parseInt(hbaseConfig.get("hbase.zookeeper.property.clientPort")))
|
||||
.hbaseZkQuorum(hbaseConfig.get("hbase.zookeeper.quorum")).hbaseTableName(tableName).hbaseIndexGetBatchSize(100);
|
||||
.hbaseZkQuorum(hbaseConfig.get("hbase.zookeeper.quorum")).hbaseTableName(TABLE_NAME).hbaseIndexGetBatchSize(100);
|
||||
if (resourceAllocatorClass.isPresent()) {
|
||||
builder.withQPSResourceAllocatorType(resourceAllocatorClass.get());
|
||||
}
|
||||
|
||||
@@ -50,14 +50,13 @@ import org.apache.hadoop.hbase.client.Put;
|
||||
import org.apache.hadoop.hbase.client.Result;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.junit.After;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.FixMethodOrder;
|
||||
import org.junit.Test;
|
||||
import org.junit.runners.MethodSorters;
|
||||
import org.mockito.Mockito;
|
||||
import org.junit.jupiter.api.AfterAll;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.MethodOrderer;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.TestMethodOrder;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
@@ -65,12 +64,13 @@ import java.util.List;
|
||||
|
||||
import scala.Tuple2;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.Assert.fail;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
import static org.mockito.ArgumentMatchers.any;
|
||||
import static org.mockito.Mockito.atMost;
|
||||
import static org.mockito.Mockito.mock;
|
||||
import static org.mockito.Mockito.times;
|
||||
import static org.mockito.Mockito.verify;
|
||||
import static org.mockito.Mockito.when;
|
||||
@@ -78,9 +78,9 @@ import static org.mockito.Mockito.when;
|
||||
/**
|
||||
* Note :: HBaseTestingUtility is really flaky with issues where the HbaseMiniCluster fails to shutdown across tests,
|
||||
* (see one problem here : https://issues.apache.org/jira/browse/HBASE-15835). Hence, the need to use
|
||||
* MethodSorters.NAME_ASCENDING to make sure the tests run in order. Please alter the order of tests running carefully.
|
||||
* {@link MethodOrderer.Alphanumeric} to make sure the tests run in order. Please alter the order of tests running carefully.
|
||||
*/
|
||||
@FixMethodOrder(MethodSorters.NAME_ASCENDING)
|
||||
@TestMethodOrder(MethodOrderer.Alphanumeric.class)
|
||||
public class TestHbaseIndex extends HoodieClientTestHarness {
|
||||
|
||||
private static HBaseTestingUtility utility;
|
||||
@@ -89,14 +89,14 @@ public class TestHbaseIndex extends HoodieClientTestHarness {
|
||||
|
||||
public TestHbaseIndex() {}
|
||||
|
||||
@AfterClass
|
||||
@AfterAll
|
||||
public static void clean() throws Exception {
|
||||
if (utility != null) {
|
||||
utility.shutdownMiniCluster();
|
||||
}
|
||||
}
|
||||
|
||||
@BeforeClass
|
||||
@BeforeAll
|
||||
public static void init() throws Exception {
|
||||
// Initialize HbaseMiniCluster
|
||||
hbaseConfig = HBaseConfiguration.create();
|
||||
@@ -108,7 +108,7 @@ public class TestHbaseIndex extends HoodieClientTestHarness {
|
||||
utility.createTable(TableName.valueOf(tableName), Bytes.toBytes("_s"));
|
||||
}
|
||||
|
||||
@Before
|
||||
@BeforeEach
|
||||
public void setUp() throws Exception {
|
||||
// Initialize a local spark env
|
||||
initSparkContexts("TestHbaseIndex");
|
||||
@@ -120,7 +120,7 @@ public class TestHbaseIndex extends HoodieClientTestHarness {
|
||||
initMetaClient();
|
||||
}
|
||||
|
||||
@After
|
||||
@AfterEach
|
||||
public void tearDown() throws Exception {
|
||||
cleanupSparkContexts();
|
||||
cleanupTestDataGenerator();
|
||||
@@ -257,8 +257,8 @@ public class TestHbaseIndex extends HoodieClientTestHarness {
|
||||
HBaseIndex index = new HBaseIndex(config);
|
||||
|
||||
// Mock hbaseConnection and related entities
|
||||
Connection hbaseConnection = Mockito.mock(Connection.class);
|
||||
HTable table = Mockito.mock(HTable.class);
|
||||
Connection hbaseConnection = mock(Connection.class);
|
||||
HTable table = mock(HTable.class);
|
||||
when(hbaseConnection.getTable(TableName.valueOf(tableName))).thenReturn(table);
|
||||
when(table.get((List<Get>) any())).thenReturn(new Result[0]);
|
||||
|
||||
@@ -306,8 +306,8 @@ public class TestHbaseIndex extends HoodieClientTestHarness {
|
||||
writeClient.commit(newCommitTime, writeStatues);
|
||||
|
||||
// Mock hbaseConnection and related entities
|
||||
Connection hbaseConnection = Mockito.mock(Connection.class);
|
||||
HTable table = Mockito.mock(HTable.class);
|
||||
Connection hbaseConnection = mock(Connection.class);
|
||||
HTable table = mock(HTable.class);
|
||||
when(hbaseConnection.getTable(TableName.valueOf(tableName))).thenReturn(table);
|
||||
when(table.get((List<Get>) any())).thenReturn(new Result[0]);
|
||||
|
||||
@@ -335,28 +335,28 @@ public class TestHbaseIndex extends HoodieClientTestHarness {
|
||||
// 8 (batchSize) * 200 (parallelism) * 10 (maxReqsInOneSecond) * 10 (numRegionServers) * 0.1 (qpsFraction)) => 16000
|
||||
// We assume requests get distributed to Region Servers uniformly, so each RS gets 1600 request
|
||||
// 1600 happens to be 10% of 16667 (maxQPSPerRegionServer) as expected.
|
||||
assertEquals(putBatchSize, 8);
|
||||
assertEquals(8, putBatchSize);
|
||||
|
||||
// Number of Region Servers are halved, total requests sent in a second are also halved, so batchSize is also halved
|
||||
int putBatchSize2 = batchSizeCalculator.getBatchSize(5, 16667, 1200, 200, 100, 0.1f);
|
||||
assertEquals(putBatchSize2, 4);
|
||||
assertEquals(4, putBatchSize2);
|
||||
|
||||
// If the parallelism is halved, batchSize has to double
|
||||
int putBatchSize3 = batchSizeCalculator.getBatchSize(10, 16667, 1200, 100, 100, 0.1f);
|
||||
assertEquals(putBatchSize3, 16);
|
||||
assertEquals(16, putBatchSize3);
|
||||
|
||||
// If the parallelism is halved, batchSize has to double.
|
||||
// This time parallelism is driven by numTasks rather than numExecutors
|
||||
int putBatchSize4 = batchSizeCalculator.getBatchSize(10, 16667, 100, 200, 100, 0.1f);
|
||||
assertEquals(putBatchSize4, 16);
|
||||
assertEquals(16, putBatchSize4);
|
||||
|
||||
// If sleepTimeMs is halved, batchSize has to halve
|
||||
int putBatchSize5 = batchSizeCalculator.getBatchSize(10, 16667, 1200, 200, 100, 0.05f);
|
||||
assertEquals(putBatchSize5, 4);
|
||||
assertEquals(4, putBatchSize5);
|
||||
|
||||
// If maxQPSPerRegionServer is doubled, batchSize also doubles
|
||||
int putBatchSize6 = batchSizeCalculator.getBatchSize(10, 33334, 1200, 200, 100, 0.1f);
|
||||
assertEquals(putBatchSize6, 16);
|
||||
assertEquals(16, putBatchSize6);
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -494,19 +494,15 @@ public class TestHbaseIndex extends HoodieClientTestHarness {
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFeatureSupport() throws Exception {
|
||||
public void testFeatureSupport() {
|
||||
HoodieWriteConfig config = getConfig();
|
||||
HBaseIndex index = new HBaseIndex(config);
|
||||
|
||||
assertTrue(index.canIndexLogFiles());
|
||||
try {
|
||||
assertThrows(UnsupportedOperationException.class, () -> {
|
||||
HoodieTable hoodieTable = HoodieTable.create(metaClient, config, jsc);
|
||||
index.fetchRecordLocation(jsc.parallelize(new ArrayList<HoodieKey>(), 1), jsc, hoodieTable);
|
||||
fail("HbaseIndex supports fetchRecordLocation");
|
||||
} catch (UnsupportedOperationException ex) {
|
||||
// Expected so ignore
|
||||
ex.getStackTrace();
|
||||
}
|
||||
}, "HbaseIndex supports fetchRecordLocation");
|
||||
}
|
||||
|
||||
private WriteStatus getSampleWriteStatus(final int numInserts, final int numUpdateWrites) {
|
||||
@@ -521,7 +517,7 @@ public class TestHbaseIndex extends HoodieClientTestHarness {
|
||||
private void assertNoWriteErrors(List<WriteStatus> statuses) {
|
||||
// Verify there are no errors
|
||||
for (WriteStatus status : statuses) {
|
||||
assertFalse("Errors found in write of " + status.getFileId(), status.hasErrors());
|
||||
assertFalse(status.hasErrors(), "Errors found in write of " + status.getFileId());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -39,23 +39,28 @@ import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.spark.api.java.JavaPairRDD;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.Assert.fail;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
public class TestHoodieIndex extends HoodieClientTestHarness {
|
||||
|
||||
@Before
|
||||
private HoodieWriteConfig.Builder clientConfigBuilder;
|
||||
private HoodieIndexConfig.Builder indexConfigBuilder;
|
||||
|
||||
@BeforeEach
|
||||
public void setUp() throws Exception {
|
||||
initSparkContexts("TestHoodieIndex");
|
||||
initPath();
|
||||
initMetaClient();
|
||||
clientConfigBuilder = HoodieWriteConfig.newBuilder();
|
||||
indexConfigBuilder = HoodieIndexConfig.newBuilder();
|
||||
}
|
||||
|
||||
@After
|
||||
@AfterEach
|
||||
public void tearDown() {
|
||||
cleanupSparkContexts();
|
||||
cleanupMetaClient();
|
||||
@@ -63,8 +68,6 @@ public class TestHoodieIndex extends HoodieClientTestHarness {
|
||||
|
||||
@Test
|
||||
public void testCreateIndex() {
|
||||
HoodieWriteConfig.Builder clientConfigBuilder = HoodieWriteConfig.newBuilder();
|
||||
HoodieIndexConfig.Builder indexConfigBuilder = HoodieIndexConfig.newBuilder();
|
||||
// Different types
|
||||
HoodieWriteConfig config = clientConfigBuilder.withPath(basePath)
|
||||
.withIndexConfig(indexConfigBuilder.withIndexType(HoodieIndex.IndexType.HBASE)
|
||||
@@ -84,27 +87,27 @@ public class TestHoodieIndex extends HoodieClientTestHarness {
|
||||
config = clientConfigBuilder.withPath(basePath)
|
||||
.withIndexConfig(indexConfigBuilder.withIndexClass(DummyHoodieIndex.class.getName()).build()).build();
|
||||
assertTrue(HoodieIndex.createIndex(config, jsc) instanceof DummyHoodieIndex);
|
||||
}
|
||||
|
||||
config = clientConfigBuilder.withPath(basePath)
|
||||
@Test
|
||||
public void testCreateIndex_withException() {
|
||||
final HoodieWriteConfig config1 = clientConfigBuilder.withPath(basePath)
|
||||
.withIndexConfig(indexConfigBuilder.withIndexClass(IndexWithConstructor.class.getName()).build()).build();
|
||||
try {
|
||||
HoodieIndex.createIndex(config, jsc);
|
||||
fail("exception is expected");
|
||||
} catch (HoodieIndexException e) {
|
||||
assertTrue(e.getMessage().contains("is not a subclass of HoodieIndex"));
|
||||
}
|
||||
final Throwable thrown1 = assertThrows(HoodieException.class, () -> {
|
||||
HoodieIndex.createIndex(config1, jsc);
|
||||
}, "exception is expected");
|
||||
assertTrue(thrown1.getMessage().contains("is not a subclass of HoodieIndex"));
|
||||
|
||||
config = clientConfigBuilder.withPath(basePath)
|
||||
.withIndexConfig(indexConfigBuilder.withIndexClass(IndexWithoutConstructor.class.getName()).build()).build();
|
||||
try {
|
||||
HoodieIndex.createIndex(config, jsc);
|
||||
fail("exception is expected");
|
||||
} catch (HoodieException e) {
|
||||
assertTrue(e.getMessage().contains("Unable to instantiate class"));
|
||||
}
|
||||
final HoodieWriteConfig config2 = clientConfigBuilder.withPath(basePath)
|
||||
.withIndexConfig(indexConfigBuilder.withIndexClass(IndexWithoutConstructor.class.getName()).build()).build();
|
||||
final Throwable thrown2 = assertThrows(HoodieException.class, () -> {
|
||||
HoodieIndex.createIndex(config2, jsc);
|
||||
}, "exception is expected");
|
||||
assertTrue(thrown2.getMessage().contains("Unable to instantiate class"));
|
||||
}
|
||||
|
||||
public static class DummyHoodieIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
|
||||
|
||||
public DummyHoodieIndex(HoodieWriteConfig config) {
|
||||
super(config);
|
||||
}
|
||||
@@ -146,7 +149,9 @@ public class TestHoodieIndex extends HoodieClientTestHarness {
|
||||
}
|
||||
|
||||
public static class IndexWithConstructor {
|
||||
public IndexWithConstructor(HoodieWriteConfig config) {}
|
||||
|
||||
public IndexWithConstructor(HoodieWriteConfig config) {
|
||||
}
|
||||
}
|
||||
|
||||
public static class IndexWithoutConstructor {
|
||||
|
||||
@@ -41,17 +41,18 @@ import org.apache.avro.Schema;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.spark.api.java.JavaPairRDD;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.junit.runners.Parameterized;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.params.ParameterizedTest;
|
||||
import org.junit.jupiter.params.provider.Arguments;
|
||||
import org.junit.jupiter.params.provider.MethodSource;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
@@ -59,40 +60,31 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import scala.Tuple2;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertNull;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.Assert.fail;
|
||||
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
import static org.junit.jupiter.api.Assertions.assertNull;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
@RunWith(Parameterized.class)
|
||||
public class TestHoodieBloomIndex extends HoodieClientTestHarness {
|
||||
|
||||
private String schemaStr;
|
||||
private Schema schema;
|
||||
|
||||
private boolean rangePruning;
|
||||
private boolean treeFiltering;
|
||||
private boolean bucketizedChecking;
|
||||
private static final String TEST_NAME_WITH_PARAMS = "[{index}] Test with rangePruning={0}, treeFiltering={1}, bucketizedChecking={2}";
|
||||
|
||||
@Parameterized.Parameters(name = "{index}: Test with rangePruning={0}, treeFiltering ={1}, bucketizedChecking is:{2}")
|
||||
public static Collection<Object[]> data() {
|
||||
public static Stream<Arguments> configParams() {
|
||||
Object[][] data =
|
||||
new Object[][] {{true, true, true}, {false, true, true}, {true, true, false}, {true, false, true}};
|
||||
return Arrays.asList(data);
|
||||
return Stream.of(data).map(Arguments::of);
|
||||
}
|
||||
|
||||
public TestHoodieBloomIndex(boolean rangePruning, boolean treeFiltering, boolean bucketizedChecking) {
|
||||
this.rangePruning = rangePruning;
|
||||
this.treeFiltering = treeFiltering;
|
||||
this.bucketizedChecking = bucketizedChecking;
|
||||
}
|
||||
|
||||
@Before
|
||||
@BeforeEach
|
||||
public void setUp() throws Exception {
|
||||
initSparkContexts("TestHoodieBloomIndex");
|
||||
initPath();
|
||||
@@ -103,14 +95,14 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
|
||||
initMetaClient();
|
||||
}
|
||||
|
||||
@After
|
||||
@AfterEach
|
||||
public void tearDown() throws Exception {
|
||||
cleanupSparkContexts();
|
||||
cleanupFileSystem();
|
||||
cleanupMetaClient();
|
||||
}
|
||||
|
||||
private HoodieWriteConfig makeConfig() {
|
||||
private HoodieWriteConfig makeConfig(boolean rangePruning, boolean treeFiltering, boolean bucketizedChecking) {
|
||||
return HoodieWriteConfig.newBuilder().withPath(basePath)
|
||||
.withIndexConfig(HoodieIndexConfig.newBuilder().bloomIndexPruneByRanges(rangePruning)
|
||||
.bloomIndexTreebasedFilter(treeFiltering).bloomIndexBucketizedChecking(bucketizedChecking)
|
||||
@@ -118,9 +110,10 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
|
||||
.build();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLoadInvolvedFiles() throws IOException {
|
||||
HoodieWriteConfig config = makeConfig();
|
||||
@ParameterizedTest(name = TEST_NAME_WITH_PARAMS)
|
||||
@MethodSource("configParams")
|
||||
public void testLoadInvolvedFiles(boolean rangePruning, boolean treeFiltering, boolean bucketizedChecking) throws IOException {
|
||||
HoodieWriteConfig config = makeConfig(rangePruning, treeFiltering, bucketizedChecking);
|
||||
HoodieBloomIndex index = new HoodieBloomIndex(config);
|
||||
|
||||
// Create some partitions, and put some files
|
||||
@@ -128,9 +121,9 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
|
||||
// "2016/04/01": 1 file (2_0_20160401010101.parquet)
|
||||
// "2015/03/12": 3 files (1_0_20150312101010.parquet, 3_0_20150312101010.parquet,
|
||||
// 4_0_20150312101010.parquet)
|
||||
new File(basePath + "/2016/01/21").mkdirs();
|
||||
new File(basePath + "/2016/04/01").mkdirs();
|
||||
new File(basePath + "/2015/03/12").mkdirs();
|
||||
Files.createDirectories(Paths.get(basePath, "2016", "01", "21"));
|
||||
Files.createDirectories(Paths.get(basePath, "2016", "04", "01"));
|
||||
Files.createDirectories(Paths.get(basePath, "2015", "03", "12"));
|
||||
|
||||
TestRawTripPayload rowChange1 =
|
||||
new TestRawTripPayload("{\"_row_key\":\"000\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
|
||||
@@ -163,16 +156,16 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
|
||||
HoodieTable table = HoodieTable.create(metaClient, config, jsc);
|
||||
List<Tuple2<String, BloomIndexFileInfo>> filesList = index.loadInvolvedFiles(partitions, jsc, table);
|
||||
// Still 0, as no valid commit
|
||||
assertEquals(filesList.size(), 0);
|
||||
assertEquals(0, filesList.size());
|
||||
|
||||
// Add some commits
|
||||
new File(basePath + "/.hoodie").mkdirs();
|
||||
new File(basePath + "/.hoodie/20160401010101.commit").createNewFile();
|
||||
new File(basePath + "/.hoodie/20150312101010.commit").createNewFile();
|
||||
java.nio.file.Path hoodieDir = Files.createDirectories(Paths.get(basePath, ".hoodie"));
|
||||
Files.createFile(hoodieDir.resolve("20160401010101.commit"));
|
||||
Files.createFile(hoodieDir.resolve("20150312101010.commit"));
|
||||
|
||||
table = HoodieTable.create(metaClient, config, jsc);
|
||||
filesList = index.loadInvolvedFiles(partitions, jsc, table);
|
||||
assertEquals(filesList.size(), 4);
|
||||
assertEquals(4, filesList.size());
|
||||
|
||||
if (rangePruning) {
|
||||
// these files will not have the key ranges
|
||||
@@ -194,9 +187,10 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRangePruning() {
|
||||
HoodieWriteConfig config = makeConfig();
|
||||
@ParameterizedTest(name = TEST_NAME_WITH_PARAMS)
|
||||
@MethodSource("configParams")
|
||||
public void testRangePruning(boolean rangePruning, boolean treeFiltering, boolean bucketizedChecking) {
|
||||
HoodieWriteConfig config = makeConfig(rangePruning, treeFiltering, bucketizedChecking);
|
||||
HoodieBloomIndex index = new HoodieBloomIndex(config);
|
||||
|
||||
final Map<String, List<BloomIndexFileInfo>> partitionToFileIndexInfo = new HashMap<>();
|
||||
@@ -277,27 +271,27 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
|
||||
// assertTrue(results.get(1)._2().equals(filename));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTagLocationWithEmptyRDD() {
|
||||
@ParameterizedTest(name = TEST_NAME_WITH_PARAMS)
|
||||
@MethodSource("configParams")
|
||||
public void testTagLocationWithEmptyRDD(boolean rangePruning, boolean treeFiltering, boolean bucketizedChecking) {
|
||||
// We have some records to be tagged (two different partitions)
|
||||
JavaRDD<HoodieRecord> recordRDD = jsc.emptyRDD();
|
||||
// Also create the metadata and config
|
||||
HoodieWriteConfig config = makeConfig();
|
||||
HoodieWriteConfig config = makeConfig(rangePruning, treeFiltering, bucketizedChecking);
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
HoodieTable table = HoodieTable.create(metaClient, config, jsc);
|
||||
|
||||
// Let's tag
|
||||
HoodieBloomIndex bloomIndex = new HoodieBloomIndex(config);
|
||||
|
||||
try {
|
||||
assertDoesNotThrow(() -> {
|
||||
bloomIndex.tagLocation(recordRDD, jsc, table);
|
||||
} catch (IllegalArgumentException e) {
|
||||
fail("EmptyRDD should not result in IllegalArgumentException: Positive number of slices required");
|
||||
}
|
||||
}, "EmptyRDD should not result in IllegalArgumentException: Positive number of slices required");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTagLocation() throws Exception {
|
||||
@ParameterizedTest(name = TEST_NAME_WITH_PARAMS)
|
||||
@MethodSource("configParams")
|
||||
public void testTagLocation(boolean rangePruning, boolean treeFiltering, boolean bucketizedChecking) throws Exception {
|
||||
// We have some records to be tagged (two different partitions)
|
||||
String rowKey1 = UUID.randomUUID().toString();
|
||||
String rowKey2 = UUID.randomUUID().toString();
|
||||
@@ -322,7 +316,7 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
|
||||
JavaRDD<HoodieRecord> recordRDD = jsc.parallelize(Arrays.asList(record1, record2, record3, record4));
|
||||
|
||||
// Also create the metadata and config
|
||||
HoodieWriteConfig config = makeConfig();
|
||||
HoodieWriteConfig config = makeConfig(rangePruning, treeFiltering, bucketizedChecking);
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
HoodieTable table = HoodieTable.create(metaClient, config, jsc);
|
||||
|
||||
@@ -365,8 +359,9 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCheckExists() throws Exception {
|
||||
@ParameterizedTest(name = TEST_NAME_WITH_PARAMS)
|
||||
@MethodSource("configParams")
|
||||
public void testCheckExists(boolean rangePruning, boolean treeFiltering, boolean bucketizedChecking) throws Exception {
|
||||
// We have some records to be tagged (two different partitions)
|
||||
|
||||
String recordStr1 = "{\"_row_key\":\"1eb5b87a-1feh-4edd-87b4-6ec96dc405a0\","
|
||||
@@ -392,7 +387,7 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
|
||||
JavaRDD<HoodieKey> keysRDD = jsc.parallelize(Arrays.asList(key1, key2, key3, key4));
|
||||
|
||||
// Also create the metadata and config
|
||||
HoodieWriteConfig config = makeConfig();
|
||||
HoodieWriteConfig config = makeConfig(rangePruning, treeFiltering, bucketizedChecking);
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
HoodieTable table = HoodieTable.create(metaClient, config, jsc);
|
||||
|
||||
@@ -437,8 +432,9 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBloomFilterFalseError() throws IOException, InterruptedException {
|
||||
@ParameterizedTest(name = TEST_NAME_WITH_PARAMS)
|
||||
@MethodSource("configParams")
|
||||
public void testBloomFilterFalseError(boolean rangePruning, boolean treeFiltering, boolean bucketizedChecking) throws IOException, InterruptedException {
|
||||
// We have two hoodie records
|
||||
String recordStr1 = "{\"_row_key\":\"1eb5b87a-1feh-4edd-87b4-6ec96dc405a0\","
|
||||
+ "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}";
|
||||
@@ -463,7 +459,7 @@ public class TestHoodieBloomIndex extends HoodieClientTestHarness {
|
||||
|
||||
// We do the tag
|
||||
JavaRDD<HoodieRecord> recordRDD = jsc.parallelize(Arrays.asList(record1, record2));
|
||||
HoodieWriteConfig config = makeConfig();
|
||||
HoodieWriteConfig config = makeConfig(rangePruning, treeFiltering, bucketizedChecking);
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
HoodieTable table = HoodieTable.create(metaClient, config, jsc);
|
||||
|
||||
|
||||
@@ -36,12 +36,14 @@ import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.spark.api.java.JavaPairRDD;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
@@ -53,12 +55,12 @@ import java.util.stream.Collectors;
|
||||
|
||||
import scala.Tuple2;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertNull;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.Assert.fail;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
import static org.junit.jupiter.api.Assertions.assertNull;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
import static org.junit.jupiter.api.Assertions.fail;
|
||||
|
||||
public class TestHoodieGlobalBloomIndex extends HoodieClientTestHarness {
|
||||
|
||||
@@ -67,7 +69,7 @@ public class TestHoodieGlobalBloomIndex extends HoodieClientTestHarness {
|
||||
public TestHoodieGlobalBloomIndex() {
|
||||
}
|
||||
|
||||
@Before
|
||||
@BeforeEach
|
||||
public void setUp() throws Exception {
|
||||
initSparkContexts("TestHoodieGlobalBloomIndex");
|
||||
initPath();
|
||||
@@ -77,7 +79,7 @@ public class TestHoodieGlobalBloomIndex extends HoodieClientTestHarness {
|
||||
initMetaClient();
|
||||
}
|
||||
|
||||
@After
|
||||
@AfterEach
|
||||
public void tearDown() {
|
||||
cleanupSparkContexts();
|
||||
cleanupMetaClient();
|
||||
@@ -93,12 +95,12 @@ public class TestHoodieGlobalBloomIndex extends HoodieClientTestHarness {
|
||||
// "2016/04/01": 1 file (2_0_20160401010101.parquet)
|
||||
// "2015/03/12": 3 files (1_0_20150312101010.parquet, 3_0_20150312101010.parquet,
|
||||
// 4_0_20150312101010.parquet)
|
||||
new File(basePath + "/2016/01/21").mkdirs();
|
||||
new File(basePath + "/2016/01/21/" + HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE).createNewFile();
|
||||
new File(basePath + "/2016/04/01").mkdirs();
|
||||
new File(basePath + "/2016/04/01/" + HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE).createNewFile();
|
||||
new File(basePath + "/2015/03/12").mkdirs();
|
||||
new File(basePath + "/2015/03/12/" + HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE).createNewFile();
|
||||
Path dir1 = Files.createDirectories(Paths.get(basePath, "2016", "01", "21"));
|
||||
Files.createFile(dir1.resolve(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
|
||||
Path dir2 = Files.createDirectories(Paths.get(basePath, "2016", "04", "01"));
|
||||
Files.createFile(dir2.resolve(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
|
||||
Path dir3 = Files.createDirectories(Paths.get(basePath, "2015", "03", "12"));
|
||||
Files.createFile(dir3.resolve(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
|
||||
|
||||
TestRawTripPayload rowChange1 =
|
||||
new TestRawTripPayload("{\"_row_key\":\"000\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
|
||||
@@ -133,16 +135,16 @@ public class TestHoodieGlobalBloomIndex extends HoodieClientTestHarness {
|
||||
// partitions will NOT be respected by this loadInvolvedFiles(...) call
|
||||
List<Tuple2<String, BloomIndexFileInfo>> filesList = index.loadInvolvedFiles(partitions, jsc, table);
|
||||
// Still 0, as no valid commit
|
||||
assertEquals(filesList.size(), 0);
|
||||
assertEquals(0, filesList.size());
|
||||
|
||||
// Add some commits
|
||||
new File(basePath + "/.hoodie").mkdirs();
|
||||
new File(basePath + "/.hoodie/20160401010101.commit").createNewFile();
|
||||
new File(basePath + "/.hoodie/20150312101010.commit").createNewFile();
|
||||
Path hoodieDir = Files.createDirectories(Paths.get(basePath, ".hoodie"));
|
||||
Files.createFile(hoodieDir.resolve("20160401010101.commit"));
|
||||
Files.createFile(hoodieDir.resolve("20150312101010.commit"));
|
||||
|
||||
table = HoodieTable.create(metaClient, config, jsc);
|
||||
filesList = index.loadInvolvedFiles(partitions, jsc, table);
|
||||
assertEquals(filesList.size(), 4);
|
||||
assertEquals(4, filesList.size());
|
||||
|
||||
Map<String, BloomIndexFileInfo> filesMap = toFileMap(filesList);
|
||||
// key ranges checks
|
||||
@@ -213,12 +215,12 @@ public class TestHoodieGlobalBloomIndex extends HoodieClientTestHarness {
|
||||
// "2016/04/01": 1 file (2_0_20160401010101.parquet)
|
||||
// "2015/03/12": 3 files (1_0_20150312101010.parquet, 3_0_20150312101010.parquet,
|
||||
// 4_0_20150312101010.parquet)
|
||||
new File(basePath + "/2016/01/21").mkdirs();
|
||||
new File(basePath + "/2016/01/21/" + HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE).createNewFile();
|
||||
new File(basePath + "/2016/04/01").mkdirs();
|
||||
new File(basePath + "/2016/04/01/" + HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE).createNewFile();
|
||||
new File(basePath + "/2015/03/12").mkdirs();
|
||||
new File(basePath + "/2015/03/12/" + HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE).createNewFile();
|
||||
Path dir1 = Files.createDirectories(Paths.get(basePath, "2016", "01", "21"));
|
||||
Files.createFile(dir1.resolve(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
|
||||
Path dir2 = Files.createDirectories(Paths.get(basePath, "2016", "04", "01"));
|
||||
Files.createFile(dir2.resolve(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
|
||||
Path dir3 = Files.createDirectories(Paths.get(basePath, "2015", "03", "12"));
|
||||
Files.createFile(dir3.resolve(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
|
||||
|
||||
TestRawTripPayload rowChange1 =
|
||||
new TestRawTripPayload("{\"_row_key\":\"000\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
|
||||
@@ -262,7 +264,7 @@ public class TestHoodieGlobalBloomIndex extends HoodieClientTestHarness {
|
||||
HoodieTable table = HoodieTable.create(metaClient, config, jsc);
|
||||
|
||||
// Add some commits
|
||||
new File(basePath + "/.hoodie").mkdirs();
|
||||
Files.createDirectories(Paths.get(basePath, ".hoodie"));
|
||||
|
||||
// partitions will NOT be respected by this loadInvolvedFiles(...) call
|
||||
JavaRDD<HoodieRecord> taggedRecordRDD = index.tagLocation(recordRDD, jsc, table);
|
||||
@@ -305,8 +307,8 @@ public class TestHoodieGlobalBloomIndex extends HoodieClientTestHarness {
|
||||
|
||||
// Create the original partition, and put a record, along with the meta file
|
||||
// "2016/01/31": 1 file (1_0_20160131101010.parquet)
|
||||
new File(basePath + "/2016/01/31").mkdirs();
|
||||
new File(basePath + "/2016/01/31/" + HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE).createNewFile();
|
||||
Path dir = Files.createDirectories(Paths.get(basePath, "2016", "01", "31"));
|
||||
Files.createFile(dir.resolve(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
|
||||
|
||||
// this record will be saved in table and will be tagged to an empty record
|
||||
TestRawTripPayload originalPayload =
|
||||
@@ -347,7 +349,7 @@ public class TestHoodieGlobalBloomIndex extends HoodieClientTestHarness {
|
||||
HoodieTable table = HoodieTable.create(metaClient, config, jsc);
|
||||
|
||||
// Add some commits
|
||||
new File(basePath + "/.hoodie").mkdirs();
|
||||
Files.createDirectories(Paths.get(basePath, ".hoodie"));
|
||||
|
||||
// test against incoming record with a different partition
|
||||
JavaRDD<HoodieRecord> recordRDD = jsc.parallelize(Collections.singletonList(incomingRecord));
|
||||
|
||||
@@ -35,9 +35,9 @@ import org.apache.hudi.table.HoodieTimelineArchiveLog;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
@@ -47,16 +47,16 @@ import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
|
||||
|
||||
private Configuration hadoopConf;
|
||||
private HoodieTableMetaClient metaClient;
|
||||
|
||||
@Before
|
||||
@BeforeEach
|
||||
public void init() throws Exception {
|
||||
initDFS();
|
||||
initPath();
|
||||
@@ -67,7 +67,7 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
|
||||
metaClient = HoodieTestUtils.init(hadoopConf, basePath);
|
||||
}
|
||||
|
||||
@After
|
||||
@AfterEach
|
||||
public void clean() throws IOException {
|
||||
cleanupDFS();
|
||||
cleanupSparkContexts();
|
||||
@@ -137,7 +137,7 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
|
||||
|
||||
assertEquals("Loaded 6 commits and the count should match", 6, timeline.countInstants());
|
||||
assertEquals(6, timeline.countInstants(), "Loaded 6 commits and the count should match");
|
||||
|
||||
HoodieTestUtils.createCleanFiles(metaClient, basePath, "100", dfs.getConf());
|
||||
HoodieTestUtils.createCleanFiles(metaClient, basePath, "101", dfs.getConf());
|
||||
@@ -151,7 +151,7 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
|
||||
timeline = metaClient.getActiveTimeline().reload().getAllCommitsTimeline().filterCompletedInstants();
|
||||
List<HoodieInstant> originalCommits = timeline.getInstants().collect(Collectors.toList());
|
||||
|
||||
assertEquals("Loaded 6 commits and the count should match", 12, timeline.countInstants());
|
||||
assertEquals(12, timeline.countInstants(), "Loaded 6 commits and the count should match");
|
||||
|
||||
// verify in-flight instants before archive
|
||||
verifyInflightInstants(metaClient, 2);
|
||||
@@ -168,42 +168,42 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
|
||||
// Check compaction instants
|
||||
List<HoodieInstant> instants = metaClient.scanHoodieInstantsFromFileSystem(
|
||||
new Path(metaClient.getMetaAuxiliaryPath()), HoodieActiveTimeline.VALID_EXTENSIONS_IN_ACTIVE_TIMELINE, false);
|
||||
assertEquals("Should delete all compaction instants < 104", 4, instants.size());
|
||||
assertFalse("Requested Compaction must be absent for 100",
|
||||
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "100")));
|
||||
assertFalse("Inflight Compaction must be absent for 100",
|
||||
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "100")));
|
||||
assertFalse("Requested Compaction must be absent for 101",
|
||||
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "101")));
|
||||
assertFalse("Inflight Compaction must be absent for 101",
|
||||
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "101")));
|
||||
assertFalse("Requested Compaction must be absent for 102",
|
||||
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "102")));
|
||||
assertFalse("Inflight Compaction must be absent for 102",
|
||||
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "102")));
|
||||
assertFalse("Requested Compaction must be absent for 103",
|
||||
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "103")));
|
||||
assertFalse("Inflight Compaction must be absent for 103",
|
||||
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "103")));
|
||||
assertTrue("Requested Compaction must be present for 104",
|
||||
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "104")));
|
||||
assertTrue("Inflight Compaction must be present for 104",
|
||||
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "104")));
|
||||
assertTrue("Requested Compaction must be present for 105",
|
||||
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "105")));
|
||||
assertTrue("Inflight Compaction must be present for 105",
|
||||
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "105")));
|
||||
assertEquals(4, instants.size(), "Should delete all compaction instants < 104");
|
||||
assertFalse(instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "100")),
|
||||
"Requested Compaction must be absent for 100");
|
||||
assertFalse(instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "100")),
|
||||
"Inflight Compaction must be absent for 100");
|
||||
assertFalse(instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "101")),
|
||||
"Requested Compaction must be absent for 101");
|
||||
assertFalse(instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "101")),
|
||||
"Inflight Compaction must be absent for 101");
|
||||
assertFalse(instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "102")),
|
||||
"Requested Compaction must be absent for 102");
|
||||
assertFalse(instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "102")),
|
||||
"Inflight Compaction must be absent for 102");
|
||||
assertFalse(instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "103")),
|
||||
"Requested Compaction must be absent for 103");
|
||||
assertFalse(instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "103")),
|
||||
"Inflight Compaction must be absent for 103");
|
||||
assertTrue(instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "104")),
|
||||
"Requested Compaction must be present for 104");
|
||||
assertTrue(instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "104")),
|
||||
"Inflight Compaction must be present for 104");
|
||||
assertTrue(instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "105")),
|
||||
"Requested Compaction must be present for 105");
|
||||
assertTrue(instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "105")),
|
||||
"Inflight Compaction must be present for 105");
|
||||
|
||||
// read the file
|
||||
HoodieArchivedTimeline archivedTimeline = new HoodieArchivedTimeline(metaClient);
|
||||
assertEquals("Total archived records and total read records are the same count",
|
||||
24, archivedTimeline.countInstants());
|
||||
assertEquals(24, archivedTimeline.countInstants(),
|
||||
"Total archived records and total read records are the same count");
|
||||
|
||||
//make sure the archived commits are the same as the (originalcommits - commitsleft)
|
||||
Set<String> readCommits =
|
||||
archivedTimeline.getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toSet());
|
||||
assertEquals("Read commits map should match the originalCommits - commitsLoadedFromArchival",
|
||||
originalCommits.stream().map(HoodieInstant::getTimestamp).collect(Collectors.toSet()), readCommits);
|
||||
archivedTimeline.getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toSet());
|
||||
assertEquals(originalCommits.stream().map(HoodieInstant::getTimestamp).collect(Collectors.toSet()), readCommits,
|
||||
"Read commits map should match the originalCommits - commitsLoadedFromArchival");
|
||||
|
||||
// verify in-flight instants after archive
|
||||
verifyInflightInstants(metaClient, 2);
|
||||
@@ -247,31 +247,31 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
|
||||
HoodieTestDataGenerator.createCommitFile(basePath, "103", dfs.getConf());
|
||||
|
||||
HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
|
||||
assertEquals("Loaded 4 commits and the count should match", 4, timeline.countInstants());
|
||||
assertEquals(4, timeline.countInstants(), "Loaded 4 commits and the count should match");
|
||||
boolean result = archiveLog.archiveIfRequired(jsc);
|
||||
assertTrue(result);
|
||||
timeline = metaClient.getActiveTimeline().reload().getCommitsTimeline().filterCompletedInstants();
|
||||
assertEquals("Should not archive commits when maxCommitsToKeep is 5", 4, timeline.countInstants());
|
||||
assertEquals(4, timeline.countInstants(), "Should not archive commits when maxCommitsToKeep is 5");
|
||||
|
||||
List<HoodieInstant> instants = metaClient.scanHoodieInstantsFromFileSystem(
|
||||
new Path(metaClient.getMetaAuxiliaryPath()), HoodieActiveTimeline.VALID_EXTENSIONS_IN_ACTIVE_TIMELINE, false);
|
||||
assertEquals("Should not delete any aux compaction files when maxCommitsToKeep is 5", 8, instants.size());
|
||||
assertTrue("Requested Compaction must be present for 100",
|
||||
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "100")));
|
||||
assertTrue("Inflight Compaction must be present for 100",
|
||||
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "100")));
|
||||
assertTrue("Requested Compaction must be present for 101",
|
||||
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "101")));
|
||||
assertTrue("Inflight Compaction must be present for 101",
|
||||
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "101")));
|
||||
assertTrue("Requested Compaction must be present for 102",
|
||||
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "102")));
|
||||
assertTrue("Inflight Compaction must be present for 102",
|
||||
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "102")));
|
||||
assertTrue("Requested Compaction must be present for 103",
|
||||
instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "103")));
|
||||
assertTrue("Inflight Compaction must be present for 103",
|
||||
instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "103")));
|
||||
assertEquals(8, instants.size(), "Should not delete any aux compaction files when maxCommitsToKeep is 5");
|
||||
assertTrue(instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "100")),
|
||||
"Requested Compaction must be present for 100");
|
||||
assertTrue(instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "100")),
|
||||
"Inflight Compaction must be present for 100");
|
||||
assertTrue(instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "101")),
|
||||
"Requested Compaction must be present for 101");
|
||||
assertTrue(instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "101")),
|
||||
"Inflight Compaction must be present for 101");
|
||||
assertTrue(instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "102")),
|
||||
"Requested Compaction must be present for 102");
|
||||
assertTrue(instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "102")),
|
||||
"Inflight Compaction must be present for 102");
|
||||
assertTrue(instants.contains(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "103")),
|
||||
"Requested Compaction must be present for 103");
|
||||
assertTrue(instants.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "103")),
|
||||
"Inflight Compaction must be present for 103");
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -290,14 +290,14 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
|
||||
HoodieTestDataGenerator.createCommitFile(basePath, "105", dfs.getConf());
|
||||
|
||||
HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
|
||||
assertEquals("Loaded 6 commits and the count should match", 6, timeline.countInstants());
|
||||
assertEquals(6, timeline.countInstants(), "Loaded 6 commits and the count should match");
|
||||
boolean result = archiveLog.archiveIfRequired(jsc);
|
||||
assertTrue(result);
|
||||
timeline = metaClient.getActiveTimeline().reload().getCommitsTimeline().filterCompletedInstants();
|
||||
assertTrue("Archived commits should always be safe", timeline.containsOrBeforeTimelineStarts("100"));
|
||||
assertTrue("Archived commits should always be safe", timeline.containsOrBeforeTimelineStarts("101"));
|
||||
assertTrue("Archived commits should always be safe", timeline.containsOrBeforeTimelineStarts("102"));
|
||||
assertTrue("Archived commits should always be safe", timeline.containsOrBeforeTimelineStarts("103"));
|
||||
assertTrue(timeline.containsOrBeforeTimelineStarts("100"), "Archived commits should always be safe");
|
||||
assertTrue(timeline.containsOrBeforeTimelineStarts("101"), "Archived commits should always be safe");
|
||||
assertTrue(timeline.containsOrBeforeTimelineStarts("102"), "Archived commits should always be safe");
|
||||
assertTrue(timeline.containsOrBeforeTimelineStarts("103"), "Archived commits should always be safe");
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -317,19 +317,18 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
|
||||
HoodieTestDataGenerator.createCommitFile(basePath, "105", dfs.getConf());
|
||||
|
||||
HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
|
||||
assertEquals("Loaded 6 commits and the count should match", 6, timeline.countInstants());
|
||||
assertEquals(6, timeline.countInstants(), "Loaded 6 commits and the count should match");
|
||||
boolean result = archiveLog.archiveIfRequired(jsc);
|
||||
assertTrue(result);
|
||||
timeline = metaClient.getActiveTimeline().reload().getCommitsTimeline().filterCompletedInstants();
|
||||
assertEquals(
|
||||
"Since we have a savepoint at 101, we should never archive any commit after 101 (we only archive 100)", 5,
|
||||
timeline.countInstants());
|
||||
assertTrue("Archived commits should always be safe",
|
||||
timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "101")));
|
||||
assertTrue("Archived commits should always be safe",
|
||||
timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "102")));
|
||||
assertTrue("Archived commits should always be safe",
|
||||
timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "103")));
|
||||
assertEquals(5, timeline.countInstants(),
|
||||
"Since we have a savepoint at 101, we should never archive any commit after 101 (we only archive 100)");
|
||||
assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "101")),
|
||||
"Archived commits should always be safe");
|
||||
assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "102")),
|
||||
"Archived commits should always be safe");
|
||||
assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "103")),
|
||||
"Archived commits should always be safe");
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -354,28 +353,29 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
|
||||
HoodieTestDataGenerator.createCommitFile(basePath, "107", dfs.getConf());
|
||||
|
||||
HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsAndCompactionTimeline();
|
||||
assertEquals("Loaded 6 commits and the count should match", 8, timeline.countInstants());
|
||||
assertEquals(8, timeline.countInstants(), "Loaded 6 commits and the count should match");
|
||||
boolean result = archiveLog.archiveIfRequired(jsc);
|
||||
assertTrue(result);
|
||||
timeline = metaClient.getActiveTimeline().reload().getCommitsAndCompactionTimeline();
|
||||
assertFalse("Instants before oldest pending compaction can be removed",
|
||||
timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "100")));
|
||||
assertEquals("Since we have a pending compaction at 101, we should never archive any commit "
|
||||
+ "after 101 (we only archive 100)", 7, timeline.countInstants());
|
||||
assertTrue("Requested Compaction must still be present",
|
||||
timeline.containsInstant(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "101")));
|
||||
assertTrue("Instants greater than oldest pending compaction must be present",
|
||||
timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "102")));
|
||||
assertTrue("Instants greater than oldest pending compaction must be present",
|
||||
timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "103")));
|
||||
assertTrue("Instants greater than oldest pending compaction must be present",
|
||||
timeline.containsInstant(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "104")));
|
||||
assertTrue("Instants greater than oldest pending compaction must be present",
|
||||
timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "105")));
|
||||
assertTrue("Instants greater than oldest pending compaction must be present",
|
||||
timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "106")));
|
||||
assertTrue("Instants greater than oldest pending compaction must be present",
|
||||
timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "107")));
|
||||
assertFalse(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "100")),
|
||||
"Instants before oldest pending compaction can be removed");
|
||||
assertEquals(7, timeline.countInstants(),
|
||||
"Since we have a pending compaction at 101, we should never archive any commit "
|
||||
+ "after 101 (we only archive 100)");
|
||||
assertTrue(timeline.containsInstant(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "101")),
|
||||
"Requested Compaction must still be present");
|
||||
assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "102")),
|
||||
"Instants greater than oldest pending compaction must be present");
|
||||
assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "103")),
|
||||
"Instants greater than oldest pending compaction must be present");
|
||||
assertTrue(timeline.containsInstant(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "104")),
|
||||
"Instants greater than oldest pending compaction must be present");
|
||||
assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "105")),
|
||||
"Instants greater than oldest pending compaction must be present");
|
||||
assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "106")),
|
||||
"Instants greater than oldest pending compaction must be present");
|
||||
assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "107")),
|
||||
"Instants greater than oldest pending compaction must be present");
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -412,8 +412,8 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
|
||||
// Test helper: reloads the active timeline of the given meta client, narrows it to
// in-flight CLEAN actions, and asserts that the number of such instants equals
// expectedTotalInstants.
//
// metaClient            - meta client whose active timeline is reloaded and inspected
// expectedTotalInstants - expected count of in-flight clean instants
private void verifyInflightInstants(HoodieTableMetaClient metaClient, int expectedTotalInstants) {
|
||||
// reload() is called first so the count below is taken from a freshly loaded timeline
HoodieTimeline timeline = metaClient.getActiveTimeline().reload()
|
||||
.getTimelineOfActions(Collections.singleton(HoodieTimeline.CLEAN_ACTION)).filterInflights();
|
||||
// NOTE(review): this span is a rendered diff hunk, so both sides of the change appear.
// The next call is the pre-change form with the failure message as the FIRST argument.
assertEquals("Loaded inflight clean actions and the count should match", expectedTotalInstants,
|
||||
timeline.countInstants());
|
||||
// Post-change form: same expected/actual values, failure message moved to the LAST argument.
assertEquals(expectedTotalInstants, timeline.countInstants(),
|
||||
"Loaded inflight clean actions and the count should match");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
||||
@@ -39,22 +39,23 @@ import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Row;
|
||||
import org.junit.After;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.nio.file.Paths;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertNotEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertNotEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public class TestHoodieMergeHandle extends HoodieClientTestHarness {
|
||||
|
||||
@Before
|
||||
@BeforeEach
|
||||
public void setUp() throws Exception {
|
||||
initSparkContexts("TestHoodieMergeHandle");
|
||||
initPath();
|
||||
@@ -63,7 +64,7 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
|
||||
initMetaClient();
|
||||
}
|
||||
|
||||
@After
|
||||
@AfterEach
|
||||
public void tearDown() throws Exception {
|
||||
cleanupFileSystem();
|
||||
cleanupTestDataGenerator();
|
||||
@@ -110,11 +111,12 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
|
||||
// verify that there is a commit
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
HoodieTimeline timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
|
||||
assertEquals("Expecting a single commit.", 1,
|
||||
timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants());
|
||||
Assert.assertEquals("Latest commit should be 001", newCommitTime, timeline.lastInstant().get().getTimestamp());
|
||||
assertEquals("Must contain 44 records", records.size(),
|
||||
HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count());
|
||||
assertEquals(1, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants(),
|
||||
"Expecting a single commit.");
|
||||
assertEquals(newCommitTime, timeline.lastInstant().get().getTimestamp(), "Latest commit should be 001");
|
||||
assertEquals(records.size(),
|
||||
HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count(),
|
||||
"Must contain 44 records");
|
||||
|
||||
/**
|
||||
* Write 2 (insert) This will do a bulk insert of 1 record with the same row_key as record1 in the previous insert
|
||||
@@ -135,10 +137,10 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
|
||||
// verify that there are 2 commits
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
|
||||
assertEquals("Expecting two commits.", 2, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants());
|
||||
Assert.assertEquals("Latest commit should be 002", newCommitTime, timeline.lastInstant().get().getTimestamp());
|
||||
assertEquals(2, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants(), "Expecting two commits.");
|
||||
assertEquals(newCommitTime, timeline.lastInstant().get().getTimestamp(), "Latest commit should be 002");
|
||||
Dataset<Row> dataSet = getRecords();
|
||||
assertEquals("Must contain 45 records", 45, dataSet.count());
|
||||
assertEquals(45, dataSet.count(), "Must contain 45 records");
|
||||
|
||||
/**
|
||||
* Write 3 (insert) This will bulk insert 2 new completely new records. At this point, we will have 2 files with
|
||||
@@ -155,10 +157,10 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
|
||||
// verify that there are now 3 commits
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
|
||||
assertEquals("Expecting three commits.", 3, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants());
|
||||
Assert.assertEquals("Latest commit should be 003", newCommitTime, timeline.lastInstant().get().getTimestamp());
|
||||
assertEquals(3, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants(), "Expecting three commits.");
|
||||
assertEquals(newCommitTime, timeline.lastInstant().get().getTimestamp(), "Latest commit should be 003");
|
||||
dataSet = getRecords();
|
||||
assertEquals("Must contain 47 records", 47, dataSet.count());
|
||||
assertEquals(47, dataSet.count(), "Must contain 47 records");
|
||||
|
||||
/**
|
||||
* Write 4 (updates) This will generate 2 upsert records with id1 and id2. The rider and driver names in the
|
||||
@@ -185,12 +187,12 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
|
||||
|
||||
// verify there are now 4 commits
|
||||
timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
|
||||
assertEquals("Expecting four commits.", 4, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants());
|
||||
Assert.assertEquals("Latest commit should be 004", timeline.lastInstant().get().getTimestamp(), newCommitTime);
|
||||
assertEquals(4, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants(), "Expecting four commits.");
|
||||
assertEquals(timeline.lastInstant().get().getTimestamp(), newCommitTime, "Latest commit should be 004");
|
||||
|
||||
// Check the entire dataset has 47 records still
|
||||
dataSet = getRecords();
|
||||
assertEquals("Must contain 47 records", 47, dataSet.count());
|
||||
assertEquals(47, dataSet.count(), "Must contain 47 records");
|
||||
Row[] rows = (Row[]) dataSet.collect();
|
||||
int record1Count = 0;
|
||||
int record2Count = 0;
|
||||
@@ -233,19 +235,18 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
|
||||
List<WriteStatus> statuses = writeClient.insert(recordsRDD, newCommitTime).collect();
|
||||
|
||||
// All records should be inserts into new parquet
|
||||
Assert.assertTrue(statuses.stream()
|
||||
assertTrue(statuses.stream()
|
||||
.filter(status -> status.getStat().getPrevCommit() != HoodieWriteStat.NULL_COMMIT).count() > 0);
|
||||
// Num writes should be equal to the number of records inserted
|
||||
Assert.assertEquals(
|
||||
(long) statuses.stream().map(status -> status.getStat().getNumWrites()).reduce((a, b) -> a + b).get(), 100);
|
||||
assertEquals(100,
|
||||
(long) statuses.stream().map(status -> status.getStat().getNumWrites()).reduce((a, b) -> a + b).get());
|
||||
// Num update writes should be equal to the number of records updated
|
||||
Assert.assertEquals(
|
||||
(long) statuses.stream().map(status -> status.getStat().getNumUpdateWrites()).reduce((a, b) -> a + b).get(),
|
||||
0);
|
||||
assertEquals(0,
|
||||
(long) statuses.stream().map(status -> status.getStat().getNumUpdateWrites()).reduce((a, b) -> a + b).get());
|
||||
// Num update writes should be equal to the number of insert records converted to updates as part of small file
|
||||
// handling
|
||||
Assert.assertEquals(
|
||||
(long) statuses.stream().map(status -> status.getStat().getNumInserts()).reduce((a, b) -> a + b).get(), 100);
|
||||
assertEquals(100,
|
||||
(long) statuses.stream().map(status -> status.getStat().getNumInserts()).reduce((a, b) -> a + b).get());
|
||||
|
||||
// Update all the 100 records
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
@@ -258,20 +259,18 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
|
||||
statuses = writeClient.upsert(updatedRecordsRDD, newCommitTime).collect();
|
||||
|
||||
// All records should be upserts into existing parquet
|
||||
Assert.assertEquals(
|
||||
statuses.stream().filter(status -> status.getStat().getPrevCommit() == HoodieWriteStat.NULL_COMMIT).count(),
|
||||
0);
|
||||
assertEquals(0,
|
||||
statuses.stream().filter(status -> status.getStat().getPrevCommit() == HoodieWriteStat.NULL_COMMIT).count());
|
||||
// Num writes should be equal to the number of records inserted
|
||||
Assert.assertEquals(
|
||||
(long) statuses.stream().map(status -> status.getStat().getNumWrites()).reduce((a, b) -> a + b).get(), 100);
|
||||
assertEquals(100,
|
||||
(long) statuses.stream().map(status -> status.getStat().getNumWrites()).reduce((a, b) -> a + b).get());
|
||||
// Num update writes should be equal to the number of records updated
|
||||
Assert.assertEquals(
|
||||
(long) statuses.stream().map(status -> status.getStat().getNumUpdateWrites()).reduce((a, b) -> a + b).get(),
|
||||
100);
|
||||
assertEquals(100,
|
||||
(long) statuses.stream().map(status -> status.getStat().getNumUpdateWrites()).reduce((a, b) -> a + b).get());
|
||||
// Num update writes should be equal to the number of insert records converted to updates as part of small file
|
||||
// handling
|
||||
Assert.assertEquals(
|
||||
(long) statuses.stream().map(status -> status.getStat().getNumInserts()).reduce((a, b) -> a + b).get(), 0);
|
||||
assertEquals(0,
|
||||
(long) statuses.stream().map(status -> status.getStat().getNumInserts()).reduce((a, b) -> a + b).get());
|
||||
|
||||
newCommitTime = "102";
|
||||
writeClient.startCommitWithTime(newCommitTime);
|
||||
@@ -282,24 +281,23 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
|
||||
statuses = writeClient.upsert(allRecordsRDD, newCommitTime).collect();
|
||||
|
||||
// All records should be upserts into existing parquet (with inserts as updates small file handled)
|
||||
Assert.assertEquals((long) statuses.stream()
|
||||
.filter(status -> status.getStat().getPrevCommit() == HoodieWriteStat.NULL_COMMIT).count(), 0);
|
||||
assertEquals(0, (long) statuses.stream()
|
||||
.filter(status -> status.getStat().getPrevCommit() == HoodieWriteStat.NULL_COMMIT).count());
|
||||
// Num writes should be equal to the total number of records written
|
||||
Assert.assertEquals(
|
||||
(long) statuses.stream().map(status -> status.getStat().getNumWrites()).reduce((a, b) -> a + b).get(), 200);
|
||||
assertEquals(200,
|
||||
(long) statuses.stream().map(status -> status.getStat().getNumWrites()).reduce((a, b) -> a + b).get());
|
||||
// Num update writes should be equal to the number of records updated (including inserts converted as updates)
|
||||
Assert.assertEquals(
|
||||
(long) statuses.stream().map(status -> status.getStat().getNumUpdateWrites()).reduce((a, b) -> a + b).get(),
|
||||
100);
|
||||
assertEquals(100,
|
||||
(long) statuses.stream().map(status -> status.getStat().getNumUpdateWrites()).reduce((a, b) -> a + b).get());
|
||||
// Num update writes should be equal to the number of insert records converted to updates as part of small file
|
||||
// handling
|
||||
Assert.assertEquals(
|
||||
(long) statuses.stream().map(status -> status.getStat().getNumInserts()).reduce((a, b) -> a + b).get(), 100);
|
||||
assertEquals(100,
|
||||
(long) statuses.stream().map(status -> status.getStat().getNumInserts()).reduce((a, b) -> a + b).get());
|
||||
// Verify all records have location set
|
||||
statuses.forEach(writeStatus -> {
|
||||
writeStatus.getWrittenRecords().forEach(r -> {
|
||||
// Ensure New Location is set
|
||||
Assert.assertTrue(r.getNewLocation().isPresent());
|
||||
assertTrue(r.getNewLocation().isPresent());
|
||||
});
|
||||
});
|
||||
}
|
||||
@@ -309,7 +307,7 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
|
||||
// Check the entire dataset has 8 records still
|
||||
String[] fullPartitionPaths = new String[dataGen.getPartitionPaths().length];
|
||||
for (int i = 0; i < fullPartitionPaths.length; i++) {
|
||||
fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]);
|
||||
fullPartitionPaths[i] = Paths.get(basePath, dataGen.getPartitionPaths()[i], "*").toString();
|
||||
}
|
||||
Dataset<Row> dataSet = HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths);
|
||||
return dataSet;
|
||||
@@ -323,7 +321,7 @@ public class TestHoodieMergeHandle extends HoodieClientTestHarness {
|
||||
// Test helper: walks every WriteStatus in the list and asserts that hasErrors() is false,
// naming the offending file id (status.getFileId()) in the failure message.
//
// statuses - write statuses to check
void assertNoWriteErrors(List<WriteStatus> statuses) {
|
||||
// Verify there are no errors
|
||||
for (WriteStatus status : statuses) {
|
||||
// NOTE(review): this span is a rendered diff hunk, so both sides of the change appear.
// Pre-change form: failure message first, condition second.
assertFalse("Errors found in write of " + status.getFileId(), status.hasErrors());
|
||||
// Post-change form: condition first, failure message last.
assertFalse(status.hasErrors(), "Errors found in write of " + status.getFileId());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -26,12 +26,12 @@ import org.apache.hudi.table.HoodieTable;
|
||||
|
||||
import org.apache.avro.generic.IndexedRecord;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import static org.junit.Assert.fail;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
/**
|
||||
* Tests for {@link HoodieStorageWriterFactory}.
|
||||
@@ -48,17 +48,14 @@ public class TestHoodieStorageWriterFactory extends TestHoodieClientBase {
|
||||
SparkTaskContextSupplier supplier = new SparkTaskContextSupplier();
|
||||
HoodieStorageWriter<IndexedRecord> parquetWriter = HoodieStorageWriterFactory.getStorageWriter(instantTime,
|
||||
parquetPath, table, cfg, HoodieTestDataGenerator.AVRO_SCHEMA, supplier);
|
||||
Assert.assertTrue(parquetWriter instanceof HoodieParquetWriter);
|
||||
assertTrue(parquetWriter instanceof HoodieParquetWriter);
|
||||
|
||||
// other file format exception.
|
||||
final Path logPath = new Path(basePath + "/partition/path/f.b51192a8-574b-4a85-b246-bcfec03ac8bf_100.log.2_1-0-1");
|
||||
try {
|
||||
final Throwable thrown = assertThrows(UnsupportedOperationException.class, () -> {
|
||||
HoodieStorageWriter<IndexedRecord> logWriter = HoodieStorageWriterFactory.getStorageWriter(instantTime, logPath,
|
||||
table, cfg, HoodieTestDataGenerator.AVRO_SCHEMA, supplier);
|
||||
fail("should fail since log storage writer is not supported yet.");
|
||||
} catch (Exception e) {
|
||||
Assert.assertTrue(e instanceof UnsupportedOperationException);
|
||||
Assert.assertTrue(e.getMessage().contains("format not supported yet."));
|
||||
}
|
||||
}, "should fail since log storage writer is not supported yet.");
|
||||
assertTrue(thrown.getMessage().contains("format not supported yet."));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -60,8 +60,7 @@ import org.apache.hadoop.fs.RemoteIterator;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
@@ -82,9 +81,9 @@ import java.util.stream.Stream;
|
||||
import scala.Tuple3;
|
||||
|
||||
import static org.apache.hudi.common.model.HoodieTestUtils.DEFAULT_PARTITION_PATHS;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
/**
|
||||
* Test Cleaning related logic.
|
||||
@@ -123,15 +122,16 @@ public class TestCleaner extends TestHoodieClientBase {
|
||||
// verify that there is a commit
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
HoodieTimeline timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
|
||||
assertEquals("Expecting a single commit.", 1, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants());
|
||||
assertEquals(1, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants(), "Expecting a single commit.");
|
||||
// Should have 100 records in table (check using Index), all in locations marked at commit
|
||||
HoodieTable table = HoodieTable.create(metaClient, client.getConfig(), jsc);
|
||||
|
||||
assertFalse(table.getCompletedCommitsTimeline().empty());
|
||||
String instantTime = table.getCompletedCommitsTimeline().getInstants().findFirst().get().getTimestamp();
|
||||
assertFalse(table.getCompletedCleanTimeline().empty());
|
||||
assertEquals("The clean instant should be the same as the commit instant", instantTime,
|
||||
table.getCompletedCleanTimeline().getInstants().findFirst().get().getTimestamp());
|
||||
assertEquals(instantTime,
|
||||
table.getCompletedCleanTimeline().getInstants().findFirst().get().getTimestamp(),
|
||||
"The clean instant should be the same as the commit instant");
|
||||
|
||||
HoodieIndex index = HoodieIndex.createIndex(cfg, jsc);
|
||||
List<HoodieRecord> taggedRecords = index.tagLocation(jsc.parallelize(records, 1), jsc, table).collect();
|
||||
@@ -272,22 +272,22 @@ public class TestCleaner extends TestHoodieClientBase {
|
||||
return compactionFileIdToLatestFileSlice.get(fileGroup.getFileGroupId()).getBaseInstantTime()
|
||||
.equals(df.getCommitTime());
|
||||
}).findAny());
|
||||
Assert.assertTrue("Data File selected for compaction is retained",
|
||||
dataFileForCompactionPresent.isPresent());
|
||||
assertTrue(dataFileForCompactionPresent.isPresent(),
|
||||
"Data File selected for compaction is retained");
|
||||
} else {
|
||||
// file has no more than max versions
|
||||
String fileId = fileGroup.getFileGroupId().getFileId();
|
||||
List<HoodieBaseFile> dataFiles = fileGroup.getAllBaseFiles().collect(Collectors.toList());
|
||||
|
||||
assertTrue("fileId " + fileId + " has more than " + maxVersions + " versions",
|
||||
dataFiles.size() <= maxVersions);
|
||||
assertTrue(dataFiles.size() <= maxVersions,
|
||||
"fileId " + fileId + " has more than " + maxVersions + " versions");
|
||||
|
||||
// Each file, has the latest N versions (i.e cleaning gets rid of older versions)
|
||||
List<String> commitedVersions = new ArrayList<>(fileIdToVersions.get(fileId));
|
||||
for (int i = 0; i < dataFiles.size(); i++) {
|
||||
assertEquals("File " + fileId + " does not have latest versions on commits" + commitedVersions,
|
||||
(dataFiles.get(i)).getCommitTime(),
|
||||
commitedVersions.get(commitedVersions.size() - 1 - i));
|
||||
assertEquals((dataFiles.get(i)).getCommitTime(),
|
||||
commitedVersions.get(commitedVersions.size() - 1 - i),
|
||||
"File " + fileId + " does not have latest versions on commits" + commitedVersions);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -395,8 +395,8 @@ public class TestCleaner extends TestHoodieClientBase {
|
||||
LOG.debug("Data File - " + value);
|
||||
commitTimes.add(value.getCommitTime());
|
||||
});
|
||||
assertEquals("Only contain acceptable versions of file should be present",
|
||||
acceptableCommits.stream().map(HoodieInstant::getTimestamp).collect(Collectors.toSet()), commitTimes);
|
||||
assertEquals(acceptableCommits.stream().map(HoodieInstant::getTimestamp).collect(Collectors.toSet()), commitTimes,
|
||||
"Only contain acceptable versions of file should be present");
|
||||
}
|
||||
}
|
||||
} catch (IOException ioe) {
|
||||
@@ -432,17 +432,17 @@ public class TestCleaner extends TestHoodieClientBase {
|
||||
HoodieInstant completedCleanInstant = new HoodieInstant(State.COMPLETED, HoodieTimeline.CLEAN_ACTION, cleanInstantTs);
|
||||
metaClient.reloadActiveTimeline().revertToInflight(completedCleanInstant);
|
||||
HoodieCleanMetadata cleanMetadata2 = writeClient.clean(getNextInstant());
|
||||
Assert.assertEquals(cleanMetadata1.getEarliestCommitToRetain(), cleanMetadata2.getEarliestCommitToRetain());
|
||||
Assert.assertEquals(new Integer(0), cleanMetadata2.getTotalFilesDeleted());
|
||||
Assert.assertEquals(cleanMetadata1.getPartitionMetadata().keySet(), cleanMetadata2.getPartitionMetadata().keySet());
|
||||
assertEquals(cleanMetadata1.getEarliestCommitToRetain(), cleanMetadata2.getEarliestCommitToRetain());
|
||||
assertEquals(new Integer(0), cleanMetadata2.getTotalFilesDeleted());
|
||||
assertEquals(cleanMetadata1.getPartitionMetadata().keySet(), cleanMetadata2.getPartitionMetadata().keySet());
|
||||
final HoodieCleanMetadata retriedCleanMetadata = CleanerUtils.getCleanerMetadata(HoodieTableMetaClient.reload(metaClient), completedCleanInstant);
|
||||
cleanMetadata1.getPartitionMetadata().keySet().forEach(k -> {
|
||||
HoodieCleanPartitionMetadata p1 = cleanMetadata1.getPartitionMetadata().get(k);
|
||||
HoodieCleanPartitionMetadata p2 = retriedCleanMetadata.getPartitionMetadata().get(k);
|
||||
Assert.assertEquals(p1.getDeletePathPatterns(), p2.getDeletePathPatterns());
|
||||
Assert.assertEquals(p1.getSuccessDeleteFiles(), p2.getFailedDeleteFiles());
|
||||
Assert.assertEquals(p1.getPartitionPath(), p2.getPartitionPath());
|
||||
Assert.assertEquals(k, p1.getPartitionPath());
|
||||
assertEquals(p1.getDeletePathPatterns(), p2.getDeletePathPatterns());
|
||||
assertEquals(p1.getSuccessDeleteFiles(), p2.getFailedDeleteFiles());
|
||||
assertEquals(p1.getPartitionPath(), p2.getPartitionPath());
|
||||
assertEquals(k, p1.getPartitionPath());
|
||||
});
|
||||
}
|
||||
|
||||
@@ -478,12 +478,12 @@ public class TestCleaner extends TestHoodieClientBase {
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
|
||||
List<HoodieCleanStat> hoodieCleanStatsOne = runCleaner(config);
|
||||
assertEquals("Must not clean any files", 0,
|
||||
assertEquals(0,
|
||||
getCleanStat(hoodieCleanStatsOne, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles()
|
||||
.size());
|
||||
assertEquals("Must not clean any files", 0,
|
||||
.size(), "Must not clean any files");
|
||||
assertEquals(0,
|
||||
getCleanStat(hoodieCleanStatsOne, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).getSuccessDeleteFiles()
|
||||
.size());
|
||||
.size(), "Must not clean any files");
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
|
||||
file1P0C0));
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "000",
|
||||
@@ -501,12 +501,12 @@ public class TestCleaner extends TestHoodieClientBase {
|
||||
HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001", file1P1C0); // update
|
||||
|
||||
List<HoodieCleanStat> hoodieCleanStatsTwo = runCleaner(config);
|
||||
assertEquals("Must clean 1 file", 1,
|
||||
assertEquals(1,
|
||||
getCleanStat(hoodieCleanStatsTwo, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles()
|
||||
.size());
|
||||
assertEquals("Must clean 1 file", 1,
|
||||
.size(), "Must clean 1 file");
|
||||
assertEquals(1,
|
||||
getCleanStat(hoodieCleanStatsTwo, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).getSuccessDeleteFiles()
|
||||
.size());
|
||||
.size(), "Must clean 1 file");
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
|
||||
file2P0C1));
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001",
|
||||
@@ -526,9 +526,9 @@ public class TestCleaner extends TestHoodieClientBase {
|
||||
HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002");
|
||||
|
||||
List<HoodieCleanStat> hoodieCleanStatsThree = runCleaner(config);
|
||||
assertEquals("Must clean two files", 2,
|
||||
assertEquals(2,
|
||||
getCleanStat(hoodieCleanStatsThree, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH)
|
||||
.getSuccessDeleteFiles().size());
|
||||
.getSuccessDeleteFiles().size(), "Must clean two files");
|
||||
assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
|
||||
file1P0C0));
|
||||
assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
|
||||
@@ -539,9 +539,9 @@ public class TestCleaner extends TestHoodieClientBase {
|
||||
// No cleaning on partially written file, with no commit.
|
||||
HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "003", file3P0C2); // update
|
||||
List<HoodieCleanStat> hoodieCleanStatsFour = runCleaner(config);
|
||||
assertEquals("Must not clean any files", 0,
|
||||
assertEquals(0,
|
||||
getCleanStat(hoodieCleanStatsFour, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles()
|
||||
.size());
|
||||
.size(), "Must not clean any files");
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002",
|
||||
file3P0C2));
|
||||
}
|
||||
@@ -578,9 +578,9 @@ public class TestCleaner extends TestHoodieClientBase {
|
||||
HoodieTestUtils.createCompactionCommitFiles(fs, basePath, "001");
|
||||
|
||||
List<HoodieCleanStat> hoodieCleanStats = runCleaner(config);
|
||||
assertEquals("Must clean three files, one parquet and 2 log files", 3,
|
||||
assertEquals(3,
|
||||
getCleanStat(hoodieCleanStats, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles()
|
||||
.size());
|
||||
.size(), "Must clean three files, one parquet and 2 log files");
|
||||
assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
|
||||
file1P0));
|
||||
assertFalse(HoodieTestUtils.doesLogFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
|
||||
@@ -646,37 +646,37 @@ public class TestCleaner extends TestHoodieClientBase {
|
||||
CleanMetadataMigrator migrator = new CleanMetadataMigrator(metaClient);
|
||||
HoodieCleanMetadata oldMetadata =
|
||||
migrator.migrateToVersion(metadata, metadata.getVersion(), CleanerUtils.CLEAN_METADATA_VERSION_1);
|
||||
Assert.assertEquals(CleanerUtils.CLEAN_METADATA_VERSION_1, oldMetadata.getVersion());
|
||||
assertEquals(CleanerUtils.CLEAN_METADATA_VERSION_1, oldMetadata.getVersion());
|
||||
testCleanMetadataEquality(metadata, oldMetadata);
|
||||
testCleanMetadataPathEquality(oldMetadata, oldExpected);
|
||||
|
||||
HoodieCleanMetadata newMetadata = migrator.upgradeToLatest(oldMetadata, oldMetadata.getVersion());
|
||||
Assert.assertEquals(CleanerUtils.LATEST_CLEAN_METADATA_VERSION, newMetadata.getVersion());
|
||||
assertEquals(CleanerUtils.LATEST_CLEAN_METADATA_VERSION, newMetadata.getVersion());
|
||||
testCleanMetadataEquality(oldMetadata, newMetadata);
|
||||
testCleanMetadataPathEquality(newMetadata, newExpected);
|
||||
testCleanMetadataPathEquality(oldMetadata, oldExpected);
|
||||
}
|
||||
|
||||
public void testCleanMetadataEquality(HoodieCleanMetadata input1, HoodieCleanMetadata input2) {
|
||||
Assert.assertEquals(input1.getEarliestCommitToRetain(), input2.getEarliestCommitToRetain());
|
||||
Assert.assertEquals(input1.getStartCleanTime(), input2.getStartCleanTime());
|
||||
Assert.assertEquals(input1.getTimeTakenInMillis(), input2.getTimeTakenInMillis());
|
||||
Assert.assertEquals(input1.getTotalFilesDeleted(), input2.getTotalFilesDeleted());
|
||||
assertEquals(input1.getEarliestCommitToRetain(), input2.getEarliestCommitToRetain());
|
||||
assertEquals(input1.getStartCleanTime(), input2.getStartCleanTime());
|
||||
assertEquals(input1.getTimeTakenInMillis(), input2.getTimeTakenInMillis());
|
||||
assertEquals(input1.getTotalFilesDeleted(), input2.getTotalFilesDeleted());
|
||||
|
||||
Map<String, HoodieCleanPartitionMetadata> map1 = input1.getPartitionMetadata();
|
||||
Map<String, HoodieCleanPartitionMetadata> map2 = input2.getPartitionMetadata();
|
||||
|
||||
Assert.assertEquals(map1.keySet(), map2.keySet());
|
||||
assertEquals(map1.keySet(), map2.keySet());
|
||||
|
||||
List<String> partitions1 = map1.values().stream().map(HoodieCleanPartitionMetadata::getPartitionPath).collect(
|
||||
Collectors.toList());
|
||||
List<String> partitions2 = map2.values().stream().map(HoodieCleanPartitionMetadata::getPartitionPath).collect(
|
||||
Collectors.toList());
|
||||
Assert.assertEquals(partitions1, partitions2);
|
||||
assertEquals(partitions1, partitions2);
|
||||
|
||||
List<String> policies1 = map1.values().stream().map(HoodieCleanPartitionMetadata::getPolicy).collect(Collectors.toList());
|
||||
List<String> policies2 = map2.values().stream().map(HoodieCleanPartitionMetadata::getPolicy).collect(Collectors.toList());
|
||||
Assert.assertEquals(policies1, policies2);
|
||||
assertEquals(policies1, policies2);
|
||||
}
|
||||
|
||||
private void testCleanMetadataPathEquality(HoodieCleanMetadata metadata, Map<String, Tuple3> expected) {
|
||||
@@ -687,9 +687,9 @@ public class TestCleaner extends TestHoodieClientBase {
|
||||
String partitionPath = entry.getKey();
|
||||
HoodieCleanPartitionMetadata partitionMetadata = entry.getValue();
|
||||
|
||||
Assert.assertEquals(expected.get(partitionPath)._1(), partitionMetadata.getDeletePathPatterns());
|
||||
Assert.assertEquals(expected.get(partitionPath)._2(), partitionMetadata.getSuccessDeleteFiles());
|
||||
Assert.assertEquals(expected.get(partitionPath)._3(), partitionMetadata.getFailedDeleteFiles());
|
||||
assertEquals(expected.get(partitionPath)._1(), partitionMetadata.getDeletePathPatterns());
|
||||
assertEquals(expected.get(partitionPath)._2(), partitionMetadata.getSuccessDeleteFiles());
|
||||
assertEquals(expected.get(partitionPath)._3(), partitionMetadata.getFailedDeleteFiles());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -751,12 +751,12 @@ public class TestCleaner extends TestHoodieClientBase {
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
|
||||
List<HoodieCleanStat> hoodieCleanStatsOne = runCleaner(config, simulateFailureRetry);
|
||||
assertEquals("Must not clean any files", 0,
|
||||
assertEquals(0,
|
||||
getCleanStat(hoodieCleanStatsOne, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles()
|
||||
.size());
|
||||
assertEquals("Must not clean any files", 0,
|
||||
.size(), "Must not clean any files");
|
||||
assertEquals(0,
|
||||
getCleanStat(hoodieCleanStatsOne, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).getSuccessDeleteFiles()
|
||||
.size());
|
||||
.size(), "Must not clean any files");
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
|
||||
file1P0C0));
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "000",
|
||||
@@ -786,12 +786,12 @@ public class TestCleaner extends TestHoodieClientBase {
|
||||
new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "001"),
|
||||
Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
|
||||
List<HoodieCleanStat> hoodieCleanStatsTwo = runCleaner(config, simulateFailureRetry);
|
||||
assertEquals("Must not clean any files", 0,
|
||||
assertEquals(0,
|
||||
getCleanStat(hoodieCleanStatsTwo, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles()
|
||||
.size());
|
||||
assertEquals("Must not clean any files", 0,
|
||||
.size(), "Must not clean any files");
|
||||
assertEquals(0,
|
||||
getCleanStat(hoodieCleanStatsTwo, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).getSuccessDeleteFiles()
|
||||
.size());
|
||||
.size(), "Must not clean any files");
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
|
||||
file2P0C1));
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001",
|
||||
@@ -820,9 +820,10 @@ public class TestCleaner extends TestHoodieClientBase {
|
||||
Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
|
||||
|
||||
List<HoodieCleanStat> hoodieCleanStatsThree = runCleaner(config, simulateFailureRetry);
|
||||
assertEquals("Must not clean any file. We have to keep 1 version before the latest commit time to keep", 0,
|
||||
assertEquals(0,
|
||||
getCleanStat(hoodieCleanStatsThree, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH)
|
||||
.getSuccessDeleteFiles().size());
|
||||
.getSuccessDeleteFiles().size(),
|
||||
"Must not clean any file. We have to keep 1 version before the latest commit time to keep");
|
||||
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
|
||||
file1P0C0));
|
||||
@@ -844,9 +845,9 @@ public class TestCleaner extends TestHoodieClientBase {
|
||||
Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
|
||||
|
||||
List<HoodieCleanStat> hoodieCleanStatsFour = runCleaner(config, simulateFailureRetry);
|
||||
assertEquals("Must not clean one old file", 1,
|
||||
assertEquals(1,
|
||||
getCleanStat(hoodieCleanStatsFour, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles()
|
||||
.size());
|
||||
.size(), "Must not clean one old file");
|
||||
|
||||
assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
|
||||
file1P0C0));
|
||||
@@ -867,7 +868,7 @@ public class TestCleaner extends TestHoodieClientBase {
|
||||
HoodieTestUtils
|
||||
.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "004", file3P0C2); // update
|
||||
commitMetadata = generateCommitMetadata(CollectionUtils.createImmutableMap(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH,
|
||||
CollectionUtils.createImmutableList(file3P0C2)));
|
||||
CollectionUtils.createImmutableList(file3P0C2)));
|
||||
metaClient.getActiveTimeline().createNewInstant(
|
||||
new HoodieInstant(State.REQUESTED, HoodieTimeline.COMMIT_ACTION, "004"));
|
||||
metaClient.getActiveTimeline().transitionRequestedToInflight(
|
||||
@@ -875,8 +876,8 @@ public class TestCleaner extends TestHoodieClientBase {
|
||||
Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
|
||||
List<HoodieCleanStat> hoodieCleanStatsFive = runCleaner(config, simulateFailureRetry);
|
||||
HoodieCleanStat cleanStat = getCleanStat(hoodieCleanStatsFive, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH);
|
||||
assertEquals("Must not clean any files", 0,
|
||||
cleanStat != null ? cleanStat.getSuccessDeleteFiles().size() : 0);
|
||||
assertEquals(0,
|
||||
cleanStat != null ? cleanStat.getSuccessDeleteFiles().size() : 0, "Must not clean any files");
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
|
||||
file1P0C0));
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
|
||||
@@ -889,8 +890,8 @@ public class TestCleaner extends TestHoodieClientBase {
|
||||
@Test
|
||||
public void testCleanMarkerDataFilesOnRollback() throws IOException {
|
||||
List<String> markerFiles = createMarkerFiles("000", 10);
|
||||
assertEquals("Some marker files are created.", 10, markerFiles.size());
|
||||
assertEquals("Some marker files are created.", markerFiles.size(), getTotalTempFiles());
|
||||
assertEquals(10, markerFiles.size(), "Some marker files are created.");
|
||||
assertEquals(markerFiles.size(), getTotalTempFiles(), "Some marker files are created.");
|
||||
|
||||
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).build();
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
@@ -901,7 +902,7 @@ public class TestCleaner extends TestHoodieClientBase {
|
||||
new HoodieInstant(State.REQUESTED, HoodieTimeline.COMMIT_ACTION, "000"), Option.empty());
|
||||
metaClient.reloadActiveTimeline();
|
||||
table.rollback(jsc, "001", new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "000"), true);
|
||||
assertEquals("All temp files are deleted.", 0, getTotalTempFiles());
|
||||
assertEquals(0, getTotalTempFiles(), "All temp files are deleted.");
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -922,7 +923,7 @@ public class TestCleaner extends TestHoodieClientBase {
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
|
||||
List<HoodieCleanStat> hoodieCleanStatsOne = runCleaner(config);
|
||||
assertTrue("HoodieCleanStats should be empty for a table with empty partitionPaths", hoodieCleanStatsOne.isEmpty());
|
||||
assertTrue(hoodieCleanStatsOne.isEmpty(), "HoodieCleanStats should be empty for a table with empty partitionPaths");
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -998,7 +999,7 @@ public class TestCleaner extends TestHoodieClientBase {
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
|
||||
List<HoodieCleanStat> cleanStats = runCleaner(config);
|
||||
assertEquals("Must not clean any files", 0, cleanStats.size());
|
||||
assertEquals(0, cleanStats.size(), "Must not clean any files");
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1088,11 +1089,11 @@ public class TestCleaner extends TestHoodieClientBase {
|
||||
String baseInstantForCompaction = fileIdToLatestInstantBeforeCompaction.get(fileId);
|
||||
Option<FileSlice> fileSliceForCompaction = Option.fromJavaOptional(hoodieTable.getSliceView()
|
||||
.getLatestFileSlicesBeforeOrOn(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, baseInstantForCompaction,
|
||||
true)
|
||||
true)
|
||||
.filter(fs -> fs.getFileId().equals(fileId)).findFirst());
|
||||
Assert.assertTrue("Base Instant for Compaction must be preserved", fileSliceForCompaction.isPresent());
|
||||
Assert.assertTrue("FileSlice has data-file", fileSliceForCompaction.get().getBaseFile().isPresent());
|
||||
Assert.assertEquals("FileSlice has log-files", 2, fileSliceForCompaction.get().getLogFiles().count());
|
||||
assertTrue(fileSliceForCompaction.isPresent(), "Base Instant for Compaction must be preserved");
|
||||
assertTrue(fileSliceForCompaction.get().getBaseFile().isPresent(), "FileSlice has data-file");
|
||||
assertEquals(2, fileSliceForCompaction.get().getLogFiles().count(), "FileSlice has log-files");
|
||||
});
|
||||
|
||||
// Test for progress (Did we clean some files ?)
|
||||
@@ -1100,10 +1101,10 @@ public class TestCleaner extends TestHoodieClientBase {
|
||||
.flatMap(cleanStat -> convertPathToFileIdWithCommitTime(newMetaClient, cleanStat.getDeletePathPatterns())
|
||||
.map(fileIdWithCommitTime -> {
|
||||
if (expFileIdToPendingCompaction.containsKey(fileIdWithCommitTime.getKey())) {
|
||||
Assert.assertTrue("Deleted instant time must be less than pending compaction",
|
||||
HoodieTimeline.compareTimestamps(
|
||||
fileIdToLatestInstantBeforeCompaction.get(fileIdWithCommitTime.getKey()),
|
||||
fileIdWithCommitTime.getValue(), HoodieTimeline.GREATER));
|
||||
assertTrue(HoodieTimeline.compareTimestamps(
|
||||
fileIdToLatestInstantBeforeCompaction.get(fileIdWithCommitTime.getKey()),
|
||||
fileIdWithCommitTime.getValue(), HoodieTimeline.GREATER),
|
||||
"Deleted instant time must be less than pending compaction");
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
@@ -1111,9 +1112,9 @@ public class TestCleaner extends TestHoodieClientBase {
|
||||
long numDeleted =
|
||||
hoodieCleanStats.stream().mapToLong(cleanStat -> cleanStat.getDeletePathPatterns().size()).sum();
|
||||
// Tighter check for regression
|
||||
Assert.assertEquals("Correct number of files deleted", expNumFilesDeleted, numDeleted);
|
||||
Assert.assertEquals("Correct number of files under compaction deleted", expNumFilesUnderCompactionDeleted,
|
||||
numFilesUnderCompactionDeleted);
|
||||
assertEquals(expNumFilesDeleted, numDeleted, "Correct number of files deleted");
|
||||
assertEquals(expNumFilesUnderCompactionDeleted, numFilesUnderCompactionDeleted,
|
||||
"Correct number of files under compaction deleted");
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -25,22 +25,24 @@ import org.apache.hudi.common.fs.ConsistencyGuardConfig;
|
||||
import org.apache.hudi.common.fs.FailSafeConsistencyGuard;
|
||||
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.concurrent.TimeoutException;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
|
||||
public class TestConsistencyGuard extends HoodieClientTestHarness {
|
||||
|
||||
@Before
|
||||
@BeforeEach
|
||||
public void setup() {
|
||||
initPath();
|
||||
initFileSystemWithDefaultConfiguration();
|
||||
}
|
||||
|
||||
@After
|
||||
@AfterEach
|
||||
public void tearDown() throws Exception {
|
||||
cleanupFileSystem();
|
||||
}
|
||||
@@ -65,35 +67,43 @@ public class TestConsistencyGuard extends HoodieClientTestHarness {
|
||||
.asList(basePath + "/partition/path/f1_1-0-1_000.parquet", basePath + "/partition/path/f2_1-0-1_000.parquet"));
|
||||
}
|
||||
|
||||
@Test(expected = TimeoutException.class)
|
||||
@Test
|
||||
public void testCheckFailingAppear() throws Exception {
|
||||
HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f1");
|
||||
ConsistencyGuard passing = new FailSafeConsistencyGuard(fs, getConsistencyGuardConfig());
|
||||
passing.waitTillAllFilesAppear(basePath + "/partition/path", Arrays
|
||||
.asList(basePath + "/partition/path/f1_1-0-2_000.parquet", basePath + "/partition/path/f2_1-0-2_000.parquet"));
|
||||
assertThrows(TimeoutException.class, () -> {
|
||||
passing.waitTillAllFilesAppear(basePath + "/partition/path", Arrays
|
||||
.asList(basePath + "/partition/path/f1_1-0-2_000.parquet", basePath + "/partition/path/f2_1-0-2_000.parquet"));
|
||||
});
|
||||
}
|
||||
|
||||
@Test(expected = TimeoutException.class)
|
||||
@Test
|
||||
public void testCheckFailingAppears() throws Exception {
|
||||
HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f1");
|
||||
ConsistencyGuard passing = new FailSafeConsistencyGuard(fs, getConsistencyGuardConfig());
|
||||
passing.waitTillFileAppears(new Path(basePath + "/partition/path/f1_1-0-2_000.parquet"));
|
||||
assertThrows(TimeoutException.class, () -> {
|
||||
passing.waitTillFileAppears(new Path(basePath + "/partition/path/f1_1-0-2_000.parquet"));
|
||||
});
|
||||
}
|
||||
|
||||
@Test(expected = TimeoutException.class)
|
||||
@Test
|
||||
public void testCheckFailingDisappear() throws Exception {
|
||||
HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f1");
|
||||
ConsistencyGuard passing = new FailSafeConsistencyGuard(fs, getConsistencyGuardConfig());
|
||||
passing.waitTillAllFilesDisappear(basePath + "/partition/path", Arrays
|
||||
.asList(basePath + "/partition/path/f1_1-0-1_000.parquet", basePath + "/partition/path/f2_1-0-2_000.parquet"));
|
||||
assertThrows(TimeoutException.class, () -> {
|
||||
passing.waitTillAllFilesDisappear(basePath + "/partition/path", Arrays
|
||||
.asList(basePath + "/partition/path/f1_1-0-1_000.parquet", basePath + "/partition/path/f2_1-0-2_000.parquet"));
|
||||
});
|
||||
}
|
||||
|
||||
@Test(expected = TimeoutException.class)
|
||||
@Test
|
||||
public void testCheckFailingDisappears() throws Exception {
|
||||
HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f1");
|
||||
HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f1");
|
||||
ConsistencyGuard passing = new FailSafeConsistencyGuard(fs, getConsistencyGuardConfig());
|
||||
passing.waitTillFileDisappears(new Path(basePath + "/partition/path/f1_1-0-1_000.parquet"));
|
||||
assertThrows(TimeoutException.class, () -> {
|
||||
passing.waitTillFileDisappears(new Path(basePath + "/partition/path/f1_1-0-1_000.parquet"));
|
||||
});
|
||||
}
|
||||
|
||||
private ConsistencyGuardConfig getConsistencyGuardConfig() {
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
|
||||
package org.apache.hudi.table;
|
||||
|
||||
import org.apache.hadoop.mapred.FileInputFormat;
|
||||
import org.apache.hadoop.mapred.JobConf;
|
||||
import org.apache.hudi.client.HoodieReadClient;
|
||||
import org.apache.hudi.client.HoodieWriteClient;
|
||||
import org.apache.hudi.client.WriteStatus;
|
||||
@@ -57,23 +55,27 @@ import org.apache.hudi.hadoop.HoodieParquetInputFormat;
|
||||
import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat;
|
||||
import org.apache.hudi.index.HoodieIndex;
|
||||
import org.apache.hudi.index.HoodieIndex.IndexType;
|
||||
import org.apache.hudi.table.action.deltacommit.DeleteDeltaCommitActionExecutor;
|
||||
import org.apache.hudi.table.action.deltacommit.DeltaCommitActionExecutor;
|
||||
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hudi.table.action.deltacommit.DeltaCommitActionExecutor;
|
||||
import org.apache.hudi.table.action.deltacommit.DeleteDeltaCommitActionExecutor;
|
||||
import org.apache.hadoop.mapred.FileInputFormat;
|
||||
import org.apache.hadoop.mapred.JobConf;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.junit.After;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.junit.rules.TemporaryFolder;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.io.TempDir;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
@@ -82,9 +84,9 @@ import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import static org.apache.hudi.common.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
|
||||
@@ -94,7 +96,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
private HoodieParquetRealtimeInputFormat rtInputFormat;
|
||||
private JobConf rtJobConf;
|
||||
|
||||
@Before
|
||||
@BeforeEach
|
||||
public void init() throws IOException {
|
||||
initDFS();
|
||||
initSparkContexts("TestHoodieMergeOnReadTable");
|
||||
@@ -114,7 +116,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
rtInputFormat.setConf(rtJobConf);
|
||||
}
|
||||
|
||||
@After
|
||||
@AfterEach
|
||||
public void clean() throws IOException {
|
||||
cleanupDFS();
|
||||
cleanupSparkContexts();
|
||||
@@ -159,13 +161,13 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
// verify that there is a commit
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
HoodieTimeline timeline = metaClient.getCommitTimeline().filterCompletedInstants();
|
||||
assertEquals("Expecting a single commit.", 1,
|
||||
timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants());
|
||||
assertEquals(1, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants(),
|
||||
"Expecting a single commit.");
|
||||
String latestCompactionCommitTime = timeline.lastInstant().get().getTimestamp();
|
||||
assertTrue(HoodieTimeline.compareTimestamps("000", latestCompactionCommitTime, HoodieTimeline.LESSER));
|
||||
|
||||
assertEquals("Must contain 200 records", 200,
|
||||
HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, "000").count());
|
||||
assertEquals(200, HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, "000").count(),
|
||||
"Must contain 200 records");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -310,7 +312,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
|
||||
Option<HoodieInstant> deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().firstInstant();
|
||||
assertTrue(deltaCommit.isPresent());
|
||||
assertEquals("Delta commit should be 001", "001", deltaCommit.get().getTimestamp());
|
||||
assertEquals("001", deltaCommit.get().getTimestamp(), "Delta commit should be 001");
|
||||
|
||||
Option<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant();
|
||||
assertFalse(commit.isPresent());
|
||||
@@ -323,8 +325,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
|
||||
roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles);
|
||||
dataFilesToRead = roView.getLatestBaseFiles();
|
||||
assertTrue("should list the parquet files we wrote in the delta commit",
|
||||
dataFilesToRead.findAny().isPresent());
|
||||
assertTrue(dataFilesToRead.findAny().isPresent(),
|
||||
"should list the parquet files we wrote in the delta commit");
|
||||
|
||||
/**
|
||||
* Write 2 (only updates, written to .log file)
|
||||
@@ -352,7 +354,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().lastInstant();
|
||||
assertTrue(deltaCommit.isPresent());
|
||||
assertEquals("Latest Delta commit should be 004", "004", deltaCommit.get().getTimestamp());
|
||||
assertEquals("004", deltaCommit.get().getTimestamp(), "Latest Delta commit should be 004");
|
||||
|
||||
commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant();
|
||||
assertFalse(commit.isPresent());
|
||||
@@ -365,7 +367,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
List<String> dataFiles = roView.getLatestBaseFiles().map(HoodieBaseFile::getPath).collect(Collectors.toList());
|
||||
List<GenericRecord> recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath);
|
||||
// Wrote 20 records and deleted 20 records, so remaining 20-20 = 0
|
||||
assertEquals("Must contain 0 records", 0, recordsRead.size());
|
||||
assertEquals(0, recordsRead.size(), "Must contain 0 records");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -394,7 +396,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
|
||||
Option<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant();
|
||||
assertTrue(commit.isPresent());
|
||||
assertEquals("commit should be 001", "001", commit.get().getTimestamp());
|
||||
assertEquals("001", commit.get().getTimestamp(), "commit should be 001");
|
||||
|
||||
/**
|
||||
* Write 2 (updates)
|
||||
@@ -451,7 +453,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
|
||||
Option<HoodieInstant> deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().firstInstant();
|
||||
assertTrue(deltaCommit.isPresent());
|
||||
assertEquals("Delta commit should be 001", "001", deltaCommit.get().getTimestamp());
|
||||
assertEquals("001", deltaCommit.get().getTimestamp(), "Delta commit should be 001");
|
||||
|
||||
Option<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant();
|
||||
assertFalse(commit.isPresent());
|
||||
@@ -464,8 +466,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
|
||||
roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles);
|
||||
dataFilesToRead = roView.getLatestBaseFiles();
|
||||
assertTrue("should list the parquet files we wrote in the delta commit",
|
||||
dataFilesToRead.findAny().isPresent());
|
||||
assertTrue(dataFilesToRead.findAny().isPresent(),
|
||||
"should list the parquet files we wrote in the delta commit");
|
||||
|
||||
/**
|
||||
* Write 2 (inserts + updates - testing failed delta commit)
|
||||
@@ -491,11 +493,11 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
secondClient.rollback(commitTime1);
|
||||
allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath());
|
||||
// After rollback, there should be no parquet file with the failed commit time
|
||||
Assert.assertEquals(Arrays.stream(allFiles)
|
||||
.filter(file -> file.getPath().getName().contains(commitTime1)).count(), 0);
|
||||
assertEquals(0, Arrays.stream(allFiles)
|
||||
.filter(file -> file.getPath().getName().contains(commitTime1)).count());
|
||||
dataFiles = roView.getLatestBaseFiles().map(HoodieBaseFile::getPath).collect(Collectors.toList());
|
||||
recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath);
|
||||
assertEquals(recordsRead.size(), 200);
|
||||
assertEquals(200, recordsRead.size());
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -511,7 +513,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
|
||||
List<String> dataFiles = roView.getLatestBaseFiles().map(HoodieBaseFile::getPath).collect(Collectors.toList());
|
||||
List<GenericRecord> recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath);
|
||||
assertEquals(recordsRead.size(), 200);
|
||||
assertEquals(200, recordsRead.size());
|
||||
|
||||
writeRecords = jsc.parallelize(copyOfRecords, 1);
|
||||
writeStatusJavaRDD = thirdClient.upsert(writeRecords, commitTime2);
|
||||
@@ -524,8 +526,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
thirdClient.rollback(commitTime2);
|
||||
allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath());
|
||||
// After rollback, there should be no parquet file with the failed commit time
|
||||
Assert.assertEquals(Arrays.stream(allFiles)
|
||||
.filter(file -> file.getPath().getName().contains(commitTime2)).count(), 0);
|
||||
assertEquals(0, Arrays.stream(allFiles)
|
||||
.filter(file -> file.getPath().getName().contains(commitTime2)).count());
|
||||
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
hoodieTable = HoodieTable.create(metaClient, cfg, jsc);
|
||||
@@ -533,7 +535,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
dataFiles = roView.getLatestBaseFiles().map(HoodieBaseFile::getPath).collect(Collectors.toList());
|
||||
recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath);
|
||||
// check that the number of records read is still correct after rollback operation
|
||||
assertEquals(recordsRead.size(), 200);
|
||||
assertEquals(200, recordsRead.size());
|
||||
|
||||
// Test compaction commit rollback
|
||||
/**
|
||||
@@ -598,7 +600,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
|
||||
Option<HoodieInstant> deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().firstInstant();
|
||||
assertTrue(deltaCommit.isPresent());
|
||||
assertEquals("Delta commit should be 001", "001", deltaCommit.get().getTimestamp());
|
||||
assertEquals("001", deltaCommit.get().getTimestamp(), "Delta commit should be 001");
|
||||
|
||||
Option<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant();
|
||||
assertFalse(commit.isPresent());
|
||||
@@ -611,8 +613,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
|
||||
roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles);
|
||||
dataFilesToRead = roView.getLatestBaseFiles();
|
||||
assertTrue("Should list the parquet files we wrote in the delta commit",
|
||||
dataFilesToRead.findAny().isPresent());
|
||||
assertTrue(dataFilesToRead.findAny().isPresent(),
|
||||
"Should list the parquet files we wrote in the delta commit");
|
||||
|
||||
/**
|
||||
* Write 2 (inserts + updates)
|
||||
@@ -628,7 +630,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
|
||||
List<String> dataFiles = roView.getLatestBaseFiles().map(hf -> hf.getPath()).collect(Collectors.toList());
|
||||
List<GenericRecord> recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath);
|
||||
assertEquals(recordsRead.size(), 200);
|
||||
assertEquals(200, recordsRead.size());
|
||||
|
||||
statuses = nClient.upsert(jsc.parallelize(copyOfRecords, 1), newCommitTime).collect();
|
||||
// Verify there are no errors
|
||||
@@ -761,7 +763,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
|
||||
Option<HoodieInstant> deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().firstInstant();
|
||||
assertTrue(deltaCommit.isPresent());
|
||||
assertEquals("Delta commit should be 001", "001", deltaCommit.get().getTimestamp());
|
||||
assertEquals("001", deltaCommit.get().getTimestamp(), "Delta commit should be 001");
|
||||
|
||||
Option<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant();
|
||||
assertFalse(commit.isPresent());
|
||||
@@ -776,8 +778,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles);
|
||||
dataFilesToRead = roView.getLatestBaseFiles();
|
||||
List<HoodieBaseFile> dataFilesList = dataFilesToRead.collect(Collectors.toList());
|
||||
assertTrue("Should list the parquet files we wrote in the delta commit",
|
||||
dataFilesList.size() > 0);
|
||||
assertTrue(dataFilesList.size() > 0,
|
||||
"Should list the parquet files we wrote in the delta commit");
|
||||
|
||||
/**
|
||||
* Write 2 (only updates + inserts, written to .log file + correction of existing parquet file size)
|
||||
@@ -795,7 +797,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().lastInstant();
|
||||
assertTrue(deltaCommit.isPresent());
|
||||
assertEquals("Latest Delta commit should be 002", "002", deltaCommit.get().getTimestamp());
|
||||
assertEquals("002", deltaCommit.get().getTimestamp(), "Latest Delta commit should be 002");
|
||||
|
||||
commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant();
|
||||
assertFalse(commit.isPresent());
|
||||
@@ -813,7 +815,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
List<String> dataFiles = roView.getLatestBaseFiles().map(HoodieBaseFile::getPath).collect(Collectors.toList());
|
||||
List<GenericRecord> recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath);
|
||||
// Wrote 20 records in 2 batches
|
||||
assertEquals("Must contain 40 records", 40, recordsRead.size());
|
||||
assertEquals(40, recordsRead.size(), "Must contain 40 records");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -855,7 +857,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
List<FileSlice> groupedLogFiles =
|
||||
table.getSliceView().getLatestFileSlices(partitionPath).collect(Collectors.toList());
|
||||
for (FileSlice fileSlice : groupedLogFiles) {
|
||||
assertEquals("There should be 1 log file written for every data file", 1, fileSlice.getLogFiles().count());
|
||||
assertEquals(1, fileSlice.getLogFiles().count(), "There should be 1 log file written for every data file");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -874,14 +876,15 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
table = HoodieTable.create(metaClient, config, jsc);
|
||||
HoodieActiveTimeline timeline = metaClient.getActiveTimeline();
|
||||
|
||||
assertTrue("Compaction commit should be > than last insert", HoodieTimeline
|
||||
.compareTimestamps(timeline.lastInstant().get().getTimestamp(), newCommitTime, HoodieTimeline.GREATER));
|
||||
assertTrue(HoodieTimeline
|
||||
.compareTimestamps(timeline.lastInstant().get().getTimestamp(), newCommitTime, HoodieTimeline.GREATER),
|
||||
"Compaction commit should be > than last insert");
|
||||
|
||||
for (String partitionPath : dataGen.getPartitionPaths()) {
|
||||
List<FileSlice> groupedLogFiles =
|
||||
table.getSliceView().getLatestFileSlices(partitionPath).collect(Collectors.toList());
|
||||
for (FileSlice slice : groupedLogFiles) {
|
||||
assertEquals("After compaction there should be no log files visible on a full view", 0, slice.getLogFiles().count());
|
||||
assertEquals(0, slice.getLogFiles().count(), "After compaction there should be no log files visible on a full view");
|
||||
}
|
||||
List<WriteStatus> writeStatuses = result.collect();
|
||||
assertTrue(writeStatuses.stream().anyMatch(writeStatus -> writeStatus.getStat().getPartitionPath().contentEquals(partitionPath)));
|
||||
@@ -911,23 +914,23 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
for (String partitionPath : dataGen.getPartitionPaths()) {
|
||||
assertEquals(0, tableRTFileSystemView.getLatestFileSlices(partitionPath)
|
||||
.filter(fileSlice -> fileSlice.getBaseFile().isPresent()).count());
|
||||
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).anyMatch(fileSlice -> fileSlice.getLogFiles().count() > 0));
|
||||
assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).anyMatch(fileSlice -> fileSlice.getLogFiles().count() > 0));
|
||||
numLogFiles += tableRTFileSystemView.getLatestFileSlices(partitionPath)
|
||||
.filter(fileSlice -> fileSlice.getLogFiles().count() > 0).count();
|
||||
}
|
||||
|
||||
Assert.assertTrue(numLogFiles > 0);
|
||||
assertTrue(numLogFiles > 0);
|
||||
// Do a compaction
|
||||
String instantTime = writeClient.scheduleCompaction(Option.empty()).get().toString();
|
||||
statuses = writeClient.compact(instantTime);
|
||||
assertEquals(statuses.map(status -> status.getStat().getPath().contains("parquet")).count(), numLogFiles);
|
||||
Assert.assertEquals(statuses.count(), numLogFiles);
|
||||
assertEquals(statuses.count(), numLogFiles);
|
||||
writeClient.commitCompaction(instantTime, statuses, Option.empty());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testInsertsGeneratedIntoLogFilesRollback() throws Exception {
|
||||
public void testInsertsGeneratedIntoLogFilesRollback(@TempDir java.nio.file.Path tempFolder) throws Exception {
|
||||
// insert 100 records
|
||||
// Setting IndexType to be InMemory to simulate Global Index nature
|
||||
HoodieWriteConfig config = getConfigBuilder(false, IndexType.INMEMORY).build();
|
||||
@@ -942,14 +945,14 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
List<WriteStatus> writeStatuses = statuses.collect();
|
||||
|
||||
// Ensure that inserts are written to only log files
|
||||
Assert.assertEquals(
|
||||
writeStatuses.stream().filter(writeStatus -> !writeStatus.getStat().getPath().contains("log")).count(), 0);
|
||||
Assert.assertTrue(
|
||||
assertEquals(0,
|
||||
writeStatuses.stream().filter(writeStatus -> !writeStatus.getStat().getPath().contains("log")).count());
|
||||
assertTrue(
|
||||
writeStatuses.stream().anyMatch(writeStatus -> writeStatus.getStat().getPath().contains("log")));
|
||||
|
||||
// rollback a failed commit
|
||||
boolean rollback = writeClient.rollback(newCommitTime);
|
||||
Assert.assertTrue(rollback);
|
||||
assertTrue(rollback);
|
||||
newCommitTime = "101";
|
||||
writeClient.startCommitWithTime(newCommitTime);
|
||||
|
||||
@@ -972,9 +975,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
// Save the .commit file to local directory.
|
||||
// Rollback will be called twice to test the case where rollback failed first time and retried.
|
||||
// We got the "BaseCommitTime cannot be null" exception before the fix
|
||||
TemporaryFolder folder = new TemporaryFolder();
|
||||
folder.create();
|
||||
File file = folder.newFile();
|
||||
File file = Files.createTempFile(tempFolder, null, null).toFile();
|
||||
metaClient.getFs().copyToLocalFile(new Path(metaClient.getMetaPath(), fileName),
|
||||
new Path(file.getAbsolutePath()));
|
||||
writeClient.rollback(newCommitTime);
|
||||
@@ -985,8 +986,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
|
||||
long numLogFiles = 0;
|
||||
for (String partitionPath : dataGen.getPartitionPaths()) {
|
||||
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).noneMatch(fileSlice -> fileSlice.getBaseFile().isPresent()));
|
||||
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).noneMatch(fileSlice -> fileSlice.getLogFiles().count() > 0));
|
||||
assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).noneMatch(fileSlice -> fileSlice.getBaseFile().isPresent()));
|
||||
assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).noneMatch(fileSlice -> fileSlice.getLogFiles().count() > 0));
|
||||
numLogFiles += tableRTFileSystemView.getLatestFileSlices(partitionPath)
|
||||
.filter(fileSlice -> fileSlice.getLogFiles().count() > 0).count();
|
||||
}
|
||||
@@ -996,7 +997,6 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
Thread.sleep(1000);
|
||||
// Rollback again to pretend the first rollback failed partially. This should not error out
|
||||
writeClient.rollback(newCommitTime);
|
||||
folder.delete();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1022,19 +1022,19 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
|
||||
long numLogFiles = 0;
|
||||
for (String partitionPath : dataGen.getPartitionPaths()) {
|
||||
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).noneMatch(fileSlice -> fileSlice.getBaseFile().isPresent()));
|
||||
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).anyMatch(fileSlice -> fileSlice.getLogFiles().count() > 0));
|
||||
assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).noneMatch(fileSlice -> fileSlice.getBaseFile().isPresent()));
|
||||
assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).anyMatch(fileSlice -> fileSlice.getLogFiles().count() > 0));
|
||||
numLogFiles += tableRTFileSystemView.getLatestFileSlices(partitionPath)
|
||||
.filter(fileSlice -> fileSlice.getLogFiles().count() > 0).count();
|
||||
}
|
||||
|
||||
Assert.assertTrue(numLogFiles > 0);
|
||||
assertTrue(numLogFiles > 0);
|
||||
// Do a compaction
|
||||
newCommitTime = writeClient.scheduleCompaction(Option.empty()).get().toString();
|
||||
statuses = writeClient.compact(newCommitTime);
|
||||
// Ensure all log files have been compacted into parquet files
|
||||
assertEquals(statuses.map(status -> status.getStat().getPath().contains("parquet")).count(), numLogFiles);
|
||||
Assert.assertEquals(statuses.count(), numLogFiles);
|
||||
assertEquals(statuses.count(), numLogFiles);
|
||||
writeClient.commitCompaction(newCommitTime, statuses, Option.empty());
|
||||
// Trigger a rollback of compaction
|
||||
writeClient.rollback(newCommitTime);
|
||||
@@ -1044,8 +1044,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
Option<HoodieInstant> lastInstant = ((SyncableFileSystemView) tableRTFileSystemView).getLastInstant();
|
||||
System.out.println("Last Instant =" + lastInstant);
|
||||
for (String partitionPath : dataGen.getPartitionPaths()) {
|
||||
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).noneMatch(fileSlice -> fileSlice.getBaseFile().isPresent()));
|
||||
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).anyMatch(fileSlice -> fileSlice.getLogFiles().count() > 0));
|
||||
assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).noneMatch(fileSlice -> fileSlice.getBaseFile().isPresent()));
|
||||
assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).anyMatch(fileSlice -> fileSlice.getLogFiles().count() > 0));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1077,7 +1077,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(records, 1);
|
||||
|
||||
JavaRDD<WriteStatus> statuses = client.insert(writeRecords, instantTime);
|
||||
assertTrue("Commit should succeed", client.commit(instantTime, statuses));
|
||||
assertTrue(client.commit(instantTime, statuses), "Commit should succeed");
|
||||
|
||||
// Read from commit file
|
||||
table = HoodieTable.create(cfg, jsc);
|
||||
@@ -1094,14 +1094,14 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
inserts += stat.getValue().getInserts();
|
||||
}
|
||||
}
|
||||
Assert.assertEquals(inserts, 200);
|
||||
assertEquals(200, inserts);
|
||||
|
||||
instantTime = "002";
|
||||
client.startCommitWithTime(instantTime);
|
||||
records = dataGen.generateUpdates(instantTime, records);
|
||||
writeRecords = jsc.parallelize(records, 1);
|
||||
statuses = client.upsert(writeRecords, instantTime);
|
||||
assertTrue("Commit should succeed", client.commit(instantTime, statuses));
|
||||
assertTrue(client.commit(instantTime, statuses), "Commit should succeed");
|
||||
|
||||
// Read from commit file
|
||||
table = HoodieTable.create(cfg, jsc);
|
||||
@@ -1122,8 +1122,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
}
|
||||
}
|
||||
|
||||
Assert.assertEquals(inserts, 200);
|
||||
Assert.assertEquals(upserts, 200);
|
||||
assertEquals(200, inserts);
|
||||
assertEquals(200, upserts);
|
||||
|
||||
client.rollback(instantTime);
|
||||
|
||||
@@ -1145,8 +1145,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
upserts += stat.getValue().getUpserts();
|
||||
}
|
||||
}
|
||||
Assert.assertEquals(inserts, 200);
|
||||
Assert.assertEquals(upserts, 0);
|
||||
assertEquals(200, inserts);
|
||||
assertEquals(0, upserts);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1168,7 +1168,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(records, 1);
|
||||
|
||||
JavaRDD<WriteStatus> statuses = client.insert(writeRecords, instantTime);
|
||||
assertTrue("Commit should succeed", client.commit(instantTime, statuses));
|
||||
assertTrue(client.commit(instantTime, statuses), "Commit should succeed");
|
||||
|
||||
// Read from commit file
|
||||
HoodieTable table = HoodieTable.create(cfg, jsc);
|
||||
@@ -1188,7 +1188,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
fileIdToUpsertsMap.put(stat.getKey(), stat.getValue().getUpserts());
|
||||
}
|
||||
}
|
||||
Assert.assertEquals(inserts, 200);
|
||||
assertEquals(200, inserts);
|
||||
|
||||
instantTime = "001";
|
||||
client.startCommitWithTime(instantTime);
|
||||
@@ -1197,7 +1197,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
records.addAll(dataGen.generateInserts(instantTime, 200));
|
||||
writeRecords = jsc.parallelize(records, 1);
|
||||
statuses = client.upsert(writeRecords, instantTime);
|
||||
assertTrue("Commit should succeed", client.commit(instantTime, statuses));
|
||||
assertTrue(client.commit(instantTime, statuses), "Commit should succeed");
|
||||
|
||||
// Read from commit file
|
||||
table = HoodieTable.create(cfg, jsc);
|
||||
@@ -1221,8 +1221,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
}
|
||||
}
|
||||
|
||||
Assert.assertEquals(inserts, 400);
|
||||
Assert.assertEquals(upserts, 200);
|
||||
assertEquals(400, inserts);
|
||||
assertEquals(200, upserts);
|
||||
|
||||
// Test small file handling after compaction
|
||||
instantTime = "002";
|
||||
@@ -1243,8 +1243,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
// Ensure that the rolling stats from the extra metadata of delta commits is copied over to the compaction commit
|
||||
for (Map.Entry<String, Map<String, HoodieRollingStat>> entry : rollingStatMetadata.getPartitionToRollingStats()
|
||||
.entrySet()) {
|
||||
Assert.assertTrue(rollingStatMetadata1.getPartitionToRollingStats().containsKey(entry.getKey()));
|
||||
Assert.assertEquals(rollingStatMetadata1.getPartitionToRollingStats().get(entry.getKey()).size(),
|
||||
assertTrue(rollingStatMetadata1.getPartitionToRollingStats().containsKey(entry.getKey()));
|
||||
assertEquals(rollingStatMetadata1.getPartitionToRollingStats().get(entry.getKey()).size(),
|
||||
entry.getValue().size());
|
||||
}
|
||||
|
||||
@@ -1256,7 +1256,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
records.addAll(dataGen.generateInserts(instantTime, 200));
|
||||
writeRecords = jsc.parallelize(records, 1);
|
||||
statuses = client.upsert(writeRecords, instantTime);
|
||||
assertTrue("Commit should succeed", client.commit(instantTime, statuses));
|
||||
assertTrue(client.commit(instantTime, statuses), "Commit should succeed");
|
||||
|
||||
// Read from commit file
|
||||
table = HoodieTable.create(cfg, jsc);
|
||||
@@ -1279,8 +1279,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
}
|
||||
}
|
||||
|
||||
Assert.assertEquals(inserts, 600);
|
||||
Assert.assertEquals(upserts, 600);
|
||||
assertEquals(600, inserts);
|
||||
assertEquals(600, upserts);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1309,21 +1309,21 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
|
||||
Option<HoodieInstant> deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().firstInstant();
|
||||
assertTrue(deltaCommit.isPresent());
|
||||
assertEquals("Delta commit should be 001", "001", deltaCommit.get().getTimestamp());
|
||||
assertEquals("001", deltaCommit.get().getTimestamp(), "Delta commit should be 001");
|
||||
|
||||
Option<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant();
|
||||
assertFalse(commit.isPresent());
|
||||
|
||||
FileStatus[] allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath());
|
||||
BaseFileOnlyView roView =
|
||||
new HoodieTableFileSystemView(metaClient, metaClient.getCommitTimeline().filterCompletedInstants(), allFiles);
|
||||
new HoodieTableFileSystemView(metaClient, metaClient.getCommitTimeline().filterCompletedInstants(), allFiles);
|
||||
Stream<HoodieBaseFile> dataFilesToRead = roView.getLatestBaseFiles();
|
||||
assertFalse(dataFilesToRead.findAny().isPresent());
|
||||
|
||||
roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles);
|
||||
dataFilesToRead = roView.getLatestBaseFiles();
|
||||
assertTrue("should list the parquet files we wrote in the delta commit",
|
||||
dataFilesToRead.findAny().isPresent());
|
||||
assertTrue(dataFilesToRead.findAny().isPresent(),
|
||||
"should list the parquet files we wrote in the delta commit");
|
||||
|
||||
/**
|
||||
* Write 2 (only updates, written to .log file)
|
||||
@@ -1386,7 +1386,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
private void assertNoWriteErrors(List<WriteStatus> statuses) {
|
||||
// Verify there are no errors: fail on the first write status that reports errors, naming its file id in the message
|
||||
for (WriteStatus status : statuses) {
|
||||
assertFalse("Errors found in write of " + status.getFileId(), status.hasErrors());
|
||||
assertFalse(status.hasErrors(), "Errors found in write of " + status.getFileId());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1402,21 +1402,21 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
|
||||
Option<HoodieInstant> deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().lastInstant();
|
||||
assertTrue(deltaCommit.isPresent());
|
||||
Assert.assertEquals("Delta commit should be specified value", commitTime, deltaCommit.get().getTimestamp());
|
||||
assertEquals(commitTime, deltaCommit.get().getTimestamp(), "Delta commit should be specified value");
|
||||
|
||||
Option<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().lastInstant();
|
||||
assertFalse(commit.isPresent());
|
||||
|
||||
FileStatus[] allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath());
|
||||
BaseFileOnlyView roView =
|
||||
new HoodieTableFileSystemView(metaClient, metaClient.getCommitTimeline().filterCompletedInstants(), allFiles);
|
||||
new HoodieTableFileSystemView(metaClient, metaClient.getCommitTimeline().filterCompletedInstants(), allFiles);
|
||||
Stream<HoodieBaseFile> dataFilesToRead = roView.getLatestBaseFiles();
|
||||
assertTrue(!dataFilesToRead.findAny().isPresent());
|
||||
|
||||
roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles);
|
||||
dataFilesToRead = roView.getLatestBaseFiles();
|
||||
assertTrue("should list the parquet files we wrote in the delta commit",
|
||||
dataFilesToRead.findAny().isPresent());
|
||||
assertTrue(dataFilesToRead.findAny().isPresent(),
|
||||
"should list the parquet files we wrote in the delta commit");
|
||||
return allFiles;
|
||||
}
|
||||
|
||||
@@ -1435,8 +1435,8 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
Option<HoodieInstant> deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().lastInstant();
|
||||
assertTrue(deltaCommit.isPresent());
|
||||
assertEquals("Latest Delta commit should match specified time",
|
||||
commitTime, deltaCommit.get().getTimestamp());
|
||||
assertEquals(commitTime, deltaCommit.get().getTimestamp(),
|
||||
"Latest Delta commit should match specified time");
|
||||
|
||||
Option<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant();
|
||||
assertFalse(commit.isPresent());
|
||||
@@ -1452,7 +1452,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
throws Exception {
|
||||
HoodieTestUtils.init(jsc.hadoopConfiguration(), basePath, HoodieTableType.MERGE_ON_READ);
|
||||
setupIncremental(roJobConf, startCommitTime, numCommitsToPull, stopAtCompaction);
|
||||
FileInputFormat.setInputPaths(roJobConf, basePath + "/" + partitionPath);
|
||||
FileInputFormat.setInputPaths(roJobConf, Paths.get(basePath, partitionPath).toString());
|
||||
return roInputFormat.listStatus(roJobConf);
|
||||
}
|
||||
|
||||
@@ -1465,7 +1465,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
throws Exception {
|
||||
HoodieTestUtils.init(jsc.hadoopConfiguration(), basePath, HoodieTableType.MERGE_ON_READ);
|
||||
setupIncremental(rtJobConf, startCommitTime, numCommitsToPull, false);
|
||||
FileInputFormat.setInputPaths(rtJobConf, basePath + "/" + partitionPath);
|
||||
FileInputFormat.setInputPaths(rtJobConf, Paths.get(basePath, partitionPath).toString());
|
||||
return rtInputFormat.listStatus(rtJobConf);
|
||||
}
|
||||
|
||||
@@ -1492,9 +1492,9 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
JobConf jobConf, int expectedRecords, String... expectedCommits) {
|
||||
|
||||
assertEquals(expectedNumFiles, files.length);
|
||||
Set<String> expectedCommitsSet = Arrays.asList(expectedCommits).stream().collect(Collectors.toSet());
|
||||
Set<String> expectedCommitsSet = Arrays.stream(expectedCommits).collect(Collectors.toSet());
|
||||
List<GenericRecord> records = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(
|
||||
Arrays.asList(basePath + "/" + partitionPath), basePath, jobConf, inputFormat);
|
||||
Collections.singletonList(Paths.get(basePath, partitionPath).toString()), basePath, jobConf, inputFormat);
|
||||
assertEquals(expectedRecords, records.size());
|
||||
Set<String> actualCommits = records.stream().map(r ->
|
||||
r.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString()).collect(Collectors.toSet());
|
||||
|
||||
@@ -47,26 +47,25 @@ import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.mapred.FileInputFormat;
|
||||
import org.apache.hadoop.mapred.JobConf;
|
||||
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.parquet.avro.AvroReadSupport;
|
||||
import org.apache.parquet.hadoop.ParquetReader;
|
||||
import org.apache.spark.TaskContext;
|
||||
import org.junit.After;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
import static org.mockito.Mockito.mock;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
@@ -74,7 +73,7 @@ public class TestCopyOnWriteActionExecutor extends HoodieClientTestHarness {
|
||||
|
||||
private static final Logger LOG = LogManager.getLogger(TestCopyOnWriteActionExecutor.class);
|
||||
|
||||
@Before
|
||||
@BeforeEach
|
||||
public void setUp() throws Exception {
|
||||
initSparkContexts("TestCopyOnWriteActionExecutor");
|
||||
initPath();
|
||||
@@ -83,7 +82,7 @@ public class TestCopyOnWriteActionExecutor extends HoodieClientTestHarness {
|
||||
initFileSystem();
|
||||
}
|
||||
|
||||
@After
|
||||
@AfterEach
|
||||
public void tearDown() throws Exception {
|
||||
cleanupSparkContexts();
|
||||
cleanupMetaClient();
|
||||
@@ -110,8 +109,8 @@ public class TestCopyOnWriteActionExecutor extends HoodieClientTestHarness {
|
||||
return Pair.of(io.makeNewPath(record.getPartitionPath()), writeToken);
|
||||
}).collect().get(0);
|
||||
|
||||
Assert.assertEquals(newPathWithWriteToken.getKey().toString(), this.basePath + "/" + partitionPath + "/"
|
||||
+ FSUtils.makeDataFileName(instantTime, newPathWithWriteToken.getRight(), fileName));
|
||||
assertEquals(newPathWithWriteToken.getKey().toString(), Paths.get(this.basePath, partitionPath,
|
||||
FSUtils.makeDataFileName(instantTime, newPathWithWriteToken.getRight(), fileName)).toString());
|
||||
}
|
||||
|
||||
private HoodieWriteConfig makeHoodieClientConfig() throws Exception {
|
||||
@@ -134,7 +133,7 @@ public class TestCopyOnWriteActionExecutor extends HoodieClientTestHarness {
|
||||
writeClient.startCommitWithTime(firstCommitTime);
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
|
||||
String partitionPath = "/2016/01/31";
|
||||
String partitionPath = "2016/01/31";
|
||||
HoodieCopyOnWriteTable table = (HoodieCopyOnWriteTable) HoodieTable.create(metaClient, config, jsc);
|
||||
|
||||
// Get some records belong to the same partition (2016/01/31)
|
||||
@@ -227,7 +226,7 @@ public class TestCopyOnWriteActionExecutor extends HoodieClientTestHarness {
|
||||
updatedReader.close();
|
||||
// Also check the numRecordsWritten
|
||||
WriteStatus writeStatus = statuses.get(0);
|
||||
assertEquals("Should be only one file generated", 1, statuses.size());
|
||||
assertEquals(1, statuses.size(), "Should be only one file generated");
|
||||
assertEquals(4, writeStatus.getStat().getNumWrites());// 3 rewritten records + 1 new record
|
||||
}
|
||||
|
||||
@@ -239,7 +238,7 @@ public class TestCopyOnWriteActionExecutor extends HoodieClientTestHarness {
|
||||
hoodieInputFormat.setConf(jobConf);
|
||||
HoodieTestUtils.init(jsc.hadoopConfiguration(), basePath, HoodieTableType.COPY_ON_WRITE);
|
||||
setupIncremental(jobConf, startCommitTime, numCommitsToPull);
|
||||
FileInputFormat.setInputPaths(jobConf, basePath + partitionPath);
|
||||
FileInputFormat.setInputPaths(jobConf, Paths.get(basePath, partitionPath).toString());
|
||||
return hoodieInputFormat.listStatus(jobConf);
|
||||
}
|
||||
|
||||
@@ -390,13 +389,13 @@ public class TestCopyOnWriteActionExecutor extends HoodieClientTestHarness {
|
||||
|
||||
// Check the updated file
|
||||
int counts = 0;
|
||||
for (File file : new File(basePath + "/2016/01/31").listFiles()) {
|
||||
for (File file : Paths.get(basePath, "2016/01/31").toFile().listFiles()) {
|
||||
if (file.getName().endsWith(".parquet") && FSUtils.getCommitTime(file.getName()).equals(instantTime)) {
|
||||
LOG.info(file.getName() + "-" + file.length());
|
||||
counts++;
|
||||
}
|
||||
}
|
||||
assertEquals("If the number of records are more than 1150, then there should be a new file", 3, counts);
|
||||
assertEquals(3, counts, "If the number of records are more than 1150, then there should be a new file");
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -416,7 +415,7 @@ public class TestCopyOnWriteActionExecutor extends HoodieClientTestHarness {
|
||||
|
||||
WriteStatus writeStatus = ws.get(0).get(0);
|
||||
String fileId = writeStatus.getFileId();
|
||||
metaClient.getFs().create(new Path(basePath + "/.hoodie/000.commit")).close();
|
||||
metaClient.getFs().create(new Path(Paths.get(basePath, ".hoodie", "000.commit").toString())).close();
|
||||
final List<HoodieRecord> updates = dataGen.generateUpdatesWithHoodieAvroPayload(instantTime, inserts);
|
||||
|
||||
String partitionPath = updates.get(0).getPartitionPath();
|
||||
@@ -429,11 +428,8 @@ public class TestCopyOnWriteActionExecutor extends HoodieClientTestHarness {
|
||||
assertEquals(updates.size() - numRecordsInPartition, updateStatus.get(0).get(0).getTotalErrorRecords());
|
||||
}
|
||||
|
||||
@After
|
||||
@AfterEach
|
||||
public void cleanup() {
|
||||
if (basePath != null) {
|
||||
new File(basePath).delete();
|
||||
}
|
||||
if (jsc != null) {
|
||||
jsc.stop();
|
||||
}
|
||||
|
||||
@@ -18,10 +18,6 @@
|
||||
|
||||
package org.apache.hudi.table.action.commit;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import org.apache.hudi.common.HoodieClientTestHarness;
|
||||
import org.apache.hudi.common.HoodieClientTestUtils;
|
||||
import org.apache.hudi.common.HoodieTestDataGenerator;
|
||||
@@ -36,18 +32,25 @@ import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.table.HoodieCopyOnWriteTable;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hudi.table.WorkloadProfile;
|
||||
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import scala.Tuple2;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
public class TestUpsertPartitioner extends HoodieClientTestHarness {
|
||||
|
||||
private static final Logger LOG = LogManager.getLogger(TestUpsertPartitioner.class);
|
||||
|
||||
@Before
|
||||
@BeforeEach
|
||||
public void setUp() throws Exception {
|
||||
initSparkContexts("TestUpsertPartitioner");
|
||||
initPath();
|
||||
@@ -56,7 +59,7 @@ public class TestUpsertPartitioner extends HoodieClientTestHarness {
|
||||
initFileSystem();
|
||||
}
|
||||
|
||||
@After
|
||||
@AfterEach
|
||||
public void tearDown() throws Exception {
|
||||
cleanupSparkContexts();
|
||||
cleanupMetaClient();
|
||||
@@ -89,8 +92,9 @@ public class TestUpsertPartitioner extends HoodieClientTestHarness {
|
||||
records.addAll(updateRecords);
|
||||
WorkloadProfile profile = new WorkloadProfile(jsc.parallelize(records));
|
||||
UpsertPartitioner partitioner = new UpsertPartitioner(profile, jsc, table, config);
|
||||
assertEquals("Update record should have gone to the 1 update partition", 0, partitioner.getPartition(
|
||||
new Tuple2<>(updateRecords.get(0).getKey(), Option.ofNullable(updateRecords.get(0).getCurrentLocation()))));
|
||||
assertEquals(0, partitioner.getPartition(
|
||||
new Tuple2<>(updateRecords.get(0).getKey(), Option.ofNullable(updateRecords.get(0).getCurrentLocation()))),
|
||||
"Update record should have gone to the 1 update partition");
|
||||
return partitioner;
|
||||
}
|
||||
|
||||
@@ -100,7 +104,7 @@ public class TestUpsertPartitioner extends HoodieClientTestHarness {
|
||||
// Inserts + Updates... Check all updates go together & inserts subsplit
|
||||
UpsertPartitioner partitioner = getUpsertPartitioner(0, 200, 100, 1024, testPartitionPath, false);
|
||||
List<InsertBucket> insertBuckets = partitioner.getInsertBuckets(testPartitionPath);
|
||||
assertEquals("Total of 2 insert buckets", 2, insertBuckets.size());
|
||||
assertEquals(2, insertBuckets.size(), "Total of 2 insert buckets");
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -111,33 +115,33 @@ public class TestUpsertPartitioner extends HoodieClientTestHarness {
|
||||
UpsertPartitioner partitioner = getUpsertPartitioner(1000 * 1024, 400, 100, 800 * 1024, testPartitionPath, false);
|
||||
List<InsertBucket> insertBuckets = partitioner.getInsertBuckets(testPartitionPath);
|
||||
|
||||
assertEquals("Should have 3 partitions", 3, partitioner.numPartitions());
|
||||
assertEquals("Bucket 0 is UPDATE", BucketType.UPDATE,
|
||||
partitioner.getBucketInfo(0).bucketType);
|
||||
assertEquals("Bucket 1 is INSERT", BucketType.INSERT,
|
||||
partitioner.getBucketInfo(1).bucketType);
|
||||
assertEquals("Bucket 2 is INSERT", BucketType.INSERT,
|
||||
partitioner.getBucketInfo(2).bucketType);
|
||||
assertEquals("Total of 3 insert buckets", 3, insertBuckets.size());
|
||||
assertEquals("First insert bucket must be same as update bucket", 0, insertBuckets.get(0).bucketNumber);
|
||||
assertEquals("First insert bucket should have weight 0.5", 0.5, insertBuckets.get(0).weight, 0.01);
|
||||
assertEquals(3, partitioner.numPartitions(), "Should have 3 partitions");
|
||||
assertEquals(BucketType.UPDATE, partitioner.getBucketInfo(0).bucketType,
|
||||
"Bucket 0 is UPDATE");
|
||||
assertEquals(BucketType.INSERT, partitioner.getBucketInfo(1).bucketType,
|
||||
"Bucket 1 is INSERT");
|
||||
assertEquals(BucketType.INSERT, partitioner.getBucketInfo(2).bucketType,
|
||||
"Bucket 2 is INSERT");
|
||||
assertEquals(3, insertBuckets.size(), "Total of 3 insert buckets");
|
||||
assertEquals(0, insertBuckets.get(0).bucketNumber, "First insert bucket must be same as update bucket");
|
||||
assertEquals(0.5, insertBuckets.get(0).weight, 0.01, "First insert bucket should have weight 0.5");
|
||||
|
||||
// Now with insert split size auto tuned
|
||||
partitioner = getUpsertPartitioner(1000 * 1024, 2400, 100, 800 * 1024, testPartitionPath, true);
|
||||
insertBuckets = partitioner.getInsertBuckets(testPartitionPath);
|
||||
|
||||
assertEquals("Should have 4 partitions", 4, partitioner.numPartitions());
|
||||
assertEquals("Bucket 0 is UPDATE", BucketType.UPDATE,
|
||||
partitioner.getBucketInfo(0).bucketType);
|
||||
assertEquals("Bucket 1 is INSERT", BucketType.INSERT,
|
||||
partitioner.getBucketInfo(1).bucketType);
|
||||
assertEquals("Bucket 2 is INSERT", BucketType.INSERT,
|
||||
partitioner.getBucketInfo(2).bucketType);
|
||||
assertEquals("Bucket 3 is INSERT", BucketType.INSERT,
|
||||
partitioner.getBucketInfo(3).bucketType);
|
||||
assertEquals("Total of 4 insert buckets", 4, insertBuckets.size());
|
||||
assertEquals("First insert bucket must be same as update bucket", 0, insertBuckets.get(0).bucketNumber);
|
||||
assertEquals("First insert bucket should have weight 0.5", 200.0 / 2400, insertBuckets.get(0).weight, 0.01);
|
||||
assertEquals(4, partitioner.numPartitions(), "Should have 4 partitions");
|
||||
assertEquals(BucketType.UPDATE, partitioner.getBucketInfo(0).bucketType,
|
||||
"Bucket 0 is UPDATE");
|
||||
assertEquals(BucketType.INSERT, partitioner.getBucketInfo(1).bucketType,
|
||||
"Bucket 1 is INSERT");
|
||||
assertEquals(BucketType.INSERT, partitioner.getBucketInfo(2).bucketType,
|
||||
"Bucket 2 is INSERT");
|
||||
assertEquals(BucketType.INSERT, partitioner.getBucketInfo(3).bucketType,
|
||||
"Bucket 3 is INSERT");
|
||||
assertEquals(4, insertBuckets.size(), "Total of 4 insert buckets");
|
||||
assertEquals(0, insertBuckets.get(0).bucketNumber, "First insert bucket must be same as update bucket");
|
||||
assertEquals(200.0 / 2400, insertBuckets.get(0).weight, 0.01, "First insert bucket should have weight 0.5");
|
||||
}
|
||||
|
||||
private HoodieWriteConfig.Builder makeHoodieClientConfigBuilder() throws Exception {
|
||||
|
||||
@@ -52,8 +52,7 @@ import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
@@ -63,9 +62,10 @@ import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static org.apache.hudi.common.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
/**
|
||||
* Test Cases for Async Compaction and Ingestion interaction.
|
||||
@@ -111,9 +111,9 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
|
||||
|
||||
HoodieInstant pendingCompactionInstant =
|
||||
metaClient.getActiveTimeline().filterPendingCompactionTimeline().firstInstant().get();
|
||||
assertEquals("Pending Compaction instant has expected instant time", pendingCompactionInstant.getTimestamp(),
|
||||
compactionInstantTime);
|
||||
assertEquals("Pending Compaction instant has expected state", pendingCompactionInstant.getState(), State.REQUESTED);
|
||||
assertEquals(compactionInstantTime, pendingCompactionInstant.getTimestamp(),
|
||||
"Pending Compaction instant has expected instant time");
|
||||
assertEquals(State.REQUESTED, pendingCompactionInstant.getState(), "Pending Compaction instant has expected state");
|
||||
|
||||
moveCompactionFromRequestedToInflight(compactionInstantTime, cfg);
|
||||
|
||||
@@ -169,11 +169,11 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
|
||||
metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
|
||||
HoodieInstant pendingCompactionInstant =
|
||||
metaClient.getActiveTimeline().filterPendingCompactionTimeline().firstInstant().get();
|
||||
assertEquals("Pending Compaction instant has expected instant time", pendingCompactionInstant.getTimestamp(),
|
||||
compactionInstantTime);
|
||||
assertEquals(compactionInstantTime, pendingCompactionInstant.getTimestamp(),
|
||||
"Pending Compaction instant has expected instant time");
|
||||
HoodieInstant inflightInstant =
|
||||
metaClient.getActiveTimeline().filterPendingExcludingCompaction().firstInstant().get();
|
||||
assertEquals("inflight instant has expected instant time", inflightInstant.getTimestamp(), inflightInstantTime);
|
||||
assertEquals(inflightInstantTime, inflightInstant.getTimestamp(), "inflight instant has expected instant time");
|
||||
|
||||
// This should rollback
|
||||
client.startCommitWithTime(nextInflightInstantTime);
|
||||
@@ -181,13 +181,14 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
|
||||
// Validate
|
||||
metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
|
||||
inflightInstant = metaClient.getActiveTimeline().filterPendingExcludingCompaction().firstInstant().get();
|
||||
assertEquals("inflight instant has expected instant time", inflightInstant.getTimestamp(), nextInflightInstantTime);
|
||||
assertEquals("Expect only one inflight instant", 1, metaClient.getActiveTimeline()
|
||||
.filterPendingExcludingCompaction().getInstants().count());
|
||||
assertEquals(inflightInstant.getTimestamp(), nextInflightInstantTime, "inflight instant has expected instant time");
|
||||
assertEquals(1, metaClient.getActiveTimeline()
|
||||
.filterPendingExcludingCompaction().getInstants().count(),
|
||||
"Expect only one inflight instant");
|
||||
// Expect pending Compaction to be present
|
||||
pendingCompactionInstant = metaClient.getActiveTimeline().filterPendingCompactionTimeline().firstInstant().get();
|
||||
assertEquals("Pending Compaction instant has expected instant time", pendingCompactionInstant.getTimestamp(),
|
||||
compactionInstantTime);
|
||||
assertEquals(compactionInstantTime, pendingCompactionInstant.getTimestamp(),
|
||||
"Pending Compaction instant has expected instant time");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -237,8 +238,8 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
|
||||
String compactionInstantTime = "006";
|
||||
int numRecs = 2000;
|
||||
|
||||
List<HoodieRecord> records = dataGen.generateInserts(firstInstantTime, numRecs);
|
||||
records = runNextDeltaCommits(client, readClient, Arrays.asList(firstInstantTime, secondInstantTime), records, cfg, true,
|
||||
final List<HoodieRecord> initalRecords = dataGen.generateInserts(firstInstantTime, numRecs);
|
||||
final List<HoodieRecord> records = runNextDeltaCommits(client, readClient, Arrays.asList(firstInstantTime, secondInstantTime), initalRecords, cfg, true,
|
||||
new ArrayList<>());
|
||||
|
||||
// Schedule compaction but do not run them
|
||||
@@ -246,17 +247,12 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
|
||||
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
|
||||
HoodieInstant pendingCompactionInstant =
|
||||
metaClient.getActiveTimeline().filterPendingCompactionTimeline().firstInstant().get();
|
||||
assertEquals("Pending Compaction instant has expected instant time", pendingCompactionInstant.getTimestamp(), compactionInstantTime);
|
||||
assertEquals(compactionInstantTime, pendingCompactionInstant.getTimestamp(), "Pending Compaction instant has expected instant time");
|
||||
|
||||
boolean gotException = false;
|
||||
try {
|
||||
assertThrows(IllegalArgumentException.class, () -> {
|
||||
runNextDeltaCommits(client, readClient, Arrays.asList(failedInstantTime), records, cfg, false,
|
||||
Arrays.asList(compactionInstantTime));
|
||||
} catch (IllegalArgumentException iex) {
|
||||
// Latest pending compaction instant time must be earlier than this instant time. Should fail here
|
||||
gotException = true;
|
||||
}
|
||||
assertTrue("Latest pending compaction instant time must be earlier than this instant time", gotException);
|
||||
}, "Latest pending compaction instant time must be earlier than this instant time");
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -283,17 +279,12 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
|
||||
metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
|
||||
HoodieInstant inflightInstant =
|
||||
metaClient.getActiveTimeline().filterPendingExcludingCompaction().firstInstant().get();
|
||||
assertEquals("inflight instant has expected instant time", inflightInstant.getTimestamp(), inflightInstantTime);
|
||||
assertEquals(inflightInstantTime, inflightInstant.getTimestamp(), "inflight instant has expected instant time");
|
||||
|
||||
boolean gotException = false;
|
||||
try {
|
||||
assertThrows(IllegalArgumentException.class, () -> {
|
||||
// Schedule compaction but do not run them
|
||||
scheduleCompaction(compactionInstantTime, client, cfg);
|
||||
} catch (IllegalArgumentException iex) {
|
||||
// Earliest ingestion inflight instant time must be later than compaction time. Should fail here
|
||||
gotException = true;
|
||||
}
|
||||
assertTrue("Earliest ingestion inflight instant time must be later than compaction time", gotException);
|
||||
}, "Earliest ingestion inflight instant time must be later than compaction time");
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -304,44 +295,32 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
|
||||
HoodieWriteClient client = getHoodieWriteClient(cfg, true);
|
||||
HoodieReadClient readClient = getHoodieReadClient(cfg.getBasePath());
|
||||
|
||||
String firstInstantTime = "001";
|
||||
String secondInstantTime = "004";
|
||||
String compactionInstantTime = "002";
|
||||
final String firstInstantTime = "001";
|
||||
final String secondInstantTime = "004";
|
||||
final String compactionInstantTime = "002";
|
||||
int numRecs = 2000;
|
||||
|
||||
List<HoodieRecord> records = dataGen.generateInserts(firstInstantTime, numRecs);
|
||||
runNextDeltaCommits(client, readClient, Arrays.asList(firstInstantTime, secondInstantTime), records, cfg, true,
|
||||
new ArrayList<>());
|
||||
|
||||
boolean gotException = false;
|
||||
try {
|
||||
assertThrows(IllegalArgumentException.class, () -> {
|
||||
// Schedule compaction but do not run them
|
||||
scheduleCompaction(compactionInstantTime, client, cfg);
|
||||
} catch (IllegalArgumentException iex) {
|
||||
gotException = true;
|
||||
}
|
||||
assertTrue("Compaction Instant to be scheduled cannot have older timestamp", gotException);
|
||||
}, "Compaction Instant to be scheduled cannot have older timestamp");
|
||||
|
||||
// Schedule with timestamp same as that of committed instant
|
||||
gotException = false;
|
||||
try {
|
||||
assertThrows(IllegalArgumentException.class, () -> {
|
||||
// Schedule compaction but do not run them
|
||||
scheduleCompaction(secondInstantTime, client, cfg);
|
||||
} catch (IllegalArgumentException iex) {
|
||||
gotException = true;
|
||||
}
|
||||
assertTrue("Compaction Instant to be scheduled cannot have same timestamp as committed instant", gotException);
|
||||
}, "Compaction Instant to be scheduled cannot have same timestamp as committed instant");
|
||||
|
||||
compactionInstantTime = "006";
|
||||
scheduleCompaction(compactionInstantTime, client, cfg);
|
||||
gotException = false;
|
||||
try {
|
||||
final String compactionInstantTime2 = "006";
|
||||
scheduleCompaction(compactionInstantTime2, client, cfg);
|
||||
assertThrows(IllegalArgumentException.class, () -> {
|
||||
// Schedule compaction with the same times as a pending compaction
|
||||
scheduleCompaction(secondInstantTime, client, cfg);
|
||||
} catch (IllegalArgumentException iex) {
|
||||
gotException = true;
|
||||
}
|
||||
assertTrue("Compaction Instant to be scheduled cannot have same timestamp as a pending compaction", gotException);
|
||||
}, "Compaction Instant to be scheduled cannot have same timestamp as a pending compaction");
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -406,13 +385,13 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
|
||||
fileSliceList.forEach(fileSlice -> {
|
||||
Pair<String, HoodieCompactionOperation> opPair = fgIdToCompactionOperation.get(fileSlice.getFileGroupId());
|
||||
if (opPair != null) {
|
||||
assertEquals("Expect baseInstant to match compaction Instant", fileSlice.getBaseInstantTime(), opPair.getKey());
|
||||
assertTrue("Expect atleast one log file to be present where the latest delta commit was written",
|
||||
fileSlice.getLogFiles().count() > 0);
|
||||
assertFalse("Expect no data-file to be present", fileSlice.getBaseFile().isPresent());
|
||||
assertEquals(fileSlice.getBaseInstantTime(), opPair.getKey(), "Expect baseInstant to match compaction Instant");
|
||||
assertTrue(fileSlice.getLogFiles().count() > 0,
|
||||
"Expect atleast one log file to be present where the latest delta commit was written");
|
||||
assertFalse(fileSlice.getBaseFile().isPresent(), "Expect no data-file to be present");
|
||||
} else {
|
||||
assertTrue("Expect baseInstant to be less than or equal to latestDeltaCommit",
|
||||
fileSlice.getBaseInstantTime().compareTo(latestDeltaCommit) <= 0);
|
||||
assertTrue(fileSlice.getBaseInstantTime().compareTo(latestDeltaCommit) <= 0,
|
||||
"Expect baseInstant to be less than or equal to latestDeltaCommit");
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -446,8 +425,8 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
|
||||
metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
|
||||
HoodieTable hoodieTable = getHoodieTable(metaClient, cfg);
|
||||
List<HoodieBaseFile> dataFilesToRead = getCurrentLatestDataFiles(hoodieTable, cfg);
|
||||
assertTrue("should list the parquet files we wrote in the delta commit",
|
||||
dataFilesToRead.stream().findAny().isPresent());
|
||||
assertTrue(dataFilesToRead.stream().findAny().isPresent(),
|
||||
"should list the parquet files we wrote in the delta commit");
|
||||
validateDeltaCommit(firstInstant, fgIdToCompactionOperation, cfg);
|
||||
}
|
||||
|
||||
@@ -467,7 +446,7 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
|
||||
metaClient.getActiveTimeline().transitionCompactionRequestedToInflight(compactionInstant);
|
||||
HoodieInstant instant = metaClient.getActiveTimeline().reload().filterPendingCompactionTimeline().getInstants()
|
||||
.filter(in -> in.getTimestamp().equals(compactionInstantTime)).findAny().get();
|
||||
assertTrue("Instant must be marked inflight", instant.isInflight());
|
||||
assertTrue(instant.isInflight(), "Instant must be marked inflight");
|
||||
}
|
||||
|
||||
private void scheduleCompaction(String compactionInstantTime, HoodieWriteClient client, HoodieWriteConfig cfg)
|
||||
@@ -475,7 +454,7 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
|
||||
client.scheduleCompactionAtInstant(compactionInstantTime, Option.empty());
|
||||
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
|
||||
HoodieInstant instant = metaClient.getActiveTimeline().filterPendingCompactionTimeline().lastInstant().get();
|
||||
assertEquals("Last compaction instant must be the one set", instant.getTimestamp(), compactionInstantTime);
|
||||
assertEquals(compactionInstantTime, instant.getTimestamp(), "Last compaction instant must be the one set");
|
||||
}
|
||||
|
||||
private void scheduleAndExecuteCompaction(String compactionInstantTime, HoodieWriteClient client, HoodieTable table,
|
||||
@@ -489,28 +468,30 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
|
||||
|
||||
client.compact(compactionInstantTime);
|
||||
List<FileSlice> fileSliceList = getCurrentLatestFileSlices(table);
|
||||
assertTrue("Ensure latest file-slices are not empty", fileSliceList.stream().findAny().isPresent());
|
||||
assertFalse("Verify all file-slices have base-instant same as compaction instant", fileSliceList.stream()
|
||||
.anyMatch(fs -> !fs.getBaseInstantTime().equals(compactionInstantTime)));
|
||||
assertFalse("Verify all file-slices have data-files",
|
||||
fileSliceList.stream().anyMatch(fs -> !fs.getBaseFile().isPresent()));
|
||||
assertTrue(fileSliceList.stream().findAny().isPresent(), "Ensure latest file-slices are not empty");
|
||||
assertFalse(fileSliceList.stream()
|
||||
.anyMatch(fs -> !fs.getBaseInstantTime().equals(compactionInstantTime)),
|
||||
"Verify all file-slices have base-instant same as compaction instant");
|
||||
assertFalse(fileSliceList.stream().anyMatch(fs -> !fs.getBaseFile().isPresent()),
|
||||
"Verify all file-slices have data-files");
|
||||
|
||||
if (hasDeltaCommitAfterPendingCompaction) {
|
||||
assertFalse("Verify all file-slices have atleast one log-file",
|
||||
fileSliceList.stream().anyMatch(fs -> fs.getLogFiles().count() == 0));
|
||||
assertFalse(fileSliceList.stream().anyMatch(fs -> fs.getLogFiles().count() == 0),
|
||||
"Verify all file-slices have atleast one log-file");
|
||||
} else {
|
||||
assertFalse("Verify all file-slices have no log-files",
|
||||
fileSliceList.stream().anyMatch(fs -> fs.getLogFiles().count() > 0));
|
||||
assertFalse(fileSliceList.stream().anyMatch(fs -> fs.getLogFiles().count() > 0),
|
||||
"Verify all file-slices have no log-files");
|
||||
}
|
||||
|
||||
// verify that there is a commit
|
||||
table = getHoodieTable(new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath(), true), cfg);
|
||||
HoodieTimeline timeline = table.getMetaClient().getCommitTimeline().filterCompletedInstants();
|
||||
String latestCompactionCommitTime = timeline.lastInstant().get().getTimestamp();
|
||||
assertEquals("Expect compaction instant time to be the latest commit time", latestCompactionCommitTime,
|
||||
compactionInstantTime);
|
||||
Assert.assertEquals("Must contain expected records", expectedNumRecs,
|
||||
HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, "000").count());
|
||||
assertEquals(latestCompactionCommitTime, compactionInstantTime,
|
||||
"Expect compaction instant time to be the latest commit time");
|
||||
assertEquals(expectedNumRecs,
|
||||
HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, "000").count(),
|
||||
"Must contain expected records");
|
||||
|
||||
}
|
||||
|
||||
@@ -530,11 +511,11 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
|
||||
Option<HoodieInstant> deltaCommit =
|
||||
metaClient.getActiveTimeline().reload().getDeltaCommitTimeline().filterCompletedInstants().lastInstant();
|
||||
if (skipCommit && !cfg.shouldAutoCommit()) {
|
||||
assertTrue("Delta commit should not be latest instant",
|
||||
deltaCommit.get().getTimestamp().compareTo(instantTime) < 0);
|
||||
assertTrue(deltaCommit.get().getTimestamp().compareTo(instantTime) < 0,
|
||||
"Delta commit should not be latest instant");
|
||||
} else {
|
||||
assertTrue(deltaCommit.isPresent());
|
||||
assertEquals("Delta commit should be latest instant", instantTime, deltaCommit.get().getTimestamp());
|
||||
assertEquals(instantTime, deltaCommit.get().getTimestamp(), "Delta commit should be latest instant");
|
||||
}
|
||||
return statusList;
|
||||
}
|
||||
|
||||
@@ -43,23 +43,24 @@ import org.apache.hudi.table.HoodieTable;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
public class TestHoodieCompactor extends HoodieClientTestHarness {
|
||||
|
||||
private Configuration hadoopConf;
|
||||
private HoodieTableMetaClient metaClient;
|
||||
|
||||
@Before
|
||||
@BeforeEach
|
||||
public void setUp() throws Exception {
|
||||
// Initialize a local spark env
|
||||
initSparkContexts("TestHoodieCompactor");
|
||||
@@ -72,7 +73,7 @@ public class TestHoodieCompactor extends HoodieClientTestHarness {
|
||||
initTestDataGenerator();
|
||||
}
|
||||
|
||||
@After
|
||||
@AfterEach
|
||||
public void tearDown() throws Exception {
|
||||
cleanupFileSystem();
|
||||
cleanupTestDataGenerator();
|
||||
@@ -100,13 +101,15 @@ public class TestHoodieCompactor extends HoodieClientTestHarness {
|
||||
.withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build());
|
||||
}
|
||||
|
||||
@Test(expected = HoodieNotSupportedException.class)
|
||||
@Test
|
||||
public void testCompactionOnCopyOnWriteFail() throws Exception {
|
||||
metaClient = HoodieTestUtils.init(hadoopConf, basePath, HoodieTableType.COPY_ON_WRITE);
|
||||
HoodieTable<?> table = HoodieTable.create(metaClient, getConfig(), jsc);
|
||||
String compactionInstantTime = HoodieActiveTimeline.createNewInstantTime();
|
||||
table.scheduleCompaction(jsc, compactionInstantTime, Option.empty());
|
||||
table.compact(jsc, compactionInstantTime);
|
||||
assertThrows(HoodieNotSupportedException.class, () -> {
|
||||
table.scheduleCompaction(jsc, compactionInstantTime, Option.empty());
|
||||
table.compact(jsc, compactionInstantTime);
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -123,7 +126,7 @@ public class TestHoodieCompactor extends HoodieClientTestHarness {
|
||||
|
||||
String compactionInstantTime = HoodieActiveTimeline.createNewInstantTime();
|
||||
Option<HoodieCompactionPlan> plan = table.scheduleCompaction(jsc, compactionInstantTime, Option.empty());
|
||||
assertFalse("If there is nothing to compact, result will be empty", plan.isPresent());
|
||||
assertFalse(plan.isPresent(), "If there is nothing to compact, result will be empty");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -159,7 +162,7 @@ public class TestHoodieCompactor extends HoodieClientTestHarness {
|
||||
List<FileSlice> groupedLogFiles =
|
||||
table.getSliceView().getLatestFileSlices(partitionPath).collect(Collectors.toList());
|
||||
for (FileSlice fileSlice : groupedLogFiles) {
|
||||
assertEquals("There should be 1 log file written for every data file", 1, fileSlice.getLogFiles().count());
|
||||
assertEquals(1, fileSlice.getLogFiles().count(), "There should be 1 log file written for every data file");
|
||||
}
|
||||
}
|
||||
HoodieTestUtils.createDeltaCommitFiles(basePath, newCommitTime);
|
||||
|
||||
Reference in New Issue
Block a user