[HUDI-509] Renaming code in sync with cWiki restructuring (#1212)
- Storage Type replaced with Table Type (remaining instances) - View types replaced with query types; - ReadOptimized view referred as Snapshot Query - TableFileSystemView sub interfaces renamed to BaseFileOnly and Slice Views - HoodieDataFile renamed to HoodieBaseFile - Hive Sync tool will register RO tables for MOR with a `_ro` suffix - Datasource/Deltastreamer options renamed accordingly - Support fallback to old config values as well, so migration is painless - Config for controlling _ro suffix addition - Renaming DataFile to BaseFile across DTOs, HoodieFileSlice and AbstractTableFileSystemView
This commit is contained in:
@@ -23,7 +23,7 @@ import org.apache.hudi.avro.model.HoodieCompactionPlan;
|
||||
import org.apache.hudi.common.HoodieClientTestUtils;
|
||||
import org.apache.hudi.common.HoodieTestDataGenerator;
|
||||
import org.apache.hudi.common.model.FileSlice;
|
||||
import org.apache.hudi.common.model.HoodieDataFile;
|
||||
import org.apache.hudi.common.model.HoodieBaseFile;
|
||||
import org.apache.hudi.common.model.HoodieFileGroupId;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.model.HoodieTableType;
|
||||
@@ -402,7 +402,7 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
|
||||
assertEquals("Expect baseInstant to match compaction Instant", fileSlice.getBaseInstantTime(), opPair.getKey());
|
||||
assertTrue("Expect atleast one log file to be present where the latest delta commit was written",
|
||||
fileSlice.getLogFiles().count() > 0);
|
||||
assertFalse("Expect no data-file to be present", fileSlice.getDataFile().isPresent());
|
||||
assertFalse("Expect no data-file to be present", fileSlice.getBaseFile().isPresent());
|
||||
} else {
|
||||
assertTrue("Expect baseInstant to be less than or equal to latestDeltaCommit",
|
||||
fileSlice.getBaseInstantTime().compareTo(latestDeltaCommit) <= 0);
|
||||
@@ -439,8 +439,8 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
|
||||
assertNoWriteErrors(statusList);
|
||||
metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
|
||||
HoodieTable hoodieTable = getHoodieTable(metaClient, cfg);
|
||||
List<HoodieDataFile> dataFilesToRead = getCurrentLatestDataFiles(hoodieTable, cfg);
|
||||
assertTrue("RealtimeTableView should list the parquet files we wrote in the delta commit",
|
||||
List<HoodieBaseFile> dataFilesToRead = getCurrentLatestDataFiles(hoodieTable, cfg);
|
||||
assertTrue("should list the parquet files we wrote in the delta commit",
|
||||
dataFilesToRead.stream().findAny().isPresent());
|
||||
validateDeltaCommit(firstInstant, fgIdToCompactionOperation, cfg);
|
||||
}
|
||||
@@ -487,7 +487,7 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
|
||||
assertFalse("Verify all file-slices have base-instant same as compaction instant", fileSliceList.stream()
|
||||
.anyMatch(fs -> !fs.getBaseInstantTime().equals(compactionInstantTime)));
|
||||
assertFalse("Verify all file-slices have data-files",
|
||||
fileSliceList.stream().anyMatch(fs -> !fs.getDataFile().isPresent()));
|
||||
fileSliceList.stream().anyMatch(fs -> !fs.getBaseFile().isPresent()));
|
||||
|
||||
if (hasDeltaCommitAfterPendingCompaction) {
|
||||
assertFalse("Verify all file-slices have atleast one log-file",
|
||||
@@ -533,11 +533,11 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
|
||||
return statusList;
|
||||
}
|
||||
|
||||
private List<HoodieDataFile> getCurrentLatestDataFiles(HoodieTable table, HoodieWriteConfig cfg) throws IOException {
|
||||
private List<HoodieBaseFile> getCurrentLatestDataFiles(HoodieTable table, HoodieWriteConfig cfg) throws IOException {
|
||||
FileStatus[] allFiles = HoodieTestUtils.listAllDataFilesInPath(table.getMetaClient().getFs(), cfg.getBasePath());
|
||||
HoodieTableFileSystemView view =
|
||||
new HoodieTableFileSystemView(table.getMetaClient(), table.getCompletedCommitsTimeline(), allFiles);
|
||||
return view.getLatestDataFiles().collect(Collectors.toList());
|
||||
return view.getLatestBaseFiles().collect(Collectors.toList());
|
||||
}
|
||||
|
||||
private List<FileSlice> getCurrentLatestFileSlices(HoodieTable table) {
|
||||
|
||||
@@ -26,7 +26,7 @@ import org.apache.hudi.common.HoodieTestDataGenerator;
|
||||
import org.apache.hudi.common.model.FileSlice;
|
||||
import org.apache.hudi.common.model.HoodieCleaningPolicy;
|
||||
import org.apache.hudi.common.model.HoodieCommitMetadata;
|
||||
import org.apache.hudi.common.model.HoodieDataFile;
|
||||
import org.apache.hudi.common.model.HoodieBaseFile;
|
||||
import org.apache.hudi.common.model.HoodieFileGroup;
|
||||
import org.apache.hudi.common.model.HoodieFileGroupId;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
@@ -267,8 +267,8 @@ public class TestCleaner extends TestHoodieClientBase {
|
||||
for (HoodieFileGroup fileGroup : fileGroups) {
|
||||
if (compactionFileIdToLatestFileSlice.containsKey(fileGroup.getFileGroupId())) {
|
||||
// Ensure latest file-slice selected for compaction is retained
|
||||
Option<HoodieDataFile> dataFileForCompactionPresent =
|
||||
Option.fromJavaOptional(fileGroup.getAllDataFiles().filter(df -> {
|
||||
Option<HoodieBaseFile> dataFileForCompactionPresent =
|
||||
Option.fromJavaOptional(fileGroup.getAllBaseFiles().filter(df -> {
|
||||
return compactionFileIdToLatestFileSlice.get(fileGroup.getFileGroupId()).getBaseInstantTime()
|
||||
.equals(df.getCommitTime());
|
||||
}).findAny());
|
||||
@@ -277,7 +277,7 @@ public class TestCleaner extends TestHoodieClientBase {
|
||||
} else {
|
||||
// file has no more than max versions
|
||||
String fileId = fileGroup.getFileGroupId().getFileId();
|
||||
List<HoodieDataFile> dataFiles = fileGroup.getAllDataFiles().collect(Collectors.toList());
|
||||
List<HoodieBaseFile> dataFiles = fileGroup.getAllBaseFiles().collect(Collectors.toList());
|
||||
|
||||
assertTrue("fileId " + fileId + " has more than " + maxVersions + " versions",
|
||||
dataFiles.size() <= maxVersions);
|
||||
@@ -391,7 +391,7 @@ public class TestCleaner extends TestHoodieClientBase {
|
||||
List<HoodieFileGroup> fileGroups = fsView.getAllFileGroups(partitionPath).collect(Collectors.toList());
|
||||
for (HoodieFileGroup fileGroup : fileGroups) {
|
||||
Set<String> commitTimes = new HashSet<>();
|
||||
fileGroup.getAllDataFiles().forEach(value -> {
|
||||
fileGroup.getAllBaseFiles().forEach(value -> {
|
||||
LOG.debug("Data File - " + value);
|
||||
commitTimes.add(value.getCommitTime());
|
||||
});
|
||||
@@ -1025,7 +1025,7 @@ public class TestCleaner extends TestHoodieClientBase {
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
HoodieTable table = HoodieTable.getHoodieTable(metaClient, config, jsc);
|
||||
FileSlice slice =
|
||||
table.getRTFileSystemView().getLatestFileSlices(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH)
|
||||
table.getSliceView().getLatestFileSlices(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH)
|
||||
.filter(fs -> fs.getFileId().equals(fileId)).findFirst().get();
|
||||
List<FileSlice> slices = new ArrayList<>();
|
||||
if (compactionInstantsToFileSlices.containsKey(compactionInstants[j])) {
|
||||
@@ -1069,12 +1069,12 @@ public class TestCleaner extends TestHoodieClientBase {
|
||||
|
||||
expFileIdToPendingCompaction.forEach((fileId, value) -> {
|
||||
String baseInstantForCompaction = fileIdToLatestInstantBeforeCompaction.get(fileId);
|
||||
Option<FileSlice> fileSliceForCompaction = Option.fromJavaOptional(hoodieTable.getRTFileSystemView()
|
||||
Option<FileSlice> fileSliceForCompaction = Option.fromJavaOptional(hoodieTable.getSliceView()
|
||||
.getLatestFileSlicesBeforeOrOn(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, baseInstantForCompaction,
|
||||
true)
|
||||
.filter(fs -> fs.getFileId().equals(fileId)).findFirst());
|
||||
Assert.assertTrue("Base Instant for Compaction must be preserved", fileSliceForCompaction.isPresent());
|
||||
Assert.assertTrue("FileSlice has data-file", fileSliceForCompaction.get().getDataFile().isPresent());
|
||||
Assert.assertTrue("FileSlice has data-file", fileSliceForCompaction.get().getBaseFile().isPresent());
|
||||
Assert.assertEquals("FileSlice has log-files", 2, fileSliceForCompaction.get().getLogFiles().count());
|
||||
});
|
||||
|
||||
@@ -1135,9 +1135,9 @@ public class TestCleaner extends TestHoodieClientBase {
|
||||
private Stream<Pair<String, String>> convertPathToFileIdWithCommitTime(final HoodieTableMetaClient metaClient,
|
||||
List<String> paths) {
|
||||
Predicate<String> roFilePredicate =
|
||||
path -> path.contains(metaClient.getTableConfig().getROFileFormat().getFileExtension());
|
||||
path -> path.contains(metaClient.getTableConfig().getBaseFileFormat().getFileExtension());
|
||||
Predicate<String> rtFilePredicate =
|
||||
path -> path.contains(metaClient.getTableConfig().getRTFileFormat().getFileExtension());
|
||||
path -> path.contains(metaClient.getTableConfig().getLogFileFormat().getFileExtension());
|
||||
Stream<Pair<String, String>> stream1 = paths.stream().filter(roFilePredicate).map(fullPath -> {
|
||||
String fileName = Paths.get(fullPath).getFileName().toString();
|
||||
return Pair.of(FSUtils.getFileId(fileName), FSUtils.getCommitTime(fileName));
|
||||
|
||||
@@ -20,11 +20,11 @@ package org.apache.hudi;
|
||||
|
||||
import org.apache.hudi.common.HoodieTestDataGenerator;
|
||||
import org.apache.hudi.common.model.HoodieCleaningPolicy;
|
||||
import org.apache.hudi.common.model.HoodieDataFile;
|
||||
import org.apache.hudi.common.model.HoodieBaseFile;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.model.HoodieTestUtils;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.TableFileSystemView.ReadOptimizedView;
|
||||
import org.apache.hudi.common.table.TableFileSystemView.BaseFileOnlyView;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.util.FSUtils;
|
||||
import org.apache.hudi.config.HoodieCompactionConfig;
|
||||
@@ -100,15 +100,15 @@ public class TestClientRollback extends TestHoodieClientBase {
|
||||
FSUtils.getAllPartitionPaths(fs, cfg.getBasePath(), getConfig().shouldAssumeDatePartitioning());
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
HoodieTable table = HoodieTable.getHoodieTable(metaClient, getConfig(), jsc);
|
||||
final ReadOptimizedView view1 = table.getROFileSystemView();
|
||||
final BaseFileOnlyView view1 = table.getBaseFileOnlyView();
|
||||
|
||||
List<HoodieDataFile> dataFiles = partitionPaths.stream().flatMap(s -> {
|
||||
return view1.getAllDataFiles(s).filter(f -> f.getCommitTime().equals("003"));
|
||||
List<HoodieBaseFile> dataFiles = partitionPaths.stream().flatMap(s -> {
|
||||
return view1.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("003"));
|
||||
}).collect(Collectors.toList());
|
||||
assertEquals("The data files for commit 003 should be present", 3, dataFiles.size());
|
||||
|
||||
dataFiles = partitionPaths.stream().flatMap(s -> {
|
||||
return view1.getAllDataFiles(s).filter(f -> f.getCommitTime().equals("002"));
|
||||
return view1.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("002"));
|
||||
}).collect(Collectors.toList());
|
||||
assertEquals("The data files for commit 002 should be present", 3, dataFiles.size());
|
||||
|
||||
@@ -125,9 +125,9 @@ public class TestClientRollback extends TestHoodieClientBase {
|
||||
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
table = HoodieTable.getHoodieTable(metaClient, getConfig(), jsc);
|
||||
final ReadOptimizedView view2 = table.getROFileSystemView();
|
||||
final BaseFileOnlyView view2 = table.getBaseFileOnlyView();
|
||||
|
||||
dataFiles = partitionPaths.stream().flatMap(s -> view2.getAllDataFiles(s).filter(f -> f.getCommitTime().equals("004"))).collect(Collectors.toList());
|
||||
dataFiles = partitionPaths.stream().flatMap(s -> view2.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("004"))).collect(Collectors.toList());
|
||||
assertEquals("The data files for commit 004 should be present", 3, dataFiles.size());
|
||||
|
||||
// rolling back to a non existent savepoint must not succeed
|
||||
@@ -144,19 +144,19 @@ public class TestClientRollback extends TestHoodieClientBase {
|
||||
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
table = HoodieTable.getHoodieTable(metaClient, getConfig(), jsc);
|
||||
final ReadOptimizedView view3 = table.getROFileSystemView();
|
||||
final BaseFileOnlyView view3 = table.getBaseFileOnlyView();
|
||||
dataFiles = partitionPaths.stream().flatMap(s -> {
|
||||
return view3.getAllDataFiles(s).filter(f -> f.getCommitTime().equals("002"));
|
||||
return view3.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("002"));
|
||||
}).collect(Collectors.toList());
|
||||
assertEquals("The data files for commit 002 be available", 3, dataFiles.size());
|
||||
|
||||
dataFiles = partitionPaths.stream().flatMap(s -> {
|
||||
return view3.getAllDataFiles(s).filter(f -> f.getCommitTime().equals("003"));
|
||||
return view3.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("003"));
|
||||
}).collect(Collectors.toList());
|
||||
assertEquals("The data files for commit 003 should be rolled back", 0, dataFiles.size());
|
||||
|
||||
dataFiles = partitionPaths.stream().flatMap(s -> {
|
||||
return view3.getAllDataFiles(s).filter(f -> f.getCommitTime().equals("004"));
|
||||
return view3.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("004"));
|
||||
}).collect(Collectors.toList());
|
||||
assertEquals("The data files for commit 004 should be rolled back", 0, dataFiles.size());
|
||||
}
|
||||
|
||||
@@ -275,7 +275,7 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
|
||||
// Expect all file-slice whose base-commit is same as compaction commit to contain no new Log files
|
||||
newFsView.getLatestFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], compactionInstant, true)
|
||||
.filter(fs -> fs.getBaseInstantTime().equals(compactionInstant)).forEach(fs -> {
|
||||
Assert.assertFalse("No Data file must be present", fs.getDataFile().isPresent());
|
||||
Assert.assertFalse("No Data file must be present", fs.getBaseFile().isPresent());
|
||||
Assert.assertEquals("No Log Files", 0, fs.getLogFiles().count());
|
||||
});
|
||||
|
||||
@@ -336,7 +336,7 @@ public class TestCompactionAdminClient extends TestHoodieClientBase {
|
||||
newFsView.getLatestFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], compactionInstant, true)
|
||||
.filter(fs -> fs.getBaseInstantTime().equals(compactionInstant))
|
||||
.filter(fs -> fs.getFileId().equals(op.getFileId())).forEach(fs -> {
|
||||
Assert.assertFalse("No Data file must be present", fs.getDataFile().isPresent());
|
||||
Assert.assertFalse("No Data file must be present", fs.getBaseFile().isPresent());
|
||||
Assert.assertEquals("No Log Files", 0, fs.getLogFiles().count());
|
||||
});
|
||||
|
||||
|
||||
@@ -157,7 +157,7 @@ public class TestHoodieClientBase extends HoodieClientTestHarness {
|
||||
|
||||
protected HoodieTable getHoodieTable(HoodieTableMetaClient metaClient, HoodieWriteConfig config) {
|
||||
HoodieTable table = HoodieTable.getHoodieTable(metaClient, config, jsc);
|
||||
((SyncableFileSystemView) (table.getRTFileSystemView())).reset();
|
||||
((SyncableFileSystemView) (table.getSliceView())).reset();
|
||||
return table;
|
||||
}
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ package org.apache.hudi;
|
||||
import org.apache.hudi.common.HoodieClientTestUtils;
|
||||
import org.apache.hudi.common.HoodieTestDataGenerator;
|
||||
import org.apache.hudi.common.model.HoodieCommitMetadata;
|
||||
import org.apache.hudi.common.model.HoodieDataFile;
|
||||
import org.apache.hudi.common.model.HoodieBaseFile;
|
||||
import org.apache.hudi.common.model.HoodieKey;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.model.HoodieRollingStat;
|
||||
@@ -30,7 +30,7 @@ import org.apache.hudi.common.model.HoodieTestUtils;
|
||||
import org.apache.hudi.common.model.TimelineLayoutVersion;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.HoodieTimeline;
|
||||
import org.apache.hudi.common.table.TableFileSystemView.ReadOptimizedView;
|
||||
import org.apache.hudi.common.table.TableFileSystemView.BaseFileOnlyView;
|
||||
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.util.ConsistencyGuardConfig;
|
||||
@@ -510,12 +510,12 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
||||
HoodieTableMetaClient metadata = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
|
||||
|
||||
HoodieTable table = getHoodieTable(metadata, config);
|
||||
ReadOptimizedView fileSystemView = table.getROFileSystemView();
|
||||
List<HoodieDataFile> files =
|
||||
fileSystemView.getLatestDataFilesBeforeOrOn(testPartitionPath, commitTime3).collect(Collectors.toList());
|
||||
BaseFileOnlyView fileSystemView = table.getBaseFileOnlyView();
|
||||
List<HoodieBaseFile> files =
|
||||
fileSystemView.getLatestBaseFilesBeforeOrOn(testPartitionPath, commitTime3).collect(Collectors.toList());
|
||||
int numTotalInsertsInCommit3 = 0;
|
||||
int numTotalUpdatesInCommit3 = 0;
|
||||
for (HoodieDataFile file : files) {
|
||||
for (HoodieBaseFile file : files) {
|
||||
if (file.getFileName().contains(file1)) {
|
||||
assertEquals("Existing file should be expanded", commitTime3, file.getCommitTime());
|
||||
records = ParquetUtils.readAvroRecords(jsc.hadoopConfiguration(), new Path(file.getPath()));
|
||||
@@ -616,12 +616,12 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
||||
|
||||
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
|
||||
HoodieTable table = getHoodieTable(metaClient, config);
|
||||
List<HoodieDataFile> files = table.getROFileSystemView()
|
||||
.getLatestDataFilesBeforeOrOn(testPartitionPath, commitTime3).collect(Collectors.toList());
|
||||
List<HoodieBaseFile> files = table.getBaseFileOnlyView()
|
||||
.getLatestBaseFilesBeforeOrOn(testPartitionPath, commitTime3).collect(Collectors.toList());
|
||||
assertEquals("Total of 2 valid data files", 2, files.size());
|
||||
|
||||
int totalInserts = 0;
|
||||
for (HoodieDataFile file : files) {
|
||||
for (HoodieBaseFile file : files) {
|
||||
assertEquals("All files must be at commit 3", commitTime3, file.getCommitTime());
|
||||
records = ParquetUtils.readAvroRecords(jsc.hadoopConfiguration(), new Path(file.getPath()));
|
||||
totalInserts += records.size();
|
||||
|
||||
@@ -25,13 +25,13 @@ import org.apache.hudi.common.bloom.filter.BloomFilter;
|
||||
import org.apache.hudi.common.bloom.filter.BloomFilterFactory;
|
||||
import org.apache.hudi.common.bloom.filter.BloomFilterTypeCode;
|
||||
import org.apache.hudi.common.model.HoodieCommitMetadata;
|
||||
import org.apache.hudi.common.model.HoodieDataFile;
|
||||
import org.apache.hudi.common.model.HoodieBaseFile;
|
||||
import org.apache.hudi.common.model.HoodieKey;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.model.HoodieTestUtils;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.HoodieTimeline;
|
||||
import org.apache.hudi.common.table.TableFileSystemView.ReadOptimizedView;
|
||||
import org.apache.hudi.common.table.TableFileSystemView.BaseFileOnlyView;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
|
||||
import org.apache.hudi.common.util.FSUtils;
|
||||
@@ -203,10 +203,10 @@ public class HoodieClientTestUtils {
|
||||
try {
|
||||
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs.getConf(), basePath, true);
|
||||
for (String path : paths) {
|
||||
ReadOptimizedView fileSystemView = new HoodieTableFileSystemView(metaClient,
|
||||
BaseFileOnlyView fileSystemView = new HoodieTableFileSystemView(metaClient,
|
||||
metaClient.getCommitsTimeline().filterCompletedInstants(), fs.globStatus(new Path(path)));
|
||||
List<HoodieDataFile> latestFiles = fileSystemView.getLatestDataFiles().collect(Collectors.toList());
|
||||
for (HoodieDataFile file : latestFiles) {
|
||||
List<HoodieBaseFile> latestFiles = fileSystemView.getLatestBaseFiles().collect(Collectors.toList());
|
||||
for (HoodieBaseFile file : latestFiles) {
|
||||
filteredPaths.add(file.getPath());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -157,7 +157,7 @@ public class TestHoodieCompactor extends HoodieClientTestHarness {
|
||||
table = HoodieTable.getHoodieTable(metaClient, config, jsc);
|
||||
for (String partitionPath : dataGen.getPartitionPaths()) {
|
||||
List<FileSlice> groupedLogFiles =
|
||||
table.getRTFileSystemView().getLatestFileSlices(partitionPath).collect(Collectors.toList());
|
||||
table.getSliceView().getLatestFileSlices(partitionPath).collect(Collectors.toList());
|
||||
for (FileSlice fileSlice : groupedLogFiles) {
|
||||
assertEquals("There should be 1 log file written for every data file", 1, fileSlice.getLogFiles().count());
|
||||
}
|
||||
@@ -185,7 +185,7 @@ public class TestHoodieCompactor extends HoodieClientTestHarness {
|
||||
return HoodieTableType.MERGE_ON_READ;
|
||||
}
|
||||
|
||||
// TODO - after modifying HoodieReadClient to support realtime tables - add more tests to make
|
||||
// TODO - after modifying HoodieReadClient to support mor tables - add more tests to make
|
||||
// sure the data read is the updated data (compaction correctness)
|
||||
// TODO - add more test cases for compactions after a failed commit/compaction
|
||||
}
|
||||
|
||||
@@ -18,21 +18,21 @@
|
||||
|
||||
package org.apache.hudi.io.strategy;
|
||||
|
||||
import org.apache.hudi.common.model.HoodieDataFile;
|
||||
import org.apache.hudi.common.model.HoodieBaseFile;
|
||||
|
||||
import java.util.UUID;
|
||||
|
||||
public class TestHoodieDataFile extends HoodieDataFile {
|
||||
public class TestHoodieBaseFile extends HoodieBaseFile {
|
||||
|
||||
private final long size;
|
||||
|
||||
public TestHoodieDataFile(long size) {
|
||||
public TestHoodieBaseFile(long size) {
|
||||
super("/tmp/XYXYXYXYXYYX_11_20180918020003.parquet");
|
||||
this.size = size;
|
||||
}
|
||||
|
||||
public static HoodieDataFile newDataFile(long size) {
|
||||
return new TestHoodieDataFile(size);
|
||||
public static HoodieBaseFile newDataFile(long size) {
|
||||
return new TestHoodieBaseFile(size);
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -19,7 +19,7 @@
|
||||
package org.apache.hudi.io.strategy;
|
||||
|
||||
import org.apache.hudi.avro.model.HoodieCompactionOperation;
|
||||
import org.apache.hudi.common.model.HoodieDataFile;
|
||||
import org.apache.hudi.common.model.HoodieBaseFile;
|
||||
import org.apache.hudi.common.model.HoodieLogFile;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
@@ -239,7 +239,7 @@ public class TestHoodieCompactionStrategy {
|
||||
List<HoodieCompactionOperation> operations = new ArrayList<>(sizesMap.size());
|
||||
|
||||
sizesMap.forEach((k, v) -> {
|
||||
HoodieDataFile df = TestHoodieDataFile.newDataFile(k);
|
||||
HoodieBaseFile df = TestHoodieBaseFile.newDataFile(k);
|
||||
String partitionPath = keyToPartitionMap.get(k);
|
||||
List<HoodieLogFile> logFiles = v.stream().map(TestHoodieLogFile::newLogFile).collect(Collectors.toList());
|
||||
operations.add(new HoodieCompactionOperation(df.getCommitTime(),
|
||||
|
||||
@@ -28,7 +28,7 @@ import org.apache.hudi.common.HoodieTestDataGenerator;
|
||||
import org.apache.hudi.common.TestRawTripPayload.MetadataMergeWriteStatus;
|
||||
import org.apache.hudi.common.model.FileSlice;
|
||||
import org.apache.hudi.common.model.HoodieCommitMetadata;
|
||||
import org.apache.hudi.common.model.HoodieDataFile;
|
||||
import org.apache.hudi.common.model.HoodieBaseFile;
|
||||
import org.apache.hudi.common.model.HoodieFileGroup;
|
||||
import org.apache.hudi.common.model.HoodieKey;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
@@ -39,8 +39,8 @@ import org.apache.hudi.common.model.HoodieTestUtils;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.HoodieTimeline;
|
||||
import org.apache.hudi.common.table.SyncableFileSystemView;
|
||||
import org.apache.hudi.common.table.TableFileSystemView.ReadOptimizedView;
|
||||
import org.apache.hudi.common.table.TableFileSystemView.RealtimeView;
|
||||
import org.apache.hudi.common.table.TableFileSystemView.BaseFileOnlyView;
|
||||
import org.apache.hudi.common.table.TableFileSystemView.SliceView;
|
||||
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant.State;
|
||||
@@ -130,14 +130,14 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
assertFalse(commit.isPresent());
|
||||
|
||||
FileStatus[] allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath());
|
||||
ReadOptimizedView roView =
|
||||
BaseFileOnlyView roView =
|
||||
new HoodieTableFileSystemView(metaClient, metaClient.getCommitTimeline().filterCompletedInstants(), allFiles);
|
||||
Stream<HoodieDataFile> dataFilesToRead = roView.getLatestDataFiles();
|
||||
Stream<HoodieBaseFile> dataFilesToRead = roView.getLatestBaseFiles();
|
||||
assertTrue(!dataFilesToRead.findAny().isPresent());
|
||||
|
||||
roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles);
|
||||
dataFilesToRead = roView.getLatestDataFiles();
|
||||
assertTrue("RealtimeTableView should list the parquet files we wrote in the delta commit",
|
||||
dataFilesToRead = roView.getLatestBaseFiles();
|
||||
assertTrue("should list the parquet files we wrote in the delta commit",
|
||||
dataFilesToRead.findAny().isPresent());
|
||||
|
||||
/**
|
||||
@@ -170,7 +170,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
|
||||
allFiles = HoodieTestUtils.listAllDataFilesInPath(dfs, cfg.getBasePath());
|
||||
roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles);
|
||||
dataFilesToRead = roView.getLatestDataFiles();
|
||||
dataFilesToRead = roView.getLatestBaseFiles();
|
||||
assertTrue(dataFilesToRead.findAny().isPresent());
|
||||
|
||||
// verify that there is a commit
|
||||
@@ -238,14 +238,14 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
assertFalse(commit.isPresent());
|
||||
|
||||
FileStatus[] allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath());
|
||||
ReadOptimizedView roView =
|
||||
BaseFileOnlyView roView =
|
||||
new HoodieTableFileSystemView(metaClient, metaClient.getCommitTimeline().filterCompletedInstants(), allFiles);
|
||||
Stream<HoodieDataFile> dataFilesToRead = roView.getLatestDataFiles();
|
||||
Stream<HoodieBaseFile> dataFilesToRead = roView.getLatestBaseFiles();
|
||||
assertFalse(dataFilesToRead.findAny().isPresent());
|
||||
|
||||
roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles);
|
||||
dataFilesToRead = roView.getLatestDataFiles();
|
||||
assertTrue("RealtimeTableView should list the parquet files we wrote in the delta commit",
|
||||
dataFilesToRead = roView.getLatestBaseFiles();
|
||||
assertTrue("should list the parquet files we wrote in the delta commit",
|
||||
dataFilesToRead.findAny().isPresent());
|
||||
|
||||
/**
|
||||
@@ -281,10 +281,10 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
|
||||
allFiles = HoodieTestUtils.listAllDataFilesInPath(dfs, cfg.getBasePath());
|
||||
roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles);
|
||||
dataFilesToRead = roView.getLatestDataFiles();
|
||||
dataFilesToRead = roView.getLatestBaseFiles();
|
||||
assertTrue(dataFilesToRead.findAny().isPresent());
|
||||
|
||||
List<String> dataFiles = roView.getLatestDataFiles().map(HoodieDataFile::getPath).collect(Collectors.toList());
|
||||
List<String> dataFiles = roView.getLatestBaseFiles().map(HoodieBaseFile::getPath).collect(Collectors.toList());
|
||||
List<GenericRecord> recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath);
|
||||
// Wrote 20 records and deleted 20 records, so remaining 20-20 = 0
|
||||
assertEquals("Must contain 0 records", 0, recordsRead.size());
|
||||
@@ -343,7 +343,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles);
|
||||
|
||||
final String absentCommit = newCommitTime;
|
||||
assertFalse(roView.getLatestDataFiles().anyMatch(file -> absentCommit.equals(file.getCommitTime())));
|
||||
assertFalse(roView.getLatestBaseFiles().anyMatch(file -> absentCommit.equals(file.getCommitTime())));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -379,14 +379,14 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
assertFalse(commit.isPresent());
|
||||
|
||||
FileStatus[] allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath());
|
||||
ReadOptimizedView roView =
|
||||
BaseFileOnlyView roView =
|
||||
new HoodieTableFileSystemView(metaClient, metaClient.getCommitTimeline().filterCompletedInstants(), allFiles);
|
||||
Stream<HoodieDataFile> dataFilesToRead = roView.getLatestDataFiles();
|
||||
Stream<HoodieBaseFile> dataFilesToRead = roView.getLatestBaseFiles();
|
||||
assertTrue(!dataFilesToRead.findAny().isPresent());
|
||||
|
||||
roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles);
|
||||
dataFilesToRead = roView.getLatestDataFiles();
|
||||
assertTrue("RealtimeTableView should list the parquet files we wrote in the delta commit",
|
||||
dataFilesToRead = roView.getLatestBaseFiles();
|
||||
assertTrue("should list the parquet files we wrote in the delta commit",
|
||||
dataFilesToRead.findAny().isPresent());
|
||||
|
||||
/**
|
||||
@@ -401,7 +401,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
copyOfRecords = dataGen.generateUpdates(commitTime1, copyOfRecords);
|
||||
copyOfRecords.addAll(dataGen.generateInserts(commitTime1, 200));
|
||||
|
||||
List<String> dataFiles = roView.getLatestDataFiles().map(HoodieDataFile::getPath).collect(Collectors.toList());
|
||||
List<String> dataFiles = roView.getLatestBaseFiles().map(HoodieBaseFile::getPath).collect(Collectors.toList());
|
||||
List<GenericRecord> recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath);
|
||||
assertEquals(recordsRead.size(), 200);
|
||||
|
||||
@@ -415,7 +415,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
// After rollback, there should be no parquet file with the failed commit time
|
||||
Assert.assertEquals(Arrays.stream(allFiles)
|
||||
.filter(file -> file.getPath().getName().contains(commitTime1)).count(), 0);
|
||||
dataFiles = roView.getLatestDataFiles().map(HoodieDataFile::getPath).collect(Collectors.toList());
|
||||
dataFiles = roView.getLatestBaseFiles().map(HoodieBaseFile::getPath).collect(Collectors.toList());
|
||||
recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath);
|
||||
assertEquals(recordsRead.size(), 200);
|
||||
}
|
||||
@@ -431,7 +431,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
copyOfRecords = dataGen.generateUpdates(commitTime2, copyOfRecords);
|
||||
copyOfRecords.addAll(dataGen.generateInserts(commitTime2, 200));
|
||||
|
||||
List<String> dataFiles = roView.getLatestDataFiles().map(HoodieDataFile::getPath).collect(Collectors.toList());
|
||||
List<String> dataFiles = roView.getLatestBaseFiles().map(HoodieBaseFile::getPath).collect(Collectors.toList());
|
||||
List<GenericRecord> recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath);
|
||||
assertEquals(recordsRead.size(), 200);
|
||||
|
||||
@@ -452,7 +452,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
hoodieTable = HoodieTable.getHoodieTable(metaClient, cfg, jsc);
|
||||
roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles);
|
||||
dataFiles = roView.getLatestDataFiles().map(HoodieDataFile::getPath).collect(Collectors.toList());
|
||||
dataFiles = roView.getLatestBaseFiles().map(HoodieBaseFile::getPath).collect(Collectors.toList());
|
||||
recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath);
|
||||
// check that the number of records read is still correct after rollback operation
|
||||
assertEquals(recordsRead.size(), 200);
|
||||
@@ -483,7 +483,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
final String compactedCommitTime =
|
||||
metaClient.getActiveTimeline().reload().getCommitsTimeline().lastInstant().get().getTimestamp();
|
||||
|
||||
assertTrue(roView.getLatestDataFiles().anyMatch(file -> compactedCommitTime.equals(file.getCommitTime())));
|
||||
assertTrue(roView.getLatestBaseFiles().anyMatch(file -> compactedCommitTime.equals(file.getCommitTime())));
|
||||
|
||||
thirdClient.rollback(compactedCommitTime);
|
||||
|
||||
@@ -491,7 +491,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
roView = new HoodieTableFileSystemView(metaClient, metaClient.getCommitsTimeline(), allFiles);
|
||||
|
||||
assertFalse(roView.getLatestDataFiles().anyMatch(file -> compactedCommitTime.equals(file.getCommitTime())));
|
||||
assertFalse(roView.getLatestBaseFiles().anyMatch(file -> compactedCommitTime.equals(file.getCommitTime())));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -526,14 +526,14 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
assertFalse(commit.isPresent());
|
||||
|
||||
FileStatus[] allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath());
|
||||
ReadOptimizedView roView =
|
||||
BaseFileOnlyView roView =
|
||||
new HoodieTableFileSystemView(metaClient, metaClient.getCommitTimeline().filterCompletedInstants(), allFiles);
|
||||
Stream<HoodieDataFile> dataFilesToRead = roView.getLatestDataFiles();
|
||||
Stream<HoodieBaseFile> dataFilesToRead = roView.getLatestBaseFiles();
|
||||
assertFalse(dataFilesToRead.findAny().isPresent());
|
||||
|
||||
roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles);
|
||||
dataFilesToRead = roView.getLatestDataFiles();
|
||||
assertTrue("ReadOptimizedTableView should list the parquet files we wrote in the delta commit",
|
||||
dataFilesToRead = roView.getLatestBaseFiles();
|
||||
assertTrue("Should list the parquet files we wrote in the delta commit",
|
||||
dataFilesToRead.findAny().isPresent());
|
||||
|
||||
/**
|
||||
@@ -548,7 +548,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
copyOfRecords = dataGen.generateUpdates(newCommitTime, copyOfRecords);
|
||||
copyOfRecords.addAll(dataGen.generateInserts(newCommitTime, 200));
|
||||
|
||||
List<String> dataFiles = roView.getLatestDataFiles().map(hf -> hf.getPath()).collect(Collectors.toList());
|
||||
List<String> dataFiles = roView.getLatestBaseFiles().map(hf -> hf.getPath()).collect(Collectors.toList());
|
||||
List<GenericRecord> recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath);
|
||||
assertEquals(recordsRead.size(), 200);
|
||||
|
||||
@@ -611,7 +611,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
final String compactedCommitTime =
|
||||
metaClient.getActiveTimeline().reload().getCommitsTimeline().lastInstant().get().getTimestamp();
|
||||
|
||||
assertTrue(roView.getLatestDataFiles().anyMatch(file -> compactedCommitTime.equals(file.getCommitTime())));
|
||||
assertTrue(roView.getLatestBaseFiles().anyMatch(file -> compactedCommitTime.equals(file.getCommitTime())));
|
||||
|
||||
/**
|
||||
* Write 5 (updates)
|
||||
@@ -635,9 +635,9 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath());
|
||||
roView =
|
||||
new HoodieTableFileSystemView(metaClient, metaClient.getCommitTimeline().filterCompletedInstants(), allFiles);
|
||||
dataFilesToRead = roView.getLatestDataFiles();
|
||||
dataFilesToRead = roView.getLatestBaseFiles();
|
||||
assertFalse(dataFilesToRead.findAny().isPresent());
|
||||
RealtimeView rtView =
|
||||
SliceView rtView =
|
||||
new HoodieTableFileSystemView(metaClient, metaClient.getCommitTimeline().filterCompletedInstants(), allFiles);
|
||||
List<HoodieFileGroup> fileGroups =
|
||||
((HoodieTableFileSystemView) rtView).getAllFileGroups().collect(Collectors.toList());
|
||||
@@ -689,16 +689,16 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
assertFalse(commit.isPresent());
|
||||
|
||||
FileStatus[] allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath());
|
||||
ReadOptimizedView roView = new HoodieTableFileSystemView(metaClient,
|
||||
BaseFileOnlyView roView = new HoodieTableFileSystemView(metaClient,
|
||||
metaClient.getCommitsTimeline().filterCompletedInstants(), allFiles);
|
||||
Stream<HoodieDataFile> dataFilesToRead = roView.getLatestDataFiles();
|
||||
Stream<HoodieBaseFile> dataFilesToRead = roView.getLatestBaseFiles();
|
||||
Map<String, Long> parquetFileIdToSize =
|
||||
dataFilesToRead.collect(Collectors.toMap(HoodieDataFile::getFileId, HoodieDataFile::getFileSize));
|
||||
dataFilesToRead.collect(Collectors.toMap(HoodieBaseFile::getFileId, HoodieBaseFile::getFileSize));
|
||||
|
||||
roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles);
|
||||
dataFilesToRead = roView.getLatestDataFiles();
|
||||
List<HoodieDataFile> dataFilesList = dataFilesToRead.collect(Collectors.toList());
|
||||
assertTrue("RealtimeTableView should list the parquet files we wrote in the delta commit",
|
||||
dataFilesToRead = roView.getLatestBaseFiles();
|
||||
List<HoodieBaseFile> dataFilesList = dataFilesToRead.collect(Collectors.toList());
|
||||
assertTrue("Should list the parquet files we wrote in the delta commit",
|
||||
dataFilesList.size() > 0);
|
||||
|
||||
/**
|
||||
@@ -725,14 +725,14 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath());
|
||||
roView = new HoodieTableFileSystemView(metaClient,
|
||||
hoodieTable.getActiveTimeline().reload().getCommitsTimeline().filterCompletedInstants(), allFiles);
|
||||
dataFilesToRead = roView.getLatestDataFiles();
|
||||
List<HoodieDataFile> newDataFilesList = dataFilesToRead.collect(Collectors.toList());
|
||||
dataFilesToRead = roView.getLatestBaseFiles();
|
||||
List<HoodieBaseFile> newDataFilesList = dataFilesToRead.collect(Collectors.toList());
|
||||
Map<String, Long> parquetFileIdToNewSize =
|
||||
newDataFilesList.stream().collect(Collectors.toMap(HoodieDataFile::getFileId, HoodieDataFile::getFileSize));
|
||||
newDataFilesList.stream().collect(Collectors.toMap(HoodieBaseFile::getFileId, HoodieBaseFile::getFileSize));
|
||||
|
||||
assertTrue(parquetFileIdToNewSize.entrySet().stream().anyMatch(entry -> parquetFileIdToSize.get(entry.getKey()) < entry.getValue()));
|
||||
|
||||
List<String> dataFiles = roView.getLatestDataFiles().map(HoodieDataFile::getPath).collect(Collectors.toList());
|
||||
List<String> dataFiles = roView.getLatestBaseFiles().map(HoodieBaseFile::getPath).collect(Collectors.toList());
|
||||
List<GenericRecord> recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath);
|
||||
// Wrote 20 records in 2 batches
|
||||
assertEquals("Must contain 40 records", 40, recordsRead.size());
|
||||
@@ -770,11 +770,11 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
HoodieTable table = HoodieTable.getHoodieTable(metaClient, config, jsc);
|
||||
// In writeRecordsToLogFiles, no commit files are getting added, so resetting file-system view state
|
||||
((SyncableFileSystemView) (table.getRTFileSystemView())).reset();
|
||||
((SyncableFileSystemView) (table.getSliceView())).reset();
|
||||
|
||||
for (String partitionPath : dataGen.getPartitionPaths()) {
|
||||
List<FileSlice> groupedLogFiles =
|
||||
table.getRTFileSystemView().getLatestFileSlices(partitionPath).collect(Collectors.toList());
|
||||
table.getSliceView().getLatestFileSlices(partitionPath).collect(Collectors.toList());
|
||||
for (FileSlice fileSlice : groupedLogFiles) {
|
||||
assertEquals("There should be 1 log file written for every data file", 1, fileSlice.getLogFiles().count());
|
||||
}
|
||||
@@ -800,9 +800,9 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
|
||||
for (String partitionPath : dataGen.getPartitionPaths()) {
|
||||
List<FileSlice> groupedLogFiles =
|
||||
table.getRTFileSystemView().getLatestFileSlices(partitionPath).collect(Collectors.toList());
|
||||
table.getSliceView().getLatestFileSlices(partitionPath).collect(Collectors.toList());
|
||||
for (FileSlice slice : groupedLogFiles) {
|
||||
assertEquals("After compaction there should be no log files visiable on a Realtime view", 0, slice.getLogFiles().count());
|
||||
assertEquals("After compaction there should be no log files visible on a full view", 0, slice.getLogFiles().count());
|
||||
}
|
||||
List<WriteStatus> writeStatuses = result.collect();
|
||||
assertTrue(writeStatuses.stream().anyMatch(writeStatus -> writeStatus.getStat().getPartitionPath().contentEquals(partitionPath)));
|
||||
@@ -827,12 +827,12 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
|
||||
HoodieTable table =
|
||||
HoodieTable.getHoodieTable(new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath), config, jsc);
|
||||
RealtimeView tableRTFileSystemView = table.getRTFileSystemView();
|
||||
SliceView tableRTFileSystemView = table.getSliceView();
|
||||
|
||||
long numLogFiles = 0;
|
||||
for (String partitionPath : dataGen.getPartitionPaths()) {
|
||||
assertEquals(0, tableRTFileSystemView.getLatestFileSlices(partitionPath)
|
||||
.filter(fileSlice -> fileSlice.getDataFile().isPresent()).count());
|
||||
.filter(fileSlice -> fileSlice.getBaseFile().isPresent()).count());
|
||||
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).anyMatch(fileSlice -> fileSlice.getLogFiles().count() > 0));
|
||||
numLogFiles += tableRTFileSystemView.getLatestFileSlices(partitionPath)
|
||||
.filter(fileSlice -> fileSlice.getLogFiles().count() > 0).count();
|
||||
@@ -903,11 +903,11 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
HoodieTable table = HoodieTable.getHoodieTable(metaClient, config, jsc);
|
||||
RealtimeView tableRTFileSystemView = table.getRTFileSystemView();
|
||||
SliceView tableRTFileSystemView = table.getSliceView();
|
||||
|
||||
long numLogFiles = 0;
|
||||
for (String partitionPath : dataGen.getPartitionPaths()) {
|
||||
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).noneMatch(fileSlice -> fileSlice.getDataFile().isPresent()));
|
||||
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).noneMatch(fileSlice -> fileSlice.getBaseFile().isPresent()));
|
||||
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).noneMatch(fileSlice -> fileSlice.getLogFiles().count() > 0));
|
||||
numLogFiles += tableRTFileSystemView.getLatestFileSlices(partitionPath)
|
||||
.filter(fileSlice -> fileSlice.getLogFiles().count() > 0).count();
|
||||
@@ -940,11 +940,11 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
|
||||
HoodieTable table =
|
||||
HoodieTable.getHoodieTable(new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath), config, jsc);
|
||||
RealtimeView tableRTFileSystemView = table.getRTFileSystemView();
|
||||
SliceView tableRTFileSystemView = table.getSliceView();
|
||||
|
||||
long numLogFiles = 0;
|
||||
for (String partitionPath : dataGen.getPartitionPaths()) {
|
||||
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).noneMatch(fileSlice -> fileSlice.getDataFile().isPresent()));
|
||||
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).noneMatch(fileSlice -> fileSlice.getBaseFile().isPresent()));
|
||||
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).anyMatch(fileSlice -> fileSlice.getLogFiles().count() > 0));
|
||||
numLogFiles += tableRTFileSystemView.getLatestFileSlices(partitionPath)
|
||||
.filter(fileSlice -> fileSlice.getLogFiles().count() > 0).count();
|
||||
@@ -961,12 +961,12 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
||||
// Trigger a rollback of compaction
|
||||
writeClient.rollback(newCommitTime);
|
||||
table = HoodieTable.getHoodieTable(new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath), config, jsc);
|
||||
tableRTFileSystemView = table.getRTFileSystemView();
|
||||
tableRTFileSystemView = table.getSliceView();
|
||||
((SyncableFileSystemView) tableRTFileSystemView).reset();
|
||||
Option<HoodieInstant> lastInstant = ((SyncableFileSystemView) tableRTFileSystemView).getLastInstant();
|
||||
System.out.println("Last Instant =" + lastInstant);
|
||||
for (String partitionPath : dataGen.getPartitionPaths()) {
|
||||
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).noneMatch(fileSlice -> fileSlice.getDataFile().isPresent()));
|
||||
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).noneMatch(fileSlice -> fileSlice.getBaseFile().isPresent()));
|
||||
Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).anyMatch(fileSlice -> fileSlice.getLogFiles().count() > 0));
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user