Refactoring HoodieTableFileSystemView using FileGroups/FileSlices
- Merged all filter* and get* methods - New constructor takes a FileStatus[] - All existing tests pass - A FileGroup is the set of all files that belong to a fileID within a partition - A FileSlice is one generation of data and log files, starting at a base commit
This commit is contained in:
@@ -16,21 +16,21 @@
|
||||
|
||||
package com.uber.hoodie.common.model;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
import com.esotericsoftware.kryo.Kryo;
|
||||
import com.esotericsoftware.kryo.io.Input;
|
||||
import com.esotericsoftware.kryo.io.Output;
|
||||
import com.esotericsoftware.kryo.serializers.JavaSerializer;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.uber.hoodie.common.table.HoodieTableConfig;
|
||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||
import com.uber.hoodie.common.table.log.HoodieLogFile;
|
||||
import com.uber.hoodie.common.table.log.HoodieLogFormat;
|
||||
import com.uber.hoodie.common.table.log.HoodieLogFormat.Writer;
|
||||
import com.uber.hoodie.common.table.log.block.HoodieAvroDataBlock;
|
||||
import com.uber.hoodie.common.util.FSUtils;
|
||||
|
||||
import com.uber.hoodie.common.util.HoodieAvroUtils;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.avro.generic.IndexedRecord;
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
package com.uber.hoodie.common.table.log;
|
||||
|
||||
import com.uber.hoodie.common.minicluster.MiniClusterUtil;
|
||||
import com.uber.hoodie.common.model.HoodieLogFile;
|
||||
import com.uber.hoodie.common.model.HoodieRecord;
|
||||
import com.uber.hoodie.common.table.log.HoodieLogFormat.Reader;
|
||||
import com.uber.hoodie.common.table.log.HoodieLogFormat.Writer;
|
||||
|
||||
@@ -19,6 +19,7 @@ package com.uber.hoodie.common.table.view;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Sets;
|
||||
import com.uber.hoodie.common.model.HoodieDataFile;
|
||||
import com.uber.hoodie.common.model.HoodieFileGroup;
|
||||
import com.uber.hoodie.common.model.HoodieTestUtils;
|
||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||
@@ -43,7 +44,7 @@ import java.util.stream.Collectors;
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
@SuppressWarnings("ResultOfMethodCallIgnored")
|
||||
public class ReadOptimizedTableViewTest {
|
||||
public class HoodieTableFileSystemViewTest {
|
||||
private HoodieTableMetaClient metaClient;
|
||||
private String basePath;
|
||||
private TableFileSystemView fsView;
|
||||
@@ -58,10 +59,16 @@ public class ReadOptimizedTableViewTest {
|
||||
metaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants());
|
||||
}
|
||||
|
||||
private void refreshFsView() {
|
||||
private void refreshFsView(FileStatus[] statuses) {
|
||||
metaClient = new HoodieTableMetaClient(HoodieTestUtils.fs, basePath, true);
|
||||
fsView = new HoodieTableFileSystemView(metaClient,
|
||||
metaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants());
|
||||
if (statuses != null) {
|
||||
fsView = new HoodieTableFileSystemView(metaClient,
|
||||
metaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants(),
|
||||
statuses);
|
||||
} else {
|
||||
fsView = new HoodieTableFileSystemView(metaClient,
|
||||
metaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -71,42 +78,51 @@ public class ReadOptimizedTableViewTest {
|
||||
String fileId = UUID.randomUUID().toString();
|
||||
|
||||
assertFalse("No commit, should not find any data file",
|
||||
fsView.getLatestDataFilesForFileId(partitionPath, fileId).findFirst().isPresent());
|
||||
fsView.getLatestDataFiles(partitionPath)
|
||||
.filter(dfile -> dfile.getFileId().equals(fileId)).findFirst().isPresent());
|
||||
|
||||
// Only one commit, but is not safe
|
||||
String commitTime1 = "1";
|
||||
String fileName1 = FSUtils.makeDataFileName(commitTime1, 1, fileId);
|
||||
new File(basePath + "/" + partitionPath + "/" + fileName1).createNewFile();
|
||||
refreshFsView();
|
||||
refreshFsView(null);
|
||||
assertFalse("No commit, should not find any data file",
|
||||
fsView.getLatestDataFilesForFileId(partitionPath, fileId).findFirst().isPresent());
|
||||
fsView.getLatestDataFiles(partitionPath)
|
||||
.filter(dfile -> dfile.getFileId().equals(fileId))
|
||||
.findFirst().isPresent());
|
||||
|
||||
// Make this commit safe
|
||||
HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline();
|
||||
HoodieInstant instant1 =
|
||||
new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, commitTime1);
|
||||
commitTimeline.saveAsComplete(instant1, Optional.empty());
|
||||
refreshFsView();
|
||||
assertEquals("", fileName1,
|
||||
fsView.getLatestDataFilesForFileId(partitionPath, fileId).findFirst().get()
|
||||
refreshFsView(null);
|
||||
assertEquals("", fileName1, fsView
|
||||
.getLatestDataFiles(partitionPath)
|
||||
.filter(dfile -> dfile.getFileId().equals(fileId))
|
||||
.findFirst().get()
|
||||
.getFileName());
|
||||
|
||||
// Do another commit, but not safe
|
||||
String commitTime2 = "2";
|
||||
String fileName2 = FSUtils.makeDataFileName(commitTime2, 1, fileId);
|
||||
new File(basePath + "/" + partitionPath + "/" + fileName2).createNewFile();
|
||||
refreshFsView();
|
||||
assertEquals("", fileName1,
|
||||
fsView.getLatestDataFilesForFileId(partitionPath, fileId).findFirst().get()
|
||||
refreshFsView(null);
|
||||
assertEquals("", fileName1, fsView
|
||||
.getLatestDataFiles(partitionPath)
|
||||
.filter(dfile -> dfile.getFileId().equals(fileId))
|
||||
.findFirst().get()
|
||||
.getFileName());
|
||||
|
||||
// Make it safe
|
||||
HoodieInstant instant2 =
|
||||
new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, commitTime2);
|
||||
commitTimeline.saveAsComplete(instant2, Optional.empty());
|
||||
refreshFsView();
|
||||
assertEquals("", fileName2,
|
||||
fsView.getLatestDataFilesForFileId(partitionPath, fileId).findFirst().get()
|
||||
refreshFsView(null);
|
||||
assertEquals("", fileName2, fsView
|
||||
.getLatestDataFiles(partitionPath)
|
||||
.filter(dfile -> dfile.getFileId().equals(fileId))
|
||||
.findFirst().get()
|
||||
.getFileName());
|
||||
}
|
||||
|
||||
@@ -147,13 +163,13 @@ public class ReadOptimizedTableViewTest {
|
||||
FileStatus[] statuses = HoodieTestUtils.fs.listStatus(new Path(fullPartitionPath));
|
||||
assertEquals(statuses.length, 7);
|
||||
|
||||
refreshFsView();
|
||||
List<HoodieDataFile> statuses1 =
|
||||
fsView.getLatestVersionInPartition("2016/05/01", commitTime4)
|
||||
refreshFsView(null);
|
||||
List<HoodieDataFile> dataFileList =
|
||||
fsView.getLatestDataFilesBeforeOrOn("2016/05/01", commitTime4)
|
||||
.collect(Collectors.toList());
|
||||
assertEquals(statuses1.size(), 3);
|
||||
assertEquals(dataFileList.size(), 3);
|
||||
Set<String> filenames = Sets.newHashSet();
|
||||
for (HoodieDataFile status : statuses1) {
|
||||
for (HoodieDataFile status : dataFileList) {
|
||||
filenames.add(status.getFileName());
|
||||
}
|
||||
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime4, 1, fileId1)));
|
||||
@@ -162,7 +178,7 @@ public class ReadOptimizedTableViewTest {
|
||||
|
||||
// Reset the max commit time
|
||||
List<HoodieDataFile> statuses2 =
|
||||
fsView.getLatestVersionInPartition("2016/05/01", commitTime3)
|
||||
fsView.getLatestDataFilesBeforeOrOn("2016/05/01", commitTime3)
|
||||
.collect(Collectors.toList());
|
||||
assertEquals(statuses2.size(), 3);
|
||||
filenames = Sets.newHashSet();
|
||||
@@ -211,18 +227,18 @@ public class ReadOptimizedTableViewTest {
|
||||
FileStatus[] statuses = HoodieTestUtils.fs.listStatus(new Path(fullPartitionPath));
|
||||
assertEquals(statuses.length, 7);
|
||||
|
||||
refreshFsView();
|
||||
List<List<HoodieDataFile>> statuses1 =
|
||||
fsView.getEveryVersionInPartition("2016/05/01").collect(Collectors.toList());
|
||||
assertEquals(statuses1.size(), 3);
|
||||
refreshFsView(null);
|
||||
List<HoodieFileGroup> fileGroups =
|
||||
fsView.getAllFileGroups("2016/05/01").collect(Collectors.toList());
|
||||
assertEquals(fileGroups.size(), 3);
|
||||
|
||||
for (List<HoodieDataFile> status : statuses1) {
|
||||
String fileId = status.get(0).getFileId();
|
||||
for (HoodieFileGroup fileGroup : fileGroups) {
|
||||
String fileId = fileGroup.getId();
|
||||
Set<String> filenames = Sets.newHashSet();
|
||||
for (HoodieDataFile dataFile : status) {
|
||||
fileGroup.getAllDataFiles().forEach(dataFile -> {
|
||||
assertEquals("All same fileId should be grouped", fileId, dataFile.getFileId());
|
||||
filenames.add(dataFile.getFileName());
|
||||
}
|
||||
});
|
||||
if (fileId.equals(fileId1)) {
|
||||
assertEquals(filenames,
|
||||
Sets.newHashSet(FSUtils.makeDataFileName(commitTime1, 1, fileId1),
|
||||
@@ -277,9 +293,9 @@ public class ReadOptimizedTableViewTest {
|
||||
FileStatus[] statuses = HoodieTestUtils.fs.listStatus(new Path(fullPartitionPath));
|
||||
assertEquals(statuses.length, 7);
|
||||
|
||||
refreshFsView();
|
||||
refreshFsView(statuses);
|
||||
List<HoodieDataFile> statuses1 = fsView
|
||||
.getLatestVersionInRange(statuses, Lists.newArrayList(commitTime2, commitTime3))
|
||||
.getLatestDataFilesInRange(Lists.newArrayList(commitTime2, commitTime3))
|
||||
.collect(Collectors.toList());
|
||||
assertEquals(statuses1.size(), 2);
|
||||
Set<String> filenames = Sets.newHashSet();
|
||||
@@ -293,7 +309,8 @@ public class ReadOptimizedTableViewTest {
|
||||
@Test
|
||||
public void streamLatestVersionsBefore() throws IOException {
|
||||
// Put some files in the partition
|
||||
String fullPartitionPath = basePath + "/2016/05/01/";
|
||||
String partitionPath = "2016/05/01/";
|
||||
String fullPartitionPath = basePath + "/" + partitionPath;
|
||||
new File(fullPartitionPath).mkdirs();
|
||||
String commitTime1 = "1";
|
||||
String commitTime2 = "2";
|
||||
@@ -327,9 +344,9 @@ public class ReadOptimizedTableViewTest {
|
||||
FileStatus[] statuses = HoodieTestUtils.fs.listStatus(new Path(fullPartitionPath));
|
||||
assertEquals(statuses.length, 7);
|
||||
|
||||
refreshFsView();
|
||||
refreshFsView(null);
|
||||
List<HoodieDataFile> statuses1 =
|
||||
fsView.getLatestVersionsBeforeOrOn(statuses, commitTime2)
|
||||
fsView.getLatestDataFilesBeforeOrOn(partitionPath, commitTime2)
|
||||
.collect(Collectors.toList());
|
||||
assertEquals(statuses1.size(), 2);
|
||||
Set<String> filenames = Sets.newHashSet();
|
||||
@@ -344,7 +361,8 @@ public class ReadOptimizedTableViewTest {
|
||||
@Test
|
||||
public void streamLatestVersions() throws IOException {
|
||||
// Put some files in the partition
|
||||
String fullPartitionPath = basePath + "/2016/05/01/";
|
||||
String partitionPath = "2016/05/01/";
|
||||
String fullPartitionPath = basePath + "/" + partitionPath;
|
||||
new File(fullPartitionPath).mkdirs();
|
||||
String commitTime1 = "1";
|
||||
String commitTime2 = "2";
|
||||
@@ -378,9 +396,9 @@ public class ReadOptimizedTableViewTest {
|
||||
FileStatus[] statuses = HoodieTestUtils.fs.listStatus(new Path(fullPartitionPath));
|
||||
assertEquals(statuses.length, 7);
|
||||
|
||||
refreshFsView();
|
||||
refreshFsView(statuses);
|
||||
List<HoodieDataFile> statuses1 =
|
||||
fsView.getLatestVersions(statuses).collect(Collectors.toList());
|
||||
fsView.getLatestDataFiles().collect(Collectors.toList());
|
||||
assertEquals(statuses1.size(), 3);
|
||||
Set<String> filenames = Sets.newHashSet();
|
||||
for (HoodieDataFile status : statuses1) {
|
||||
Reference in New Issue
Block a user