1
0

[HUDI-296] Explore use of spotless to auto fix formatting errors (#945)

- Add spotless format fixing to the project
- One-time reformatting of the codebase for conformity
- The build now fails on formatting violations; running mvn spotless:apply auto-fixes them
This commit is contained in:
leesf
2019-10-10 20:19:40 +08:00
committed by vinoth chandar
parent 834c591955
commit b19bed442d
381 changed files with 7350 additions and 9064 deletions

View File

@@ -39,14 +39,14 @@ public class HoodieCommonTestHarness {
/**
* Initializes basePath.
*/
*/
protected void initPath() {
this.basePath = folder.getRoot().getAbsolutePath();
}
/**
* Initializes an instance of {@link HoodieTableMetaClient} with a special table type
* specified by {@code getTableType()}.
* Initializes an instance of {@link HoodieTableMetaClient} with a special table type specified by
* {@code getTableType()}.
*
* @throws IOException
*/
@@ -67,8 +67,7 @@ public class HoodieCommonTestHarness {
return new HoodieTableFileSystemView(metaClient, timeline, enableIncrementalTimelineSync);
}
protected SyncableFileSystemView getFileSystemView(HoodieTableMetaClient metaClient)
throws IOException {
protected SyncableFileSystemView getFileSystemView(HoodieTableMetaClient metaClient) throws IOException {
return getFileSystemView(metaClient, metaClient.getActiveTimeline().filterCompletedAndCompactionInstants());
}
@@ -78,8 +77,8 @@ public class HoodieCommonTestHarness {
}
/**
* Gets a default {@link HoodieTableType#COPY_ON_WRITE} table type.
* Sub-classes can override this method to specify a new table type.
* Gets a default {@link HoodieTableType#COPY_ON_WRITE} table type. Sub-classes can override this method to specify a
* new table type.
*
* @return an instance of Hoodie table type.
*/

View File

@@ -107,7 +107,7 @@ public class HdfsTestService {
* exist.
*
* @param localDFSLocation The location on the local FS to hold the HDFS metadata and block data
* @param clean Specifies if we want to start a clean cluster
* @param clean Specifies if we want to start a clean cluster
* @return Returns true if we should format a DFSCluster, otherwise false
*/
private static boolean shouldFormatDFSCluster(String localDFSLocation, boolean clean) {
@@ -122,9 +122,9 @@ public class HdfsTestService {
/**
* Configure the DFS Cluster before launching it.
*
* @param config The already created Hadoop configuration we'll further configure for HDFS
* @param config The already created Hadoop configuration we'll further configure for HDFS
* @param localDFSLocation The location on the local filesystem where cluster data is stored
* @param bindIP An IP address we want to force the datanode and namenode to bind to.
* @param bindIP An IP address we want to force the datanode and namenode to bind to.
* @return The updated Configuration object.
*/
private static Configuration configureDFSCluster(Configuration config, String localDFSLocation, String bindIP,

View File

@@ -49,8 +49,7 @@ public class AvroBinaryTestPayload implements HoodieRecordPayload {
}
@Override
public Option<IndexedRecord> combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema)
throws IOException {
public Option<IndexedRecord> combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema) throws IOException {
return getInsertValue(schema);
}

View File

@@ -90,8 +90,7 @@ public class HoodieTestUtils {
return new Configuration();
}
public static HoodieTableMetaClient init(String basePath)
throws IOException {
public static HoodieTableMetaClient init(String basePath) throws IOException {
return init(basePath, HoodieTableType.COPY_ON_WRITE);
}
@@ -99,8 +98,7 @@ public class HoodieTestUtils {
return init(getDefaultHadoopConf(), basePath, tableType);
}
public static HoodieTableMetaClient init(Configuration hadoopConf, String basePath)
throws IOException {
public static HoodieTableMetaClient init(Configuration hadoopConf, String basePath) throws IOException {
return init(hadoopConf, basePath, HoodieTableType.COPY_ON_WRITE);
}
@@ -121,7 +119,7 @@ public class HoodieTestUtils {
for (String commitTime : commitTimes) {
new File(
basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + HoodieTimeline.makeCommitFileName(commitTime))
.createNewFile();
.createNewFile();
}
}
@@ -129,7 +127,7 @@ public class HoodieTestUtils {
for (String commitTime : commitTimes) {
new File(
basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + HoodieTimeline.makeDeltaFileName(commitTime))
.createNewFile();
.createNewFile();
}
}
@@ -139,17 +137,16 @@ public class HoodieTestUtils {
public static final void createInflightCommitFiles(String basePath, String... commitTimes) throws IOException {
for (String commitTime : commitTimes) {
new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + HoodieTimeline.makeInflightCommitFileName(
commitTime)).createNewFile();
new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
+ HoodieTimeline.makeInflightCommitFileName(commitTime)).createNewFile();
}
}
public static final void createInflightCleanFiles(String basePath, Configuration configuration, String... commitTimes)
throws IOException {
for (String commitTime : commitTimes) {
Path commitFile = new Path((basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + HoodieTimeline
.makeInflightCleanerFileName(
commitTime)));
Path commitFile = new Path((basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
+ HoodieTimeline.makeInflightCleanerFileName(commitTime)));
FileSystem fs = FSUtils.getFs(basePath, configuration);
FSDataOutputStream os = fs.create(commitFile, true);
}
@@ -181,8 +178,8 @@ public class HoodieTestUtils {
public static final String createMarkerFile(String basePath, String partitionPath, String commitTime, String fileID)
throws IOException {
String folderPath = basePath + "/" + HoodieTableMetaClient.TEMPFOLDER_NAME + "/" + commitTime + "/"
+ partitionPath + "/";
String folderPath =
basePath + "/" + HoodieTableMetaClient.TEMPFOLDER_NAME + "/" + commitTime + "/" + partitionPath + "/";
new File(folderPath).mkdirs();
File f = new File(folderPath + FSUtils.makeMarkerFile(commitTime, DEFAULT_WRITE_TOKEN, fileID));
f.createNewFile();
@@ -196,10 +193,8 @@ public class HoodieTestUtils {
if (!makeDir) {
throw new IOException("cannot create directory for path " + folderPath);
}
boolean createFile = fs.createNewFile(new Path(
folderPath + FSUtils
.makeLogFileName(fileID, ".log", commitTime, version.orElse(DEFAULT_LOG_VERSION),
HoodieLogFormat.UNKNOWN_WRITE_TOKEN)));
boolean createFile = fs.createNewFile(new Path(folderPath + FSUtils.makeLogFileName(fileID, ".log", commitTime,
version.orElse(DEFAULT_LOG_VERSION), HoodieLogFormat.UNKNOWN_WRITE_TOKEN)));
if (!createFile) {
throw new IOException(
StringUtils.format("cannot create data file for commit %s and fileId %s", commitTime, fileID));
@@ -210,9 +205,8 @@ public class HoodieTestUtils {
public static final void createCompactionCommitFiles(FileSystem fs, String basePath, String... commitTimes)
throws IOException {
for (String commitTime : commitTimes) {
boolean createFile = fs.createNewFile(new Path(
basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + HoodieTimeline
.makeCommitFileName(commitTime)));
boolean createFile = fs.createNewFile(new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
+ HoodieTimeline.makeCommitFileName(commitTime)));
if (!createFile) {
throw new IOException("cannot create commit file for commit " + commitTime);
}
@@ -222,15 +216,13 @@ public class HoodieTestUtils {
public static final void createCompactionRequest(HoodieTableMetaClient metaClient, String instant,
List<Pair<String, FileSlice>> fileSliceList) throws IOException {
HoodieCompactionPlan plan = CompactionUtils.buildFromFileSlices(fileSliceList, Option.empty(), Option.empty());
HoodieInstant compactionInstant =
new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, instant);
HoodieInstant compactionInstant = new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, instant);
metaClient.getActiveTimeline().saveToCompactionRequested(compactionInstant,
AvroUtils.serializeCompactionPlan(plan));
}
public static final String getDataFilePath(String basePath, String partitionPath, String commitTime, String fileID) {
return basePath + "/" + partitionPath + "/" + FSUtils
.makeDataFileName(commitTime, DEFAULT_WRITE_TOKEN, fileID);
return basePath + "/" + partitionPath + "/" + FSUtils.makeDataFileName(commitTime, DEFAULT_WRITE_TOKEN, fileID);
}
public static final String getLogFilePath(String basePath, String partitionPath, String commitTime, String fileID,
@@ -266,13 +258,13 @@ public class HoodieTestUtils {
public static final boolean doesCommitExist(String basePath, String commitTime) {
return new File(
basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + commitTime + HoodieTimeline.COMMIT_EXTENSION)
.exists();
.exists();
}
public static final boolean doesInflightExist(String basePath, String commitTime) {
return new File(
basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + commitTime + HoodieTimeline.INFLIGHT_EXTENSION)
.exists();
.exists();
}
public static void createCleanFiles(String basePath, String commitTime, Configuration configuration)
@@ -286,8 +278,8 @@ public class HoodieTestUtils {
DEFAULT_PARTITION_PATHS[rand.nextInt(DEFAULT_PARTITION_PATHS.length)], new ArrayList<>(), new ArrayList<>(),
new ArrayList<>(), commitTime);
// Create the clean metadata
HoodieCleanMetadata cleanMetadata = AvroUtils.convertCleanMetadata(commitTime, Option.of(0L),
Arrays.asList(cleanStats));
HoodieCleanMetadata cleanMetadata =
AvroUtils.convertCleanMetadata(commitTime, Option.of(0L), Arrays.asList(cleanStats));
// Write empty clean metadata
os.write(AvroUtils.serializeCleanMetadata(cleanMetadata).get());
} finally {
@@ -335,8 +327,8 @@ public class HoodieTestUtils {
public static void writeRecordsToLogFiles(FileSystem fs, String basePath, Schema schema,
List<HoodieRecord> updatedRecords) {
Map<HoodieRecordLocation, List<HoodieRecord>> groupedUpdated = updatedRecords.stream().collect(
Collectors.groupingBy(HoodieRecord::getCurrentLocation));
Map<HoodieRecordLocation, List<HoodieRecord>> groupedUpdated =
updatedRecords.stream().collect(Collectors.groupingBy(HoodieRecord::getCurrentLocation));
groupedUpdated.entrySet().forEach(s -> {
HoodieRecordLocation location = s.getKey();

View File

@@ -36,8 +36,8 @@ public class TestHoodieCommitMetadata {
Assert.assertTrue(commitMetadata.getTotalLogFilesCompacted() > 0);
String serializedCommitMetadata = commitMetadata.toJsonString();
HoodieCommitMetadata metadata = HoodieCommitMetadata.fromJsonString(serializedCommitMetadata,
HoodieCommitMetadata.class);
HoodieCommitMetadata metadata =
HoodieCommitMetadata.fromJsonString(serializedCommitMetadata, HoodieCommitMetadata.class);
// Make sure timing metrics are not written to instant file
Assert.assertTrue(metadata.getTotalScanTime() == 0);
Assert.assertTrue(metadata.getTotalLogFilesCompacted() > 0);

View File

@@ -42,9 +42,9 @@ public class TestHoodieRecord {
@Before
public void setUp() throws Exception {
final List<IndexedRecord> indexedRecords = SchemaTestUtil.generateHoodieTestRecords(0, 1);
final List<HoodieRecord> hoodieRecords = indexedRecords.stream()
.map(r -> new HoodieRecord(new HoodieKey(UUID.randomUUID().toString(), "0000/00/00"),
new AvroBinaryTestPayload(Option.of((GenericRecord) r)))).collect(Collectors.toList());
final List<HoodieRecord> hoodieRecords =
indexedRecords.stream().map(r -> new HoodieRecord(new HoodieKey(UUID.randomUUID().toString(), "0000/00/00"),
new AvroBinaryTestPayload(Option.of((GenericRecord) r)))).collect(Collectors.toList());
hoodieRecord = hoodieRecords.get(0);
}

View File

@@ -58,8 +58,8 @@ public class HoodieTableMetaClientTest extends HoodieCommonTestHarness {
@Test
public void checkSerDe() throws IOException, ClassNotFoundException {
// check if this object is serialized and de-serialized, we are able to read from the file system
HoodieTableMetaClient deseralizedMetaClient = HoodieTestUtils
.serializeDeserialize(metaClient, HoodieTableMetaClient.class);
HoodieTableMetaClient deseralizedMetaClient =
HoodieTestUtils.serializeDeserialize(metaClient, HoodieTableMetaClient.class);
assertNotNull(deseralizedMetaClient);
HoodieActiveTimeline commitTimeline = deseralizedMetaClient.getActiveTimeline();
HoodieInstant instant = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, "1");
@@ -99,10 +99,9 @@ public class HoodieTableMetaClientTest extends HoodieCommonTestHarness {
@Test
public void checkArchiveCommitTimeline() throws IOException {
Path archiveLogPath = HoodieArchivedTimeline.getArchiveLogPath(metaClient.getArchivePath());
SequenceFile.Writer writer = SequenceFile
.createWriter(metaClient.getHadoopConf(), SequenceFile.Writer.file(archiveLogPath),
SequenceFile.Writer.keyClass(Text.class),
SequenceFile.Writer.valueClass(Text.class));
SequenceFile.Writer writer =
SequenceFile.createWriter(metaClient.getHadoopConf(), SequenceFile.Writer.file(archiveLogPath),
SequenceFile.Writer.keyClass(Text.class), SequenceFile.Writer.valueClass(Text.class));
writer.append(new Text("1"), new Text("data1"));
writer.append(new Text("2"), new Text("data2"));

View File

@@ -52,8 +52,8 @@ import org.junit.Test;
/**
* This class is intentionally using a different way of setting up the MiniDFSCluster and not relying on
* {@link MiniClusterUtil} to reproduce append() issue : https://issues.apache.org/jira/browse/HDFS-6325
* Reference : https://issues.apache.org/jira/secure/attachment/12645053/HDFS-6325.patch
* {@link MiniClusterUtil} to reproduce append() issue : https://issues.apache.org/jira/browse/HDFS-6325 Reference :
* https://issues.apache.org/jira/secure/attachment/12645053/HDFS-6325.patch
*/
public class HoodieLogFormatAppendFailureTest {
@@ -83,8 +83,8 @@ public class HoodieLogFormatAppendFailureTest {
}
@Test(timeout = 60000)
public void testFailedToGetAppendStreamFromHDFSNameNode() throws IOException, URISyntaxException,
InterruptedException, TimeoutException {
public void testFailedToGetAppendStreamFromHDFSNameNode()
throws IOException, URISyntaxException, InterruptedException, TimeoutException {
// Use some fs like LocalFileSystem, that does not support appends
String uuid = UUID.randomUUID().toString();
@@ -101,9 +101,8 @@ public class HoodieLogFormatAppendFailureTest {
HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header);
Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(testPath)
.withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits"
+ ".archive").overBaseCommit("")
.withFs(fs).build();
.withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits" + ".archive")
.overBaseCommit("").withFs(fs).build();
writer = writer.appendBlock(dataBlock);
// get the current log file version to compare later
@@ -134,8 +133,7 @@ public class HoodieLogFormatAppendFailureTest {
// return a new writer with a bumped up logVersion
writer = HoodieLogFormat.newWriterBuilder().onParentPath(testPath)
.withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits" + ".archive")
.overBaseCommit("")
.withFs(fs).build();
.overBaseCommit("").withFs(fs).build();
// The log version should be different for this new writer
Assert.assertFalse(writer.getLogFile().getLogVersion() == logFileVersion);
}

View File

@@ -91,7 +91,7 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
@Parameterized.Parameters(name = "LogBlockReadMode")
public static Collection<Boolean[]> data() {
return Arrays.asList(new Boolean[][]{{true}, {false}});
return Arrays.asList(new Boolean[][] {{true}, {false}});
}
@BeforeClass
@@ -122,9 +122,9 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
@Test
public void testEmptyLog() throws IOException, InterruptedException {
Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
.overBaseCommit("100").withFs(fs).build();
Writer writer =
HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
assertEquals("Just created this log, size should be 0", 0, writer.getCurrentSize());
assertTrue("Check all log files should start with a .", writer.getLogFile().getFileName().startsWith("."));
assertEquals("Version should be 1 for new log created", 1, writer.getLogFile().getLogVersion());
@@ -132,9 +132,9 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
@Test
public void testBasicAppend() throws IOException, InterruptedException, URISyntaxException {
Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
.overBaseCommit("100").withFs(fs).build();
Writer writer =
HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 100);
Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
@@ -150,9 +150,9 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
@Test
public void testRollover() throws IOException, InterruptedException, URISyntaxException {
Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
.overBaseCommit("100").withFs(fs).build();
Writer writer =
HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 100);
Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
@@ -165,9 +165,9 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
writer.close();
// Create a writer with the size threshold as the size we just wrote - so this has to roll
writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1").overBaseCommit("100")
.withFs(fs).withSizeThreshold(size - 1).build();
writer =
HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).withSizeThreshold(size - 1).build();
records = SchemaTestUtil.generateTestRecords(0, 100);
header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
dataBlock = new HoodieAvroDataBlock(records, header);
@@ -194,11 +194,9 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
private void testConcurrentAppend(boolean logFileExists, boolean newLogFileFormat) throws Exception {
HoodieLogFormat.WriterBuilder builder1 = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
.overBaseCommit("100").withFs(fs);
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1").overBaseCommit("100").withFs(fs);
HoodieLogFormat.WriterBuilder builder2 = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
.overBaseCommit("100").withFs(fs);
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1").overBaseCommit("100").withFs(fs);
if (newLogFileFormat && logFileExists) {
// Assume there is an existing log-file with write token
@@ -236,9 +234,9 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
@Test
public void testMultipleAppend() throws IOException, URISyntaxException, InterruptedException {
Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
.overBaseCommit("100").withFs(fs).build();
Writer writer =
HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 100);
Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
@@ -248,9 +246,9 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
long size1 = writer.getCurrentSize();
writer.close();
writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1").overBaseCommit("100")
.withFs(fs).build();
writer =
HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
records = SchemaTestUtil.generateTestRecords(0, 100);
header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
dataBlock = new HoodieAvroDataBlock(records, header);
@@ -262,9 +260,9 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
writer.close();
// Close and Open again and append 100 more records
writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1").overBaseCommit("100")
.withFs(fs).build();
writer =
HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
records = SchemaTestUtil.generateTestRecords(0, 100);
header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
dataBlock = new HoodieAvroDataBlock(records, header);
@@ -285,40 +283,30 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
}
/**
* This is actually a test on concurrent append and not recovery lease.
* Commenting this out.
* This is actually a test on concurrent append and not recovery lease. Commenting this out.
* https://issues.apache.org/jira/browse/HUDI-117
*/
/**
@Test
public void testLeaseRecovery() throws IOException, URISyntaxException, InterruptedException {
Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
.overBaseCommit("100").withFs(fs).build();
List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 100);
Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header);
writer = writer.appendBlock(dataBlock);
long size1 = writer.getCurrentSize();
// do not close this writer - this simulates a data note appending to a log dying without closing the file
// writer.close();
writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1").overBaseCommit("100")
.withFs(fs).build();
records = SchemaTestUtil.generateTestRecords(0, 100);
header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
dataBlock = new HoodieAvroDataBlock(records, header);
writer = writer.appendBlock(dataBlock);
long size2 = writer.getCurrentSize();
assertTrue("We just wrote a new block - size2 should be > size1", size2 > size1);
assertEquals("Write should be auto-flushed. The size reported by FileStatus and the writer should match", size2,
fs.getFileStatus(writer.getLogFile().getPath()).getLen());
writer.close();
}
**/
* @Test public void testLeaseRecovery() throws IOException, URISyntaxException, InterruptedException { Writer writer
* = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
* .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
* .overBaseCommit("100").withFs(fs).build(); List<IndexedRecord> records =
* SchemaTestUtil.generateTestRecords(0, 100); Map<HoodieLogBlock.HeaderMetadataType, String> header =
* Maps.newHashMap(); header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
* header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString()); HoodieAvroDataBlock
* dataBlock = new HoodieAvroDataBlock(records, header); writer = writer.appendBlock(dataBlock); long size1 =
* writer.getCurrentSize(); // do not close this writer - this simulates a data note appending to a log dying
* without closing the file // writer.close();
*
* writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
* .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1").overBaseCommit("100")
* .withFs(fs).build(); records = SchemaTestUtil.generateTestRecords(0, 100);
* header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString()); dataBlock = new
* HoodieAvroDataBlock(records, header); writer = writer.appendBlock(dataBlock); long size2 =
* writer.getCurrentSize(); assertTrue("We just wrote a new block - size2 should be > size1", size2 > size1);
* assertEquals("Write should be auto-flushed. The size reported by FileStatus and the writer should match",
* size2, fs.getFileStatus(writer.getLogFile().getPath()).getLen()); writer.close(); }
**/
@Test
public void testAppendNotSupported() throws IOException, URISyntaxException, InterruptedException {
@@ -349,13 +337,13 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
@SuppressWarnings("unchecked")
@Test
public void testBasicWriteAndScan() throws IOException, URISyntaxException, InterruptedException {
Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
.overBaseCommit("100").withFs(fs).build();
Writer writer =
HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
Schema schema = getSimpleSchema();
List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 100);
List<IndexedRecord> copyOfRecords = records.stream().map(
record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
List<IndexedRecord> copyOfRecords = records.stream()
.map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
@@ -378,13 +366,13 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
@SuppressWarnings("unchecked")
@Test
public void testBasicAppendAndRead() throws IOException, URISyntaxException, InterruptedException {
Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
.overBaseCommit("100").withFs(fs).build();
Writer writer =
HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
List<IndexedRecord> records1 = SchemaTestUtil.generateTestRecords(0, 100);
Schema schema = getSimpleSchema();
List<IndexedRecord> copyOfRecords1 = records1.stream().map(
record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
List<IndexedRecord> copyOfRecords1 = records1.stream()
.map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
@@ -392,24 +380,24 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
writer = writer.appendBlock(dataBlock);
writer.close();
writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1").overBaseCommit("100")
.withFs(fs).build();
writer =
HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
List<IndexedRecord> records2 = SchemaTestUtil.generateTestRecords(0, 100);
List<IndexedRecord> copyOfRecords2 = records2.stream().map(
record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
List<IndexedRecord> copyOfRecords2 = records2.stream()
.map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
dataBlock = new HoodieAvroDataBlock(records2, header);
writer = writer.appendBlock(dataBlock);
writer.close();
// Close and Open again and append 100 more records
writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1").overBaseCommit("100")
.withFs(fs).build();
writer =
HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
List<IndexedRecord> records3 = SchemaTestUtil.generateTestRecords(0, 100);
List<IndexedRecord> copyOfRecords3 = records3.stream().map(
record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
List<IndexedRecord> copyOfRecords3 = records3.stream()
.map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
dataBlock = new HoodieAvroDataBlock(records3, header);
writer = writer.appendBlock(dataBlock);
@@ -445,9 +433,9 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
@SuppressWarnings("unchecked")
@Test
public void testBasicAppendAndScanMultipleFiles() throws IOException, URISyntaxException, InterruptedException {
Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withSizeThreshold(1024)
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
Writer writer =
HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
.withSizeThreshold(1024).withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
Schema schema = HoodieAvroUtils.addMetadataFields(getSimpleSchema());
Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
@@ -459,8 +447,8 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
while (writer.getLogFile().getLogVersion() != 4) {
logFiles.add(writer.getLogFile());
List<IndexedRecord> records1 = SchemaTestUtil.generateHoodieTestRecords(0, 100);
List<IndexedRecord> copyOfRecords1 = records1.stream().map(
record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
List<IndexedRecord> copyOfRecords1 = records1.stream()
.map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
allRecords.add(copyOfRecords1);
header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records1, header);
@@ -486,9 +474,9 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
@Test
public void testAppendAndReadOnCorruptedLog() throws IOException, URISyntaxException, InterruptedException {
Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
.overBaseCommit("100").withFs(fs).build();
Writer writer =
HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 100);
Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
@@ -521,7 +509,7 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
HoodieLogBlock block = reader.next();
assertEquals("The read block should be a corrupt block", HoodieLogBlockType.CORRUPT_BLOCK, block.getBlockType());
HoodieCorruptBlock corruptBlock = (HoodieCorruptBlock) block;
//assertEquals("", "something-random", new String(corruptBlock.getCorruptedBytes()));
// assertEquals("", "something-random", new String(corruptBlock.getCorruptedBytes()));
assertFalse("There should be no more block left", reader.hasNext());
reader.close();
@@ -542,9 +530,9 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
outputStream.close();
// Should be able to append a new block
writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1").overBaseCommit("100")
.withFs(fs).build();
writer =
HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
records = SchemaTestUtil.generateTestRecords(0, 100);
header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
dataBlock = new HoodieAvroDataBlock(records, header);
@@ -561,7 +549,7 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
block = reader.next();
assertEquals("The read block should be a corrupt block", HoodieLogBlockType.CORRUPT_BLOCK, block.getBlockType());
corruptBlock = (HoodieCorruptBlock) block;
//assertEquals("", "something-else-random", new String(corruptBlock.getCorruptedBytes()));
// assertEquals("", "something-else-random", new String(corruptBlock.getCorruptedBytes()));
assertTrue("We should get the last block next", reader.hasNext());
reader.next();
assertFalse("We should have no more blocks left", reader.hasNext());
@@ -573,13 +561,13 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
public void testAvroLogRecordReaderBasic() throws IOException, URISyntaxException, InterruptedException {
Schema schema = HoodieAvroUtils.addMetadataFields(getSimpleSchema());
// Set a small threshold so that every block is a new version
Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
.overBaseCommit("100").withFs(fs).withSizeThreshold(500).build();
Writer writer =
HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).withSizeThreshold(500).build();
// Write 1
List<IndexedRecord> records1 = SchemaTestUtil.generateHoodieTestRecords(0, 100);
List<IndexedRecord> copyOfRecords1 = records1.stream().map(
record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
List<IndexedRecord> copyOfRecords1 = records1.stream()
.map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
@@ -589,26 +577,27 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
// Write 2
List<IndexedRecord> records2 = SchemaTestUtil.generateHoodieTestRecords(0, 100);
List<IndexedRecord> copyOfRecords2 = records2.stream().map(
record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
List<IndexedRecord> copyOfRecords2 = records2.stream()
.map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
dataBlock = new HoodieAvroDataBlock(records2, header);
writer = writer.appendBlock(dataBlock);
writer.close();
List<String> allLogFiles = FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION,
"100").map(s -> s.getPath().toString()).collect(Collectors.toList());
List<String> allLogFiles =
FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100")
.map(s -> s.getPath().toString()).collect(Collectors.toList());
HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema,
"100", 10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema, "100",
10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
assertEquals("", 200, scanner.getTotalLogRecords());
Set<String> readKeys = new HashSet<>(200);
scanner.forEach(s -> readKeys.add(s.getKey().getRecordKey()));
assertEquals("Stream collect should return all 200 records", 200, readKeys.size());
copyOfRecords1.addAll(copyOfRecords2);
Set<String> originalKeys = copyOfRecords1.stream()
.map(s -> ((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString())
.collect(Collectors.toSet());
Set<String> originalKeys =
copyOfRecords1.stream().map(s -> ((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString())
.collect(Collectors.toSet());
assertEquals("CompositeAvroLogReader should return 200 records from 2 versions", originalKeys, readKeys);
}
@@ -617,14 +606,14 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
throws IOException, URISyntaxException, InterruptedException {
Schema schema = HoodieAvroUtils.addMetadataFields(getSimpleSchema());
// Set a small threshold so that every block is a new version
Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
.overBaseCommit("100").withFs(fs).build();
Writer writer =
HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
// Write 1
List<IndexedRecord> records1 = SchemaTestUtil.generateHoodieTestRecords(0, 100);
List<IndexedRecord> copyOfRecords1 = records1.stream().map(
record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
List<IndexedRecord> copyOfRecords1 = records1.stream()
.map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
@@ -649,26 +638,27 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
// Write 3
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "102");
List<IndexedRecord> records3 = SchemaTestUtil.generateHoodieTestRecords(0, 100);
List<IndexedRecord> copyOfRecords3 = records3.stream().map(
record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
List<IndexedRecord> copyOfRecords3 = records3.stream()
.map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
dataBlock = new HoodieAvroDataBlock(records3, header);
writer = writer.appendBlock(dataBlock);
writer.close();
List<String> allLogFiles = FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION,
"100").map(s -> s.getPath().toString()).collect(Collectors.toList());
List<String> allLogFiles =
FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100")
.map(s -> s.getPath().toString()).collect(Collectors.toList());
HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema,
"102", 10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema, "102",
10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
assertEquals("We read 200 records from 2 write batches", 200, scanner.getTotalLogRecords());
Set<String> readKeys = new HashSet<>(200);
scanner.forEach(s -> readKeys.add(s.getKey().getRecordKey()));
assertEquals("Stream collect should return all 200 records", 200, readKeys.size());
copyOfRecords1.addAll(copyOfRecords3);
Set<String> originalKeys = copyOfRecords1.stream()
.map(s -> ((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString())
.collect(Collectors.toSet());
Set<String> originalKeys =
copyOfRecords1.stream().map(s -> ((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString())
.collect(Collectors.toSet());
assertEquals("CompositeAvroLogReader should return 200 records from 2 versions", originalKeys, readKeys);
}
@@ -677,14 +667,14 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
throws IOException, URISyntaxException, InterruptedException {
Schema schema = HoodieAvroUtils.addMetadataFields(getSimpleSchema());
// Set a small threshold so that every block is a new version
Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
.overBaseCommit("100").withFs(fs).build();
Writer writer =
HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
// Write 1
List<IndexedRecord> records1 = SchemaTestUtil.generateHoodieTestRecords(0, 100);
List<IndexedRecord> copyOfRecords1 = records1.stream().map(
record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
List<IndexedRecord> copyOfRecords1 = records1.stream()
.map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
@@ -718,35 +708,36 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
header.put(HoodieLogBlock.HeaderMetadataType.COMMAND_BLOCK_TYPE,
String.valueOf(HoodieCommandBlock.HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK.ordinal()));
HoodieCommandBlock commandBlock = new HoodieCommandBlock(header);
writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1").overBaseCommit("100")
.withFs(fs).build();
writer =
HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
writer = writer.appendBlock(commandBlock);
// Write 3
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "103");
List<IndexedRecord> records3 = SchemaTestUtil.generateHoodieTestRecords(0, 100);
List<IndexedRecord> copyOfRecords3 = records3.stream().map(
record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
List<IndexedRecord> copyOfRecords3 = records3.stream()
.map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
dataBlock = new HoodieAvroDataBlock(records3, header);
writer = writer.appendBlock(dataBlock);
writer.close();
List<String> allLogFiles = FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION,
"100").map(s -> s.getPath().toString()).collect(Collectors.toList());
List<String> allLogFiles =
FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100")
.map(s -> s.getPath().toString()).collect(Collectors.toList());
HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema,
"103", 10240L, true, false, bufferSize, BASE_OUTPUT_PATH);
HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema, "103",
10240L, true, false, bufferSize, BASE_OUTPUT_PATH);
assertEquals("We would read 200 records", 200, scanner.getTotalLogRecords());
Set<String> readKeys = new HashSet<>(200);
scanner.forEach(s -> readKeys.add(s.getKey().getRecordKey()));
assertEquals("Stream collect should return all 200 records", 200, readKeys.size());
copyOfRecords1.addAll(copyOfRecords3);
Set<String> originalKeys = copyOfRecords1.stream()
.map(s -> ((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString())
.collect(Collectors.toSet());
Set<String> originalKeys =
copyOfRecords1.stream().map(s -> ((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString())
.collect(Collectors.toSet());
assertEquals("CompositeAvroLogReader should return 200 records from 2 versions", originalKeys, readKeys);
}
@@ -755,14 +746,14 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
throws IOException, URISyntaxException, InterruptedException {
Schema schema = HoodieAvroUtils.addMetadataFields(getSimpleSchema());
// Set a small threshold so that every block is a new version
Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
.overBaseCommit("100").withFs(fs).build();
Writer writer =
HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
// Write 1
List<IndexedRecord> records1 = SchemaTestUtil.generateHoodieTestRecords(0, 100);
List<IndexedRecord> copyOfRecords1 = records1.stream().map(
record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
List<IndexedRecord> copyOfRecords1 = records1.stream()
.map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
@@ -772,18 +763,19 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
// Write 2
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "101");
List<IndexedRecord> records2 = SchemaTestUtil.generateHoodieTestRecords(0, 100);
List<IndexedRecord> copyOfRecords2 = records2.stream().map(
record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
List<IndexedRecord> copyOfRecords2 = records2.stream()
.map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
dataBlock = new HoodieAvroDataBlock(records2, header);
writer = writer.appendBlock(dataBlock);
copyOfRecords1.addAll(copyOfRecords2);
List<String> originalKeys = copyOfRecords1.stream().map(
s -> ((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString()).collect(Collectors.toList());
List<String> originalKeys =
copyOfRecords1.stream().map(s -> ((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString())
.collect(Collectors.toList());
// Delete 50 keys
List<HoodieKey> deletedKeys = copyOfRecords1.stream().map(
s -> (new HoodieKey(((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(),
List<HoodieKey> deletedKeys = copyOfRecords1.stream()
.map(s -> (new HoodieKey(((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(),
((GenericRecord) s).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString())))
.collect(Collectors.toList()).subList(0, 50);
@@ -791,11 +783,12 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
HoodieDeleteBlock deleteBlock = new HoodieDeleteBlock(deletedKeys.toArray(new HoodieKey[50]), header);
writer = writer.appendBlock(deleteBlock);
List<String> allLogFiles = FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION,
"100").map(s -> s.getPath().toString()).collect(Collectors.toList());
List<String> allLogFiles =
FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100")
.map(s -> s.getPath().toString()).collect(Collectors.toList());
HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema,
"102", 10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema, "102",
10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
assertEquals("We still would read 200 records", 200, scanner.getTotalLogRecords());
final List<String> readKeys = new ArrayList<>(200);
final List<Boolean> emptyPayloads = new ArrayList<>();
@@ -825,8 +818,8 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
writer = writer.appendBlock(commandBlock);
readKeys.clear();
scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema, "101",
10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema, "101", 10240L, readBlocksLazily,
false, bufferSize, BASE_OUTPUT_PATH);
scanner.forEach(s -> readKeys.add(s.getKey().getRecordKey()));
assertEquals("Stream collect should return all 200 records after rollback of delete", 200, readKeys.size());
}
@@ -838,14 +831,14 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
// Write a Data block and Delete block with same InstantTime (written in same batch)
Schema schema = HoodieAvroUtils.addMetadataFields(getSimpleSchema());
// Set a small threshold so that every block is a new version
Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
.overBaseCommit("100").withFs(fs).build();
Writer writer =
HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
// Write 1
List<IndexedRecord> records1 = SchemaTestUtil.generateHoodieTestRecords(0, 100);
List<IndexedRecord> copyOfRecords1 = records1.stream().map(
record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
List<IndexedRecord> copyOfRecords1 = records1.stream()
.map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
header.put(HoodieLogBlock.HeaderMetadataType.TARGET_INSTANT_TIME, "100");
@@ -860,13 +853,14 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
dataBlock = new HoodieAvroDataBlock(records2, header);
writer = writer.appendBlock(dataBlock);
List<String> originalKeys = copyOfRecords1.stream().map(
s -> ((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString()).collect(Collectors.toList());
List<String> originalKeys =
copyOfRecords1.stream().map(s -> ((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString())
.collect(Collectors.toList());
// Delete 50 keys
// Delete 50 keys
List<HoodieKey> deletedKeys = copyOfRecords1.stream().map(
s -> (new HoodieKey(((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(),
List<HoodieKey> deletedKeys = copyOfRecords1.stream()
.map(s -> (new HoodieKey(((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(),
((GenericRecord) s).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString())))
.collect(Collectors.toList()).subList(0, 50);
@@ -887,12 +881,13 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
// Attempt 2 : Write another rollback blocks for a failed write
writer = writer.appendBlock(commandBlock);
List<String> allLogFiles = FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION,
"100").map(s -> s.getPath().toString()).collect(Collectors.toList());
List<String> allLogFiles =
FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100")
.map(s -> s.getPath().toString()).collect(Collectors.toList());
// all data must be rolled back before merge
HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema,
"100", 10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema, "100",
10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
assertEquals("We would have scanned 0 records because of rollback", 0, scanner.getTotalLogRecords());
final List<String> readKeys = new ArrayList<>();
@@ -907,14 +902,14 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
// Write a Data block and Delete block with same InstantTime (written in same batch)
Schema schema = HoodieAvroUtils.addMetadataFields(getSimpleSchema());
// Set a small threshold so that every block is a new version
Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
.overBaseCommit("100").withFs(fs).build();
Writer writer =
HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
// Write 1
List<IndexedRecord> records1 = SchemaTestUtil.generateHoodieTestRecords(0, 100);
List<IndexedRecord> copyOfRecords1 = records1.stream().map(
record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
List<IndexedRecord> copyOfRecords1 = records1.stream()
.map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
header.put(HoodieLogBlock.HeaderMetadataType.TARGET_INSTANT_TIME, "100");
@@ -922,13 +917,14 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records1, header);
writer = writer.appendBlock(dataBlock);
List<String> originalKeys = copyOfRecords1.stream().map(
s -> ((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString()).collect(Collectors.toList());
List<String> originalKeys =
copyOfRecords1.stream().map(s -> ((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString())
.collect(Collectors.toList());
// Delete 50 keys
// Delete 50 keys
List<HoodieKey> deletedKeys = copyOfRecords1.stream().map(
s -> (new HoodieKey(((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(),
List<HoodieKey> deletedKeys = copyOfRecords1.stream()
.map(s -> (new HoodieKey(((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(),
((GenericRecord) s).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString())))
.collect(Collectors.toList()).subList(0, 50);
HoodieDeleteBlock deleteBlock = new HoodieDeleteBlock(deletedKeys.toArray(new HoodieKey[50]), header);
@@ -941,11 +937,12 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
writer = writer.appendBlock(commandBlock);
writer = writer.appendBlock(commandBlock);
List<String> allLogFiles = FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION,
"100").map(s -> s.getPath().toString()).collect(Collectors.toList());
List<String> allLogFiles =
FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100")
.map(s -> s.getPath().toString()).collect(Collectors.toList());
HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema,
"100", 10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema, "100",
10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
assertEquals("We would read 0 records", 0, scanner.getTotalLogRecords());
}
@@ -954,9 +951,9 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
throws IOException, URISyntaxException, InterruptedException {
Schema schema = HoodieAvroUtils.addMetadataFields(getSimpleSchema());
// Set a small threshold so that every block is a new version
Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
.overBaseCommit("100").withFs(fs).build();
Writer writer =
HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
// Write 1
List<IndexedRecord> records1 = SchemaTestUtil.generateHoodieTestRecords(0, 100);
@@ -973,11 +970,12 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
HoodieCommandBlock commandBlock = new HoodieCommandBlock(header);
writer = writer.appendBlock(commandBlock);
List<String> allLogFiles = FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION,
"100").map(s -> s.getPath().toString()).collect(Collectors.toList());
List<String> allLogFiles =
FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100")
.map(s -> s.getPath().toString()).collect(Collectors.toList());
HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema,
"100", 10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema, "100",
10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
assertEquals("We still would read 100 records", 100, scanner.getTotalLogRecords());
final List<String> readKeys = new ArrayList<>(100);
scanner.forEach(s -> readKeys.add(s.getKey().getRecordKey()));
@@ -991,14 +989,14 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
// Write 3 data blocks with the same InstantTime (written in the same batch)
Schema schema = HoodieAvroUtils.addMetadataFields(getSimpleSchema());
// Set a small threshold so that every block is a new version
Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
.overBaseCommit("100").withFs(fs).build();
Writer writer =
HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
// Write 1
List<IndexedRecord> records1 = SchemaTestUtil.generateHoodieTestRecords(0, 100);
List<IndexedRecord> copyOfRecords1 = records1.stream().map(
record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
List<IndexedRecord> copyOfRecords1 = records1.stream()
.map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
header.put(HoodieLogBlock.HeaderMetadataType.TARGET_INSTANT_TIME, "100");
@@ -1008,13 +1006,14 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
writer = writer.appendBlock(dataBlock);
writer = writer.appendBlock(dataBlock);
List<String> originalKeys = copyOfRecords1.stream().map(
s -> ((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString()).collect(Collectors.toList());
List<String> originalKeys =
copyOfRecords1.stream().map(s -> ((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString())
.collect(Collectors.toList());
// Delete 50 keys
// Delete 50 keys
List<HoodieKey> deletedKeys = copyOfRecords1.stream().map(
s -> (new HoodieKey(((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(),
List<HoodieKey> deletedKeys = copyOfRecords1.stream()
.map(s -> (new HoodieKey(((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(),
((GenericRecord) s).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString())))
.collect(Collectors.toList()).subList(0, 50);
HoodieDeleteBlock deleteBlock = new HoodieDeleteBlock(deletedKeys.toArray(new HoodieKey[50]), header);
@@ -1027,11 +1026,12 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
HoodieCommandBlock commandBlock = new HoodieCommandBlock(header);
writer = writer.appendBlock(commandBlock);
List<String> allLogFiles = FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION,
"100").map(s -> s.getPath().toString()).collect(Collectors.toList());
List<String> allLogFiles =
FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100")
.map(s -> s.getPath().toString()).collect(Collectors.toList());
HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema,
"101", 10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema, "101",
10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
assertEquals("We would read 0 records", 0, scanner.getTotalLogRecords());
}
@@ -1042,9 +1042,9 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
// Write 3 data blocks with the same InstantTime (written in the same batch)
Schema schema = HoodieAvroUtils.addMetadataFields(getSimpleSchema());
// Set a small threshold so that every block is a new version
Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
.overBaseCommit("100").withFs(fs).build();
Writer writer =
HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
// Write 1
List<IndexedRecord> records1 = SchemaTestUtil.generateHoodieTestRecords(0, 100);
@@ -1083,9 +1083,9 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
outputStream.flush();
outputStream.close();
writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1").overBaseCommit("100")
.withFs(fs).build();
writer =
HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
writer = writer.appendBlock(dataBlock);
writer.close();
@@ -1103,9 +1103,9 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
outputStream.flush();
outputStream.close();
writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1").overBaseCommit("100")
.withFs(fs).build();
writer =
HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
// Write 1 rollback block for the last commit instant
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "101");
header.put(HeaderMetadataType.TARGET_INSTANT_TIME, "100");
@@ -1115,24 +1115,25 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
writer = writer.appendBlock(commandBlock);
writer.close();
List<String> allLogFiles = FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION,
"100").map(s -> s.getPath().toString()).collect(Collectors.toList());
List<String> allLogFiles =
FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100")
.map(s -> s.getPath().toString()).collect(Collectors.toList());
HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema,
"101", 10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema, "101",
10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
assertEquals("We would read 0 records", 0, scanner.getTotalLogRecords());
}
@SuppressWarnings("unchecked")
@Test
public void testBasicAppendAndReadInReverse() throws IOException, URISyntaxException, InterruptedException {
Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
.overBaseCommit("100").withFs(fs).build();
Writer writer =
HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
Schema schema = getSimpleSchema();
List<IndexedRecord> records1 = SchemaTestUtil.generateTestRecords(0, 100);
List<IndexedRecord> copyOfRecords1 = records1.stream().map(
record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
List<IndexedRecord> copyOfRecords1 = records1.stream()
.map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
@@ -1140,23 +1141,23 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
writer = writer.appendBlock(dataBlock);
writer.close();
writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1").overBaseCommit("100")
.withFs(fs).build();
writer =
HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
List<IndexedRecord> records2 = SchemaTestUtil.generateTestRecords(0, 100);
List<IndexedRecord> copyOfRecords2 = records2.stream().map(
record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
List<IndexedRecord> copyOfRecords2 = records2.stream()
.map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
dataBlock = new HoodieAvroDataBlock(records2, header);
writer = writer.appendBlock(dataBlock);
writer.close();
// Close and Open again and append 100 more records
writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1").overBaseCommit("100")
.withFs(fs).build();
writer =
HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
List<IndexedRecord> records3 = SchemaTestUtil.generateTestRecords(0, 100);
List<IndexedRecord> copyOfRecords3 = records3.stream().map(
record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
List<IndexedRecord> copyOfRecords3 = records3.stream()
.map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
dataBlock = new HoodieAvroDataBlock(records3, header);
writer = writer.appendBlock(dataBlock);
writer.close();
@@ -1195,9 +1196,9 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
@Test
public void testAppendAndReadOnCorruptedLogInReverse() throws IOException, URISyntaxException, InterruptedException {
Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
.overBaseCommit("100").withFs(fs).build();
Writer writer =
HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
Schema schema = getSimpleSchema();
List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 100);
Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
@@ -1225,17 +1226,17 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
outputStream.close();
// Should be able to append a new block
writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1").overBaseCommit("100")
.withFs(fs).build();
writer =
HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
records = SchemaTestUtil.generateTestRecords(0, 100);
dataBlock = new HoodieAvroDataBlock(records, header);
writer = writer.appendBlock(dataBlock);
writer.close();
// First round of reads - we should be able to read the first block and then EOF
HoodieLogFileReader reader = new HoodieLogFileReader(fs, writer.getLogFile(), schema, bufferSize,
readBlocksLazily, true);
HoodieLogFileReader reader =
new HoodieLogFileReader(fs, writer.getLogFile(), schema, bufferSize, readBlocksLazily, true);
assertTrue("Last block should be available", reader.hasPrev());
HoodieLogBlock block = reader.prev();
@@ -1254,13 +1255,13 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
@SuppressWarnings("unchecked")
@Test
public void testBasicAppendAndTraverseInReverse() throws IOException, URISyntaxException, InterruptedException {
Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
.overBaseCommit("100").withFs(fs).build();
Writer writer =
HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
Schema schema = getSimpleSchema();
List<IndexedRecord> records1 = SchemaTestUtil.generateTestRecords(0, 100);
List<IndexedRecord> copyOfRecords1 = records1.stream().map(
record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
List<IndexedRecord> copyOfRecords1 = records1.stream()
.map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
@@ -1268,23 +1269,23 @@ public class HoodieLogFormatTest extends HoodieCommonTestHarness {
writer = writer.appendBlock(dataBlock);
writer.close();
writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1").overBaseCommit("100")
.withFs(fs).build();
writer =
HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
List<IndexedRecord> records2 = SchemaTestUtil.generateTestRecords(0, 100);
List<IndexedRecord> copyOfRecords2 = records2.stream().map(
record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
List<IndexedRecord> copyOfRecords2 = records2.stream()
.map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
dataBlock = new HoodieAvroDataBlock(records2, header);
writer = writer.appendBlock(dataBlock);
writer.close();
// Close and Open again and append 100 more records
writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1").overBaseCommit("100")
.withFs(fs).build();
writer =
HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
List<IndexedRecord> records3 = SchemaTestUtil.generateTestRecords(0, 100);
List<IndexedRecord> copyOfRecords3 = records3.stream().map(
record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
List<IndexedRecord> copyOfRecords3 = records3.stream()
.map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
dataBlock = new HoodieAvroDataBlock(records3, header);
writer = writer.appendBlock(dataBlock);
writer.close();

View File

@@ -98,12 +98,10 @@ public class HoodieActiveTimelineTest extends HoodieCommonTestHarness {
public void testTimelineOperations() throws Exception {
timeline = new MockHoodieTimeline(Stream.of("01", "03", "05", "07", "09", "11", "13", "15", "17", "19"),
Stream.of("21", "23"));
HoodieTestUtils.assertStreamEquals("", Stream.of("05", "07", "09", "11"),
timeline.getCommitTimeline().filterCompletedInstants().findInstantsInRange("04", "11").getInstants()
.map(HoodieInstant::getTimestamp));
HoodieTestUtils.assertStreamEquals("", Stream.of("09", "11"),
timeline.getCommitTimeline().filterCompletedInstants().findInstantsAfter("07", 2).getInstants()
.map(HoodieInstant::getTimestamp));
HoodieTestUtils.assertStreamEquals("", Stream.of("05", "07", "09", "11"), timeline.getCommitTimeline()
.filterCompletedInstants().findInstantsInRange("04", "11").getInstants().map(HoodieInstant::getTimestamp));
HoodieTestUtils.assertStreamEquals("", Stream.of("09", "11"), timeline.getCommitTimeline().filterCompletedInstants()
.findInstantsAfter("07", 2).getInstants().map(HoodieInstant::getTimestamp));
assertFalse(timeline.empty());
assertFalse(timeline.getCommitTimeline().filterInflightsExcludingCompaction().empty());
assertEquals("", 12, timeline.countInstants());

View File

@@ -31,8 +31,9 @@ public class MockHoodieTimeline extends HoodieActiveTimeline {
public MockHoodieTimeline(Stream<String> completed, Stream<String> inflights) throws IOException {
super();
this.setInstants(Stream.concat(completed.map(s -> new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, s)),
inflights.map(s -> new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, s)))
this.setInstants(Stream
.concat(completed.map(s -> new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, s)),
inflights.map(s -> new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, s)))
.sorted(Comparator.comparing(new Function<HoodieInstant, String>() {
@Override
public String apply(HoodieInstant hoodieInstant) {

View File

@@ -96,8 +96,8 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
}
/**
* Test case for view generation on a file group where
* the only file-slice does not have data-file. This is the case where upserts directly go to log-files
* Test case for view generation on a file group where the only file-slice does not have data-file. This is the case
* where upserts directly go to log-files
*/
@Test
public void testViewForFileSlicesWithNoBaseFile() throws Exception {
@@ -113,10 +113,10 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
String instantTime1 = "1";
String deltaInstantTime1 = "2";
String deltaInstantTime2 = "3";
String fileName1 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION,
instantTime1, 0, TEST_WRITE_TOKEN);
String fileName2 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION,
instantTime1, 1, TEST_WRITE_TOKEN);
String fileName1 =
FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, instantTime1, 0, TEST_WRITE_TOKEN);
String fileName2 =
FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, instantTime1, 1, TEST_WRITE_TOKEN);
new File(basePath + "/" + partitionPath + "/" + fileName1).createNewFile();
new File(basePath + "/" + partitionPath + "/" + fileName2).createNewFile();
HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline();
@@ -144,8 +144,8 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
assertEquals("Log File Order check", fileName1, logFiles.get(1).getFileName());
// Check Merged File Slices API
fileSliceList = rtView.getLatestMergedFileSlicesBeforeOrOn(partitionPath, deltaInstantTime2)
.collect(Collectors.toList());
fileSliceList =
rtView.getLatestMergedFileSlicesBeforeOrOn(partitionPath, deltaInstantTime2).collect(Collectors.toList());
assertEquals(1, fileSliceList.size());
fileSlice = fileSliceList.get(0);
assertEquals("File-Id must be set correctly", fileId, fileSlice.getFileId());
@@ -198,41 +198,40 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
/**
* Returns all file-slices including uncommitted ones.
*
* @param partitionPath partition path handed to {@code fsView.getAllFileGroups} to select the file groups
* @return a single flattened stream made of the per-group results of {@code getAllFileSlicesIncludingInflight()}
*/
private Stream<FileSlice> getAllRawFileSlices(String partitionPath) {
return fsView.getAllFileGroups(partitionPath)
.map(group -> group.getAllFileSlicesIncludingInflight())
return fsView.getAllFileGroups(partitionPath).map(group -> group.getAllFileSlicesIncludingInflight())
.flatMap(sliceList -> sliceList);
}
/**
* Returns latest raw file-slices including uncommitted ones.
* Returns latest raw file-slices including uncommitted ones.
*
* @param partitionPath partition path handed to {@code fsView.getAllFileGroups} to select the file groups
* @return stream with the value of {@code getLatestFileSlicesIncludingInflight()} for each file group; groups for
* which that option is not present are filtered out
*/
public Stream<FileSlice> getLatestRawFileSlices(String partitionPath) {
return fsView.getAllFileGroups(partitionPath)
.map(fileGroup -> fileGroup.getLatestFileSlicesIncludingInflight())
.filter(fileSliceOpt -> fileSliceOpt.isPresent())
.map(Option::get);
return fsView.getAllFileGroups(partitionPath).map(fileGroup -> fileGroup.getLatestFileSlicesIncludingInflight())
.filter(fileSliceOpt -> fileSliceOpt.isPresent()).map(Option::get);
}
/**
* Helper method to test Views in the presence of concurrent compaction
* @param skipCreatingDataFile if set, first File Slice will not have data-file set. This would
* simulate inserts going directly to log files
* @param isCompactionInFlight if set, compaction was inflight (running) when view was tested first time,
* otherwise compaction was in requested state
* @param expTotalFileSlices Total number of file-slices across file-groups in the partition path
* @param expTotalDataFiles Total number of data-files across file-groups in the partition path
*
* @param skipCreatingDataFile if set, first File Slice will not have data-file set. This would simulate inserts going
* directly to log files
* @param isCompactionInFlight if set, compaction was inflight (running) when view was tested first time, otherwise
* compaction was in requested state
* @param expTotalFileSlices Total number of file-slices across file-groups in the partition path
* @param expTotalDataFiles Total number of data-files across file-groups in the partition path
* @param includeInvalidAndInflight Whether view includes inflight and invalid file-groups.
* @throws Exception
*/
protected void testViewForFileSlicesWithAsyncCompaction(boolean skipCreatingDataFile,
boolean isCompactionInFlight, int expTotalFileSlices, int expTotalDataFiles,
boolean includeInvalidAndInflight) throws Exception {
protected void testViewForFileSlicesWithAsyncCompaction(boolean skipCreatingDataFile, boolean isCompactionInFlight,
int expTotalFileSlices, int expTotalDataFiles, boolean includeInvalidAndInflight) throws Exception {
String partitionPath = "2016/05/01";
new File(basePath + "/" + partitionPath).mkdirs();
String fileId = UUID.randomUUID().toString();
@@ -247,10 +246,10 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
dataFileName = FSUtils.makeDataFileName(instantTime1, TEST_WRITE_TOKEN, fileId);
new File(basePath + "/" + partitionPath + "/" + dataFileName).createNewFile();
}
String fileName1 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION,
instantTime1, 0, TEST_WRITE_TOKEN);
String fileName2 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION,
instantTime1, 1, TEST_WRITE_TOKEN);
String fileName1 =
FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, instantTime1, 0, TEST_WRITE_TOKEN);
String fileName2 =
FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, instantTime1, 1, TEST_WRITE_TOKEN);
new File(basePath + "/" + partitionPath + "/" + fileName1).createNewFile();
new File(basePath + "/" + partitionPath + "/" + fileName2).createNewFile();
HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline();
@@ -268,8 +267,8 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
String compactDataFileName = FSUtils.makeDataFileName(compactionRequestedTime, TEST_WRITE_TOKEN, fileId);
List<Pair<String, FileSlice>> partitionFileSlicesPairs = new ArrayList<>();
partitionFileSlicesPairs.add(Pair.of(partitionPath, fileSlices.get(0)));
HoodieCompactionPlan compactionPlan = CompactionUtils.buildFromFileSlices(partitionFileSlicesPairs,
Option.empty(), Option.empty());
HoodieCompactionPlan compactionPlan =
CompactionUtils.buildFromFileSlices(partitionFileSlicesPairs, Option.empty(), Option.empty());
HoodieInstant compactionInstant = null;
if (isCompactionInFlight) {
// Create a Data-file but this should be skipped by view
@@ -297,10 +296,10 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
String deltaInstantTime5 = "6";
List<String> allInstantTimes = Arrays.asList(instantTime1, deltaInstantTime1, deltaInstantTime2,
compactionRequestedTime, deltaInstantTime4, deltaInstantTime5);
String fileName3 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION,
compactionRequestedTime, 0, TEST_WRITE_TOKEN);
String fileName4 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION,
compactionRequestedTime, 1, TEST_WRITE_TOKEN);
String fileName3 =
FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, compactionRequestedTime, 0, TEST_WRITE_TOKEN);
String fileName4 =
FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, compactionRequestedTime, 1, TEST_WRITE_TOKEN);
new File(basePath + "/" + partitionPath + "/" + fileName3).createNewFile();
new File(basePath + "/" + partitionPath + "/" + fileName4).createNewFile();
HoodieInstant deltaInstant4 = new HoodieInstant(true, HoodieTimeline.DELTA_COMMIT_ACTION, deltaInstantTime4);
@@ -318,8 +317,8 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
}
/** Merge API Tests **/
List<FileSlice> fileSliceList = rtView.getLatestMergedFileSlicesBeforeOrOn(partitionPath, deltaInstantTime5)
.collect(Collectors.toList());
List<FileSlice> fileSliceList =
rtView.getLatestMergedFileSlicesBeforeOrOn(partitionPath, deltaInstantTime5).collect(Collectors.toList());
assertEquals("Expect file-slice to be merged", 1, fileSliceList.size());
FileSlice fileSlice = fileSliceList.get(0);
assertEquals(fileId, fileSlice.getFileId());
@@ -337,8 +336,8 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
assertEquals("Log File Order check", fileName2, logFiles.get(2).getFileName());
assertEquals("Log File Order check", fileName1, logFiles.get(3).getFileName());
fileSliceList = rtView.getLatestFileSlicesBeforeOrOn(partitionPath, deltaInstantTime5, true)
.collect(Collectors.toList());
fileSliceList =
rtView.getLatestFileSlicesBeforeOrOn(partitionPath, deltaInstantTime5, true).collect(Collectors.toList());
assertEquals("Expect only one file-id", 1, fileSliceList.size());
fileSlice = fileSliceList.get(0);
assertEquals(fileId, fileSlice.getFileId());
@@ -350,7 +349,7 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
assertEquals("Log File Order check", fileName4, logFiles.get(0).getFileName());
assertEquals("Log File Order check", fileName3, logFiles.get(1).getFileName());
/** Data Files API tests */
/** Data Files API tests */
dataFiles = roView.getLatestDataFiles().collect(Collectors.toList());
if (skipCreatingDataFile) {
assertEquals("Expect no data file to be returned", 0, dataFiles.size());
@@ -411,8 +410,9 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
inflightDeltaInstantTime, 0, TEST_WRITE_TOKEN);
new File(basePath + "/" + partitionPath + "/" + inflightLogFileName).createNewFile();
// Mark instant as inflight
commitTimeline.saveToInflight(new HoodieInstant(State.INFLIGHT, HoodieTimeline.DELTA_COMMIT_ACTION,
inflightDeltaInstantTime), Option.empty());
commitTimeline.saveToInflight(
new HoodieInstant(State.INFLIGHT, HoodieTimeline.DELTA_COMMIT_ACTION, inflightDeltaInstantTime),
Option.empty());
refreshFsView();
List<FileSlice> allRawFileSlices = getAllRawFileSlices(partitionPath).collect(Collectors.toList());
@@ -424,8 +424,8 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
}).collect(Collectors.toList());
if (includeInvalidAndInflight) {
assertEquals("Inflight/Orphan data-file is also expected", 2
+ (isCompactionInFlight ? 1 : 0) + (skipCreatingDataFile ? 0 : 1), dataFiles.size());
assertEquals("Inflight/Orphan data-file is also expected",
2 + (isCompactionInFlight ? 1 : 0) + (skipCreatingDataFile ? 0 : 1), dataFiles.size());
Set<String> fileNames = dataFiles.stream().map(HoodieDataFile::getFileName).collect(Collectors.toSet());
assertTrue("Expect orphan data-file to be present", fileNames.contains(orphanDataFileName));
assertTrue("Expect inflight data-file to be present", fileNames.contains(inflightDataFileName));
@@ -438,8 +438,8 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
}
fileSliceList = getLatestRawFileSlices(partitionPath).collect(Collectors.toList());
assertEquals("Expect both inflight and orphan file-slice to be included",
includeInvalidAndInflight ? 5 : 1, fileSliceList.size());
assertEquals("Expect both inflight and orphan file-slice to be included", includeInvalidAndInflight ? 5 : 1,
fileSliceList.size());
Map<String, FileSlice> fileSliceMap =
fileSliceList.stream().collect(Collectors.toMap(FileSlice::getFileId, r -> r));
FileSlice orphanFileSliceWithDataFile = fileSliceMap.get(orphanFileId1);
@@ -465,8 +465,7 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
orphanFileSliceWithLogFile.getDataFile().isPresent());
logFiles = orphanFileSliceWithLogFile.getLogFiles().collect(Collectors.toList());
assertEquals("Orphan File Slice with log-file check data-file", 1, logFiles.size());
assertEquals("Orphan File Slice with log-file check data-file", orphanLogFileName,
logFiles.get(0).getFileName());
assertEquals("Orphan File Slice with log-file check data-file", orphanLogFileName, logFiles.get(0).getFileName());
assertEquals("Inflight File Slice with log-file check base-commit", inflightDeltaInstantTime,
inflightFileSliceWithLogFile.getBaseInstantTime());
assertFalse("Inflight File Slice with log-file check data-file",
@@ -495,46 +494,42 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
fileSliceList = rtView.getLatestFileSlices(partitionPath).collect(Collectors.toList());
log.info("FILESLICE LIST=" + fileSliceList);
dataFiles = fileSliceList.stream().map(FileSlice::getDataFile)
.filter(Option::isPresent).map(Option::get).collect(Collectors.toList());
dataFiles = fileSliceList.stream().map(FileSlice::getDataFile).filter(Option::isPresent).map(Option::get)
.collect(Collectors.toList());
assertEquals("Expect only one data-files in latest view as there is only one file-group", 1, dataFiles.size());
assertEquals("Data Filename must match", compactDataFileName, dataFiles.get(0).getFileName());
assertEquals("Only one latest file-slice in the partition", 1, fileSliceList.size());
fileSlice = fileSliceList.get(0);
assertEquals("Check file-Id is set correctly", fileId, fileSlice.getFileId());
assertEquals("Check data-filename is set correctly",
compactDataFileName, fileSlice.getDataFile().get().getFileName());
assertEquals("Ensure base-instant is now compaction request instant",
compactionRequestedTime, fileSlice.getBaseInstantTime());
assertEquals("Check data-filename is set correctly", compactDataFileName,
fileSlice.getDataFile().get().getFileName());
assertEquals("Ensure base-instant is now compaction request instant", compactionRequestedTime,
fileSlice.getBaseInstantTime());
logFiles = fileSlice.getLogFiles().collect(Collectors.toList());
assertEquals("Only log-files after compaction request shows up", 2, logFiles.size());
assertEquals("Log File Order check", fileName4, logFiles.get(0).getFileName());
assertEquals("Log File Order check", fileName3, logFiles.get(1).getFileName());
/** Data Files API tests */
/** Data Files API tests */
dataFiles = roView.getLatestDataFiles().collect(Collectors.toList());
assertEquals("Expect only one data-file to be sent", 1, dataFiles.size());
dataFiles.stream().forEach(df -> {
assertEquals("Expect data-file created by compaction be returned", df.getCommitTime(),
compactionRequestedTime);
assertEquals("Expect data-file created by compaction be returned", df.getCommitTime(), compactionRequestedTime);
});
dataFiles = roView.getLatestDataFiles(partitionPath).collect(Collectors.toList());
assertEquals("Expect only one data-file to be sent", 1, dataFiles.size());
dataFiles.stream().forEach(df -> {
assertEquals("Expect data-file created by compaction be returned", df.getCommitTime(),
compactionRequestedTime);
assertEquals("Expect data-file created by compaction be returned", df.getCommitTime(), compactionRequestedTime);
});
dataFiles = roView.getLatestDataFilesBeforeOrOn(partitionPath, deltaInstantTime5).collect(Collectors.toList());
assertEquals("Expect only one data-file to be sent", 1, dataFiles.size());
dataFiles.stream().forEach(df -> {
assertEquals("Expect data-file created by compaction be returned", df.getCommitTime(),
compactionRequestedTime);
assertEquals("Expect data-file created by compaction be returned", df.getCommitTime(), compactionRequestedTime);
});
dataFiles = roView.getLatestDataFilesInRange(allInstantTimes).collect(Collectors.toList());
assertEquals("Expect only one data-file to be sent", 1, dataFiles.size());
dataFiles.stream().forEach(df -> {
assertEquals("Expect data-file created by compaction be returned", df.getCommitTime(),
compactionRequestedTime);
assertEquals("Expect data-file created by compaction be returned", df.getCommitTime(), compactionRequestedTime);
});
assertEquals("Total number of file-slices in partitions matches expected", expTotalFileSlices,
@@ -542,8 +537,8 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
assertEquals("Total number of data-files in partitions matches expected", expTotalDataFiles,
roView.getAllDataFiles(partitionPath).count());
// file-groups includes inflight/invalid file-ids
assertEquals("Total number of file-groups in partitions matches expected",
5, fsView.getAllFileGroups(partitionPath).count());
assertEquals("Total number of file-groups in partitions matches expected", 5,
fsView.getAllFileGroups(partitionPath).count());
}
@Test
@@ -552,44 +547,39 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
new File(basePath + "/" + partitionPath).mkdirs();
String fileId = UUID.randomUUID().toString();
assertFalse("No commit, should not find any data file",
roView.getLatestDataFiles(partitionPath).filter(dfile -> dfile.getFileId().equals(fileId)).findFirst()
.isPresent());
assertFalse("No commit, should not find any data file", roView.getLatestDataFiles(partitionPath)
.filter(dfile -> dfile.getFileId().equals(fileId)).findFirst().isPresent());
// Only one commit, but is not safe
String commitTime1 = "1";
String fileName1 = FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId);
new File(basePath + "/" + partitionPath + "/" + fileName1).createNewFile();
refreshFsView();
assertFalse("No commit, should not find any data file",
roView.getLatestDataFiles(partitionPath).filter(dfile -> dfile.getFileId().equals(fileId)).findFirst()
.isPresent());
assertFalse("No commit, should not find any data file", roView.getLatestDataFiles(partitionPath)
.filter(dfile -> dfile.getFileId().equals(fileId)).findFirst().isPresent());
// Make this commit safe
HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline();
HoodieInstant instant1 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, commitTime1);
commitTimeline.saveAsComplete(instant1, Option.empty());
refreshFsView();
assertEquals("", fileName1,
roView.getLatestDataFiles(partitionPath).filter(dfile -> dfile.getFileId().equals(fileId)).findFirst().get()
.getFileName());
assertEquals("", fileName1, roView.getLatestDataFiles(partitionPath)
.filter(dfile -> dfile.getFileId().equals(fileId)).findFirst().get().getFileName());
// Do another commit, but not safe
String commitTime2 = "2";
String fileName2 = FSUtils.makeDataFileName(commitTime2, TEST_WRITE_TOKEN, fileId);
new File(basePath + "/" + partitionPath + "/" + fileName2).createNewFile();
refreshFsView();
assertEquals("", fileName1,
roView.getLatestDataFiles(partitionPath).filter(dfile -> dfile.getFileId().equals(fileId)).findFirst().get()
.getFileName());
assertEquals("", fileName1, roView.getLatestDataFiles(partitionPath)
.filter(dfile -> dfile.getFileId().equals(fileId)).findFirst().get().getFileName());
// Make it safe
HoodieInstant instant2 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, commitTime2);
commitTimeline.saveAsComplete(instant2, Option.empty());
refreshFsView();
assertEquals("", fileName2,
roView.getLatestDataFiles(partitionPath).filter(dfile -> dfile.getFileId().equals(fileId)).findFirst().get()
.getFileName());
assertEquals("", fileName2, roView.getLatestDataFiles(partitionPath)
.filter(dfile -> dfile.getFileId().equals(fileId)).findFirst().get().getFileName());
}
@Test
@@ -614,19 +604,23 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId1)).createNewFile();
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, TEST_WRITE_TOKEN, fileId1)).createNewFile();
new File(fullPartitionPath + FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION,
commitTime4, 0, TEST_WRITE_TOKEN)).createNewFile();
new File(fullPartitionPath + FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION,
commitTime4, 1, TEST_WRITE_TOKEN)).createNewFile();
new File(fullPartitionPath
+ FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime4, 0, TEST_WRITE_TOKEN))
.createNewFile();
new File(fullPartitionPath
+ FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime4, 1, TEST_WRITE_TOKEN))
.createNewFile();
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId2)).createNewFile();
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime2, TEST_WRITE_TOKEN, fileId2)).createNewFile();
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId2)).createNewFile();
new File(fullPartitionPath + FSUtils.makeLogFileName(fileId2,
HoodieLogFile.DELTA_EXTENSION, commitTime3, 0, TEST_WRITE_TOKEN)).createNewFile();
new File(fullPartitionPath
+ FSUtils.makeLogFileName(fileId2, HoodieLogFile.DELTA_EXTENSION, commitTime3, 0, TEST_WRITE_TOKEN))
.createNewFile();
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId3)).createNewFile();
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, TEST_WRITE_TOKEN, fileId3)).createNewFile();
new File(fullPartitionPath + FSUtils.makeLogFileName(fileId4, HoodieLogFile.DELTA_EXTENSION,
commitTime4, 0, TEST_WRITE_TOKEN)).createNewFile();
new File(fullPartitionPath
+ FSUtils.makeLogFileName(fileId4, HoodieLogFile.DELTA_EXTENSION, commitTime4, 0, TEST_WRITE_TOKEN))
.createNewFile();
// Create commit/clean files
new File(basePath + "/.hoodie/" + cleanTime1 + ".clean").createNewFile();
@@ -638,7 +632,7 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
testStreamLatestVersionInPartition(isLatestFileSliceOnly, fullPartitionPath, commitTime1, commitTime2, commitTime3,
commitTime4, fileId1, fileId2, fileId3, fileId4);
// Now create a scenario where archiving deleted commits (1,2, and 3) but retained cleaner clean1. Now clean1 is
// Now create a scenario where archiving deleted commits (1,2, and 3) but retained cleaner clean1. Now clean1 is
// the lowest commit time. Scenario for HUDI-162 - Here clean is the earliest action in active timeline
new File(basePath + "/.hoodie/" + commitTime1 + ".commit").delete();
new File(basePath + "/.hoodie/" + commitTime2 + ".commit").delete();
@@ -659,15 +653,15 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
// Check files as of latest commit.
List<FileSlice> allSlices = rtView.getAllFileSlices("2016/05/01").collect(Collectors.toList());
assertEquals(isLatestFileSliceOnly ? 4 : 8, allSlices.size());
Map<String, Long> fileSliceMap = allSlices.stream().collect(
Collectors.groupingBy(slice -> slice.getFileId(), Collectors.counting()));
Map<String, Long> fileSliceMap =
allSlices.stream().collect(Collectors.groupingBy(slice -> slice.getFileId(), Collectors.counting()));
assertEquals(isLatestFileSliceOnly ? 1 : 2, fileSliceMap.get(fileId1).longValue());
assertEquals(isLatestFileSliceOnly ? 1 : 3, fileSliceMap.get(fileId2).longValue());
assertEquals(isLatestFileSliceOnly ? 1 : 2, fileSliceMap.get(fileId3).longValue());
assertEquals(1, fileSliceMap.get(fileId4).longValue());
List<HoodieDataFile> dataFileList = roView.getLatestDataFilesBeforeOrOn("2016/05/01", commitTime4)
.collect(Collectors.toList());
List<HoodieDataFile> dataFileList =
roView.getLatestDataFilesBeforeOrOn("2016/05/01", commitTime4).collect(Collectors.toList());
assertEquals(3, dataFileList.size());
Set<String> filenames = Sets.newHashSet();
for (HoodieDataFile status : dataFileList) {
@@ -679,24 +673,23 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
filenames = Sets.newHashSet();
List<HoodieLogFile> logFilesList = rtView.getLatestFileSlicesBeforeOrOn("2016/05/01", commitTime4, true)
.map(slice -> slice.getLogFiles()).flatMap(logFileList -> logFileList)
.collect(Collectors.toList());
.map(slice -> slice.getLogFiles()).flatMap(logFileList -> logFileList).collect(Collectors.toList());
assertEquals(logFilesList.size(), 4);
for (HoodieLogFile logFile : logFilesList) {
filenames.add(logFile.getFileName());
}
assertTrue(filenames.contains(FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION,
commitTime4, 0, TEST_WRITE_TOKEN)));
assertTrue(filenames.contains(FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION,
commitTime4, 1, TEST_WRITE_TOKEN)));
assertTrue(filenames.contains(FSUtils.makeLogFileName(fileId2, HoodieLogFile.DELTA_EXTENSION,
commitTime3, 0, TEST_WRITE_TOKEN)));
assertTrue(filenames.contains(FSUtils.makeLogFileName(fileId4, HoodieLogFile.DELTA_EXTENSION,
commitTime4, 0, TEST_WRITE_TOKEN)));
assertTrue(filenames
.contains(FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime4, 0, TEST_WRITE_TOKEN)));
assertTrue(filenames
.contains(FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime4, 1, TEST_WRITE_TOKEN)));
assertTrue(filenames
.contains(FSUtils.makeLogFileName(fileId2, HoodieLogFile.DELTA_EXTENSION, commitTime3, 0, TEST_WRITE_TOKEN)));
assertTrue(filenames
.contains(FSUtils.makeLogFileName(fileId4, HoodieLogFile.DELTA_EXTENSION, commitTime4, 0, TEST_WRITE_TOKEN)));
// Reset the max commit time
List<HoodieDataFile> dataFiles = roView.getLatestDataFilesBeforeOrOn("2016/05/01", commitTime3)
.collect(Collectors.toList());
List<HoodieDataFile> dataFiles =
roView.getLatestDataFilesBeforeOrOn("2016/05/01", commitTime3).collect(Collectors.toList());
filenames = Sets.newHashSet();
for (HoodieDataFile status : dataFiles) {
filenames.add(status.getFileName());
@@ -711,9 +704,8 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId2)));
}
logFilesList =
rtView.getLatestFileSlicesBeforeOrOn("2016/05/01", commitTime3, true).map(slice -> slice.getLogFiles())
.flatMap(logFileList -> logFileList).collect(Collectors.toList());
logFilesList = rtView.getLatestFileSlicesBeforeOrOn("2016/05/01", commitTime3, true)
.map(slice -> slice.getLogFiles()).flatMap(logFileList -> logFileList).collect(Collectors.toList());
assertEquals(logFilesList.size(), 1);
assertTrue(logFilesList.get(0).getFileName()
.equals(FSUtils.makeLogFileName(fileId2, HoodieLogFile.DELTA_EXTENSION, commitTime3, 0, TEST_WRITE_TOKEN)));
@@ -806,15 +798,17 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
String fileId3 = UUID.randomUUID().toString();
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId1)).createNewFile();
new File(fullPartitionPath + FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION,
commitTime1, 0, TEST_WRITE_TOKEN)).createNewFile();
new File(fullPartitionPath
+ FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime1, 0, TEST_WRITE_TOKEN))
.createNewFile();
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId1)).createNewFile();
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId2)).createNewFile();
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime2, TEST_WRITE_TOKEN, fileId2)).createNewFile();
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId2)).createNewFile();
new File(fullPartitionPath + FSUtils.makeLogFileName(fileId2, HoodieLogFile.DELTA_EXTENSION,
commitTime3, 0, TEST_WRITE_TOKEN)).createNewFile();
new File(fullPartitionPath
+ FSUtils.makeLogFileName(fileId2, HoodieLogFile.DELTA_EXTENSION, commitTime3, 0, TEST_WRITE_TOKEN))
.createNewFile();
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId3)).createNewFile();
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, TEST_WRITE_TOKEN, fileId3)).createNewFile();
@@ -832,8 +826,8 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
// Populate view for partition
roView.getAllDataFiles("2016/05/01/");
List<HoodieDataFile> dataFiles = roView.getLatestDataFilesInRange(Lists.newArrayList(commitTime2, commitTime3))
.collect(Collectors.toList());
List<HoodieDataFile> dataFiles =
roView.getLatestDataFilesInRange(Lists.newArrayList(commitTime2, commitTime3)).collect(Collectors.toList());
assertEquals(isLatestFileSliceOnly ? 2 : 3, dataFiles.size());
Set<String> filenames = Sets.newHashSet();
for (HoodieDataFile status : dataFiles) {
@@ -846,8 +840,8 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId3)));
}
List<FileSlice> slices = rtView.getLatestFileSliceInRange(Lists.newArrayList(commitTime3, commitTime4))
.collect(Collectors.toList());
List<FileSlice> slices =
rtView.getLatestFileSliceInRange(Lists.newArrayList(commitTime3, commitTime4)).collect(Collectors.toList());
assertEquals(3, slices.size());
for (FileSlice slice : slices) {
if (slice.getFileId().equals(fileId1)) {
@@ -902,8 +896,8 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
assertEquals(7, statuses.length);
refreshFsView();
List<HoodieDataFile> dataFiles = roView.getLatestDataFilesBeforeOrOn(partitionPath, commitTime2)
.collect(Collectors.toList());
List<HoodieDataFile> dataFiles =
roView.getLatestDataFilesBeforeOrOn(partitionPath, commitTime2).collect(Collectors.toList());
if (!isLatestFileSliceOnly) {
assertEquals(2, dataFiles.size());
Set<String> filenames = Sets.newHashSet();
@@ -935,31 +929,31 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
String fileId2 = UUID.randomUUID().toString();
String fileId3 = UUID.randomUUID().toString();
new File(fullPartitionPath + "/" + FSUtils.makeDataFileName(commitTime1,
TEST_WRITE_TOKEN, fileId1)).createNewFile();
new File(fullPartitionPath + "/"
+ FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION,
commitTime1, 0, TEST_WRITE_TOKEN)).createNewFile();
new File(fullPartitionPath + "/"
+ FSUtils.makeDataFileName(commitTime4, TEST_WRITE_TOKEN, fileId1)).createNewFile();
new File(fullPartitionPath + "/"
+ FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION,
commitTime4, 0, TEST_WRITE_TOKEN)).createNewFile();
new File(fullPartitionPath + "/"
+ FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId2)).createNewFile();
new File(fullPartitionPath + "/"
+ FSUtils.makeDataFileName(commitTime2, TEST_WRITE_TOKEN, fileId2)).createNewFile();
new File(fullPartitionPath + "/"
+ FSUtils.makeLogFileName(fileId2, HoodieLogFile.DELTA_EXTENSION, commitTime2, 0, TEST_WRITE_TOKEN))
new File(fullPartitionPath + "/" + FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId1))
.createNewFile();
new File(fullPartitionPath + "/"
+ FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId2)).createNewFile();
+ FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime1, 0, TEST_WRITE_TOKEN))
.createNewFile();
new File(fullPartitionPath + "/" + FSUtils.makeDataFileName(commitTime4, TEST_WRITE_TOKEN, fileId1))
.createNewFile();
new File(fullPartitionPath + "/"
+ FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime4, 0, TEST_WRITE_TOKEN))
.createNewFile();
new File(fullPartitionPath + "/" + FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId2))
.createNewFile();
new File(fullPartitionPath + "/" + FSUtils.makeDataFileName(commitTime2, TEST_WRITE_TOKEN, fileId2))
.createNewFile();
new File(fullPartitionPath + "/"
+ FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId3)).createNewFile();
new File(fullPartitionPath + "/"
+ FSUtils.makeDataFileName(commitTime4, TEST_WRITE_TOKEN, fileId3)).createNewFile();
+ FSUtils.makeLogFileName(fileId2, HoodieLogFile.DELTA_EXTENSION, commitTime2, 0, TEST_WRITE_TOKEN))
.createNewFile();
new File(fullPartitionPath + "/" + FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId2))
.createNewFile();
new File(fullPartitionPath + "/" + FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId3))
.createNewFile();
new File(fullPartitionPath + "/" + FSUtils.makeDataFileName(commitTime4, TEST_WRITE_TOKEN, fileId3))
.createNewFile();
new File(basePath + "/.hoodie/" + commitTime1 + ".commit").createNewFile();
new File(basePath + "/.hoodie/" + commitTime2 + ".commit").createNewFile();
@@ -1017,11 +1011,11 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
String partitionPath2 = "2016/05/02";
String partitionPath3 = "2016/05/03";
String fullPartitionPath1 = basePath + "/" + partitionPath1 + "/";
String fullPartitionPath1 = basePath + "/" + partitionPath1 + "/";
new File(fullPartitionPath1).mkdirs();
String fullPartitionPath2 = basePath + "/" + partitionPath2 + "/";
String fullPartitionPath2 = basePath + "/" + partitionPath2 + "/";
new File(fullPartitionPath2).mkdirs();
String fullPartitionPath3 = basePath + "/" + partitionPath3 + "/";
String fullPartitionPath3 = basePath + "/" + partitionPath3 + "/";
new File(fullPartitionPath3).mkdirs();
String instantTime1 = "1";
String deltaInstantTime1 = "2";
@@ -1031,16 +1025,13 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
String dataFileName = FSUtils.makeDataFileName(instantTime1, TEST_WRITE_TOKEN, fileId);
new File(fullPartitionPath1 + dataFileName).createNewFile();
String fileName1 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION,
instantTime1, 0, TEST_WRITE_TOKEN);
new File(fullPartitionPath1 + fileName1)
.createNewFile();
String fileName1 =
FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, instantTime1, 0, TEST_WRITE_TOKEN);
new File(fullPartitionPath1 + fileName1).createNewFile();
new File(fullPartitionPath2 + FSUtils.makeDataFileName(instantTime1, TEST_WRITE_TOKEN, fileId)).createNewFile();
new File(fullPartitionPath2 + fileName1)
.createNewFile();
new File(fullPartitionPath2 + fileName1).createNewFile();
new File(fullPartitionPath3 + FSUtils.makeDataFileName(instantTime1, TEST_WRITE_TOKEN, fileId)).createNewFile();
new File(fullPartitionPath3 + fileName1)
.createNewFile();
new File(fullPartitionPath3 + fileName1).createNewFile();
HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline();
HoodieInstant instant1 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, instantTime1);
@@ -1052,9 +1043,8 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
commitTimeline.saveAsComplete(deltaInstant3, Option.empty());
// Now we list all partitions
FileStatus[] statuses = metaClient.getFs().listStatus(new Path[] {
new Path(fullPartitionPath1), new Path(fullPartitionPath2), new Path(fullPartitionPath3)
});
FileStatus[] statuses = metaClient.getFs().listStatus(
new Path[] {new Path(fullPartitionPath1), new Path(fullPartitionPath2), new Path(fullPartitionPath3)});
assertEquals(6, statuses.length);
refreshFsView();
Arrays.asList(partitionPath1, partitionPath2, partitionPath3).forEach(p -> fsView.getAllFileGroups(p).count());
@@ -1064,8 +1054,8 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
Assert.assertEquals("Expected number of file-groups", 3, groups.size());
Assert.assertEquals("Partitions must be different for file-groups", 3,
groups.stream().map(HoodieFileGroup::getPartitionPath).collect(Collectors.toSet()).size());
Set<String> fileIds = groups.stream().map(HoodieFileGroup::getFileGroupId)
.map(HoodieFileGroupId::getFileId).collect(Collectors.toSet());
Set<String> fileIds = groups.stream().map(HoodieFileGroup::getFileGroupId).map(HoodieFileGroupId::getFileId)
.collect(Collectors.toSet());
Assert.assertEquals("File Id must be same", 1, fileIds.size());
Assert.assertTrue("Expected FileId", fileIds.contains(fileId));
@@ -1080,8 +1070,8 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
String compactionRequestedTime = "2";
String compactDataFileName = FSUtils.makeDataFileName(compactionRequestedTime, TEST_WRITE_TOKEN, fileId);
HoodieCompactionPlan compactionPlan = CompactionUtils.buildFromFileSlices(partitionFileSlicesPairs,
Option.empty(), Option.empty());
HoodieCompactionPlan compactionPlan =
CompactionUtils.buildFromFileSlices(partitionFileSlicesPairs, Option.empty(), Option.empty());
// Create a Data-file for some of the partitions but this should be skipped by view
new File(basePath + "/" + partitionPath1 + "/" + compactDataFileName).createNewFile();
@@ -1099,10 +1089,10 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
String deltaInstantTime5 = "6";
List<String> allInstantTimes = Arrays.asList(instantTime1, deltaInstantTime1, deltaInstantTime2,
compactionRequestedTime, deltaInstantTime4, deltaInstantTime5);
String fileName3 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION,
compactionRequestedTime, 0, TEST_WRITE_TOKEN);
String fileName4 = FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION,
compactionRequestedTime, 1, TEST_WRITE_TOKEN);
String fileName3 =
FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, compactionRequestedTime, 0, TEST_WRITE_TOKEN);
String fileName4 =
FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION, compactionRequestedTime, 1, TEST_WRITE_TOKEN);
new File(basePath + "/" + partitionPath1 + "/" + fileName3).createNewFile();
new File(basePath + "/" + partitionPath1 + "/" + fileName4).createNewFile();
new File(basePath + "/" + partitionPath2 + "/" + fileName3).createNewFile();
@@ -1126,8 +1116,8 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
/** Merge API Tests **/
Arrays.asList(partitionPath1, partitionPath2, partitionPath3).stream().forEach(partitionPath -> {
List<FileSlice> fileSliceList = rtView.getLatestMergedFileSlicesBeforeOrOn(partitionPath, deltaInstantTime5)
.collect(Collectors.toList());
List<FileSlice> fileSliceList =
rtView.getLatestMergedFileSlicesBeforeOrOn(partitionPath, deltaInstantTime5).collect(Collectors.toList());
assertEquals("Expect file-slice to be merged", 1, fileSliceList.size());
FileSlice fileSlice = fileSliceList.get(0);
assertEquals(fileId, fileSlice.getFileId());
@@ -1140,8 +1130,8 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
assertEquals("Log File Order check", fileName3, logFiles.get(1).getFileName());
assertEquals("Log File Order check", fileName1, logFiles.get(2).getFileName());
fileSliceList = rtView.getLatestFileSlicesBeforeOrOn(partitionPath, deltaInstantTime5, true)
.collect(Collectors.toList());
fileSliceList =
rtView.getLatestFileSlicesBeforeOrOn(partitionPath, deltaInstantTime5, true).collect(Collectors.toList());
assertEquals("Expect only one file-id", 1, fileSliceList.size());
fileSlice = fileSliceList.get(0);
assertEquals(fileId, fileSlice.getFileId());
@@ -1154,8 +1144,8 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
assertEquals("Log File Order check", fileName3, logFiles.get(1).getFileName());
// Check getLatestFileSlicesBeforeOrOn excluding fileIds in pending compaction
fileSliceList = rtView.getLatestFileSlicesBeforeOrOn(partitionPath, deltaInstantTime5, false)
.collect(Collectors.toList());
fileSliceList =
rtView.getLatestFileSlicesBeforeOrOn(partitionPath, deltaInstantTime5, false).collect(Collectors.toList());
assertEquals("Expect empty list as file-id is in pending compaction", 0, fileSliceList.size());
});
@@ -1168,7 +1158,7 @@ public class HoodieTableFileSystemViewTest extends HoodieCommonTestHarness {
Assert.assertTrue(partitionsInCompaction.contains(partitionPath3));
Set<String> fileIdsInCompaction = fsView.getPendingCompactionOperations().map(Pair::getValue)
.map(CompactionOperation::getFileId).collect(Collectors.toSet());
.map(CompactionOperation::getFileId).collect(Collectors.toSet());
Assert.assertEquals(1, fileIdsInCompaction.size());
Assert.assertTrue(fileIdsInCompaction.contains(fileId));
}

View File

@@ -75,8 +75,7 @@ public class IncrementalFSViewSyncTest extends HoodieCommonTestHarness {
private static String TEST_WRITE_TOKEN = "1-0-1";
private final List<String> partitions = Arrays.asList("2018/01/01", "2018/01/02",
"2019/03/01");
private final List<String> partitions = Arrays.asList("2018/01/01", "2018/01/02", "2019/03/01");
private final List<String> fileIdsPerPartition =
IntStream.range(0, 10).mapToObj(x -> UUID.randomUUID().toString()).collect(Collectors.toList());
@@ -110,8 +109,7 @@ public class IncrementalFSViewSyncTest extends HoodieCommonTestHarness {
unscheduleCompaction(view, "14", "13", "11");
// Add one more delta instant
instantsToFiles.putAll(
testMultipleWriteSteps(view, Arrays.asList("15"), true, "11"));
instantsToFiles.putAll(testMultipleWriteSteps(view, Arrays.asList("15"), true, "11"));
// Schedule Compaction again
scheduleCompaction(view, "16");
@@ -120,37 +118,31 @@ public class IncrementalFSViewSyncTest extends HoodieCommonTestHarness {
testMultipleWriteSteps(view, Arrays.asList("16"), false, "16", 2);
// Run 2 more ingest
instantsToFiles.putAll(
testMultipleWriteSteps(view, Arrays.asList("17", "18"), true, "16", 2));
instantsToFiles.putAll(testMultipleWriteSteps(view, Arrays.asList("17", "18"), true, "16", 2));
// Schedule Compaction again
scheduleCompaction(view, "19");
// Run one more ingestion after pending compaction. This will be the 3rd slice
instantsToFiles.putAll(
testMultipleWriteSteps(view, Arrays.asList("20"), true, "19", 3));
instantsToFiles.putAll(testMultipleWriteSteps(view, Arrays.asList("20"), true, "19", 3));
// Clean first slice
testCleans(view, Arrays.asList("21"),
new ImmutableMap.Builder<String, List<String>>().put("11", Arrays.asList("12", "13", "15")).build(),
instantsToFiles,
Arrays.asList("11"));
instantsToFiles, Arrays.asList("11"));
// Add one more ingestion instant. This should be 2nd slice now
instantsToFiles.putAll(
testMultipleWriteSteps(view, Arrays.asList("22"), true, "19", 2));
instantsToFiles.putAll(testMultipleWriteSteps(view, Arrays.asList("22"), true, "19", 2));
// Restore last ingestion
testRestore(view, Arrays.asList("23"), true, new HashMap<>(), Arrays.asList("22"), "24", false);
// Run one more ingestion. This is still the 2nd slice
instantsToFiles.putAll(
testMultipleWriteSteps(view, Arrays.asList("24"), true, "19", 2));
instantsToFiles.putAll(testMultipleWriteSteps(view, Arrays.asList("24"), true, "19", 2));
// Finish Compaction
instantsToFiles.putAll(
testMultipleWriteSteps(view, Arrays.asList("19"), false, "19", 2,
Arrays.asList(new HoodieInstant(State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, "24"))));
instantsToFiles.putAll(testMultipleWriteSteps(view, Arrays.asList("19"), false, "19", 2,
Arrays.asList(new HoodieInstant(State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, "24"))));
}
@Test
@@ -183,8 +175,8 @@ public class IncrementalFSViewSyncTest extends HoodieCommonTestHarness {
Map<String, List<String>> instantsToFiles = testMultipleWriteSteps(view, Arrays.asList("12", "13", "14"));
// restore instants in reverse order till we rollback all
testRestore(view, Arrays.asList("15", "16", "17"), false, instantsToFiles,
Arrays.asList("14", "13", "12"), "17", true);
testRestore(view, Arrays.asList("15", "16", "17"), false, instantsToFiles, Arrays.asList("14", "13", "12"), "17",
true);
// Add 5 non-empty ingestions back-to-back
instantsToFiles = testMultipleWriteSteps(view, Arrays.asList("18", "19", "20"));
@@ -207,8 +199,7 @@ public class IncrementalFSViewSyncTest extends HoodieCommonTestHarness {
* Case where incremental syncing is catching up on more than one ingestion at a time
*/
// Run 1 ingestion on MOR table (1 delta commit). View1 is now synced up to this point
instantsToFiles =
testMultipleWriteSteps(view1, Arrays.asList("11"), true, "11");
instantsToFiles = testMultipleWriteSteps(view1, Arrays.asList("11"), true, "11");
SyncableFileSystemView view2 =
getFileSystemView(new HoodieTableMetaClient(metaClient.getHadoopConf(), metaClient.getBasePath()));
@@ -258,14 +249,13 @@ public class IncrementalFSViewSyncTest extends HoodieCommonTestHarness {
*/
testCleans(view2, Arrays.asList("19"),
new ImmutableMap.Builder<String, List<String>>().put("11", Arrays.asList("12", "13", "14")).build(),
instantsToFiles,
Arrays.asList("11"));
instantsToFiles, Arrays.asList("11"));
scheduleCompaction(view2, "20");
instantsToFiles.putAll(testMultipleWriteSteps(view2, Arrays.asList("21", "22"), true, "20", 2));
// Compaction
testMultipleWriteSteps(view2, Arrays.asList("20"), false, "20", 2,
Arrays.asList(new HoodieInstant(State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, "22")));
//Run one more round of ingestion
// Run one more round of ingestion
instantsToFiles.putAll(testMultipleWriteSteps(view2, Arrays.asList("23", "24"), true, "20", 2));
view1.sync();
areViewsConsistent(view1, view2, partitions.size() * fileIdsPerPartition.size() * 2);
@@ -319,8 +309,8 @@ public class IncrementalFSViewSyncTest extends HoodieCommonTestHarness {
* @param cleanedInstants List of cleaned instants
*/
private void testCleans(SyncableFileSystemView view, List<String> newCleanerInstants,
Map<String, List<String>> deltaInstantMap,
Map<String, List<String>> instantsToFiles, List<String> cleanedInstants) {
Map<String, List<String>> deltaInstantMap, Map<String, List<String>> instantsToFiles,
List<String> cleanedInstants) {
Assert.assertEquals(newCleanerInstants.size(), cleanedInstants.size());
long initialFileSlices = partitions.stream().mapToLong(p -> view.getAllFileSlices(p).count()).findAny().getAsLong();
long exp = initialFileSlices;
@@ -371,17 +361,16 @@ public class IncrementalFSViewSyncTest extends HoodieCommonTestHarness {
* @param emptyRestoreInstant Restore instant at which dataset becomes empty
*/
private void testRestore(SyncableFileSystemView view, List<String> newRestoreInstants, boolean isDeltaCommit,
Map<String, List<String>> instantsToFiles, List<String> rolledBackInstants,
String emptyRestoreInstant, boolean isRestore)
throws IOException {
Map<String, List<String>> instantsToFiles, List<String> rolledBackInstants, String emptyRestoreInstant,
boolean isRestore) throws IOException {
Assert.assertEquals(newRestoreInstants.size(), rolledBackInstants.size());
long initialFileSlices = partitions.stream().mapToLong(p -> view.getAllFileSlices(p).count()).findAny().getAsLong();
IntStream.range(0, newRestoreInstants.size()).forEach(idx -> {
String instant = rolledBackInstants.get(idx);
try {
performRestore(view, instant, instantsToFiles.get(instant), newRestoreInstants.get(idx), isRestore);
final long expTotalFileSlicesPerPartition = isDeltaCommit ? initialFileSlices :
initialFileSlices - ((idx + 1) * fileIdsPerPartition.size());
final long expTotalFileSlicesPerPartition =
isDeltaCommit ? initialFileSlices : initialFileSlices - ((idx + 1) * fileIdsPerPartition.size());
view.sync();
Assert.assertTrue(view.getLastInstant().isPresent());
log.info("Last Instant is :" + view.getLastInstant().get());
@@ -424,14 +413,12 @@ public class IncrementalFSViewSyncTest extends HoodieCommonTestHarness {
throws IOException {
Map<String, List<String>> partititonToFiles = deleteFiles(files);
List<HoodieCleanStat> cleanStats = partititonToFiles.entrySet().stream().map(e -> {
return new HoodieCleanStat(HoodieCleaningPolicy.KEEP_LATEST_COMMITS,
e.getKey(), e.getValue(), e.getValue(), new ArrayList<>(),
Integer.toString(Integer.parseInt(instant) + 1));
return new HoodieCleanStat(HoodieCleaningPolicy.KEEP_LATEST_COMMITS, e.getKey(), e.getValue(), e.getValue(),
new ArrayList<>(), Integer.toString(Integer.parseInt(instant) + 1));
}).collect(Collectors.toList());
HoodieCleanMetadata cleanMetadata = AvroUtils.convertCleanMetadata(cleanInstant, Option.empty(), cleanStats);
metaClient.getActiveTimeline().saveAsComplete(
new HoodieInstant(true, HoodieTimeline.CLEAN_ACTION, cleanInstant),
metaClient.getActiveTimeline().saveAsComplete(new HoodieInstant(true, HoodieTimeline.CLEAN_ACTION, cleanInstant),
AvroUtils.serializeCleanMetadata(cleanMetadata));
}
@@ -453,15 +440,14 @@ public class IncrementalFSViewSyncTest extends HoodieCommonTestHarness {
List<String> rollbacks = new ArrayList<>();
rollbacks.add(instant);
HoodieRollbackMetadata rollbackMetadata = AvroUtils
.convertRollbackMetadata(rollbackInstant, Option.empty(), rollbacks, rollbackStats);
HoodieRollbackMetadata rollbackMetadata =
AvroUtils.convertRollbackMetadata(rollbackInstant, Option.empty(), rollbacks, rollbackStats);
if (isRestore) {
HoodieRestoreMetadata metadata = new HoodieRestoreMetadata();
List<HoodieRollbackMetadata> rollbackM = new ArrayList<>();
rollbackM.add(rollbackMetadata);
metadata.setHoodieRestoreMetadata(
new ImmutableMap.Builder().put(rollbackInstant, rollbackM).build());
metadata.setHoodieRestoreMetadata(new ImmutableMap.Builder().put(rollbackInstant, rollbackM).build());
List<String> rollbackInstants = new ArrayList<>();
rollbackInstants.add(rollbackInstant);
metadata.setInstantsToRollback(rollbackInstants);
@@ -507,14 +493,13 @@ public class IncrementalFSViewSyncTest extends HoodieCommonTestHarness {
* @param instantTime Compaction Instant Time
*/
private void scheduleCompaction(SyncableFileSystemView view, String instantTime) throws IOException {
List<Pair<String, FileSlice>> slices = partitions.stream().flatMap(p -> view.getLatestFileSlices(p)
.map(s -> Pair.of(p, s))).collect(Collectors.toList());
List<Pair<String, FileSlice>> slices = partitions.stream()
.flatMap(p -> view.getLatestFileSlices(p).map(s -> Pair.of(p, s))).collect(Collectors.toList());
long initialExpTotalFileSlices = partitions.stream().mapToLong(p -> view.getAllFileSlices(p).count()).sum();
HoodieCompactionPlan plan = CompactionUtils.buildFromFileSlices(slices, Option.empty(), Option.empty());
HoodieInstant compactionInstant =
new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, instantTime);
HoodieInstant compactionInstant = new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, instantTime);
metaClient.getActiveTimeline().saveToCompactionRequested(compactionInstant,
AvroUtils.serializeCompactionPlan(plan));
@@ -526,8 +511,8 @@ public class IncrementalFSViewSyncTest extends HoodieCommonTestHarness {
Assert.assertFalse(fs.getDataFile().isPresent());
});
view.getLatestMergedFileSlicesBeforeOrOn(p, instantTime).forEach(fs -> {
Assert.assertTrue(HoodieTimeline.compareTimestamps(instantTime, fs.getBaseInstantTime(),
HoodieTimeline.GREATER));
Assert
.assertTrue(HoodieTimeline.compareTimestamps(instantTime, fs.getBaseInstantTime(), HoodieTimeline.GREATER));
Assert.assertEquals(p, fs.getPartitionPath());
});
});
@@ -586,11 +571,11 @@ public class IncrementalFSViewSyncTest extends HoodieCommonTestHarness {
* @return List of new file created
*/
private Map<String, List<String>> testMultipleWriteSteps(SyncableFileSystemView view, List<String> instants,
boolean deltaCommit, String baseInstantForDeltaCommit, int begin)
throws IOException {
boolean deltaCommit, String baseInstantForDeltaCommit, int begin) throws IOException {
return testMultipleWriteSteps(view, instants, deltaCommit, baseInstantForDeltaCommit, begin,
instants.stream().map(i -> new HoodieInstant(State.COMPLETED,
deltaCommit ? HoodieTimeline.DELTA_COMMIT_ACTION : HoodieTimeline.COMMIT_ACTION, i))
instants.stream()
.map(i -> new HoodieInstant(State.COMPLETED,
deltaCommit ? HoodieTimeline.DELTA_COMMIT_ACTION : HoodieTimeline.COMMIT_ACTION, i))
.collect(Collectors.toList()));
}
@@ -628,13 +613,14 @@ public class IncrementalFSViewSyncTest extends HoodieCommonTestHarness {
log.info("Adding instant=" + instant);
HoodieInstant lastInstant = lastInstants.get(idx);
// Add a non-empty ingestion to COW table
List<String> filePaths = addInstant(metaClient, instant, deltaCommit,
deltaCommit ? baseInstantForDeltaCommit : instant);
List<String> filePaths =
addInstant(metaClient, instant, deltaCommit, deltaCommit ? baseInstantForDeltaCommit : instant);
view.sync();
Assert.assertTrue(view.getLastInstant().isPresent());
Assert.assertEquals(lastInstant.getTimestamp(), view.getLastInstant().get().getTimestamp());
Assert.assertEquals(State.COMPLETED, view.getLastInstant().get().getState());
Assert.assertEquals("Expected Last=" + lastInstant + ", Found Instants="
Assert.assertEquals(
"Expected Last=" + lastInstant + ", Found Instants="
+ view.getTimeline().getInstants().collect(Collectors.toList()),
lastInstant.getAction(), view.getLastInstant().get().getAction());
partitions.forEach(p -> Assert.assertEquals(fileIdsPerPartition.size(), view.getLatestFileSlices(p).count()));
@@ -676,53 +662,50 @@ public class IncrementalFSViewSyncTest extends HoodieCommonTestHarness {
*/
private void areViewsConsistent(SyncableFileSystemView view1, SyncableFileSystemView view2,
long expectedTotalFileSlices) {
//Timeline check
// Timeline check
HoodieTimeline timeline1 = view1.getTimeline();
HoodieTimeline timeline2 = view2.getTimeline();
Assert.assertEquals(view1.getLastInstant(), view2.getLastInstant());
Iterators.elementsEqual(timeline1.getInstants().iterator(), timeline2.getInstants().iterator());
//View Checks
Map<HoodieFileGroupId, HoodieFileGroup> fileGroupsMap1 =
partitions.stream().flatMap(p -> view1.getAllFileGroups(p))
.collect(Collectors.toMap(fg -> fg.getFileGroupId(), fg -> fg));
Map<HoodieFileGroupId, HoodieFileGroup> fileGroupsMap2 =
partitions.stream().flatMap(p -> view2.getAllFileGroups(p))
.collect(Collectors.toMap(fg -> fg.getFileGroupId(), fg -> fg));
// View Checks
Map<HoodieFileGroupId, HoodieFileGroup> fileGroupsMap1 = partitions.stream().flatMap(p -> view1.getAllFileGroups(p))
.collect(Collectors.toMap(fg -> fg.getFileGroupId(), fg -> fg));
Map<HoodieFileGroupId, HoodieFileGroup> fileGroupsMap2 = partitions.stream().flatMap(p -> view2.getAllFileGroups(p))
.collect(Collectors.toMap(fg -> fg.getFileGroupId(), fg -> fg));
Assert.assertEquals(fileGroupsMap1.keySet(), fileGroupsMap2.keySet());
long gotSlicesCount =
fileGroupsMap1.keySet().stream().map(k -> Pair.of(fileGroupsMap1.get(k), fileGroupsMap2.get(k)))
.mapToLong(e -> {
HoodieFileGroup fg1 = e.getKey();
HoodieFileGroup fg2 = e.getValue();
Assert.assertEquals(fg1.getFileGroupId(), fg2.getFileGroupId());
List<FileSlice> slices1 = fg1.getAllRawFileSlices().collect(Collectors.toList());
List<FileSlice> slices2 = fg2.getAllRawFileSlices().collect(Collectors.toList());
Assert.assertEquals(slices1.size(), slices2.size());
IntStream.range(0, slices1.size()).mapToObj(idx -> Pair.of(slices1.get(idx), slices2.get(idx)))
.forEach(e2 -> {
FileSlice slice1 = e2.getKey();
FileSlice slice2 = e2.getValue();
Assert.assertEquals(slice1.getBaseInstantTime(), slice2.getBaseInstantTime());
Assert.assertEquals(slice1.getFileId(), slice2.getFileId());
Assert.assertEquals(slice1.getDataFile().isPresent(), slice2.getDataFile().isPresent());
if (slice1.getDataFile().isPresent()) {
HoodieDataFile df1 = slice1.getDataFile().get();
HoodieDataFile df2 = slice2.getDataFile().get();
Assert.assertEquals(df1.getCommitTime(), df2.getCommitTime());
Assert.assertEquals(df1.getFileId(), df2.getFileId());
Assert.assertEquals(df1.getFileName(), df2.getFileName());
Assert.assertEquals(Path.getPathWithoutSchemeAndAuthority(new Path(df1.getPath())),
Path.getPathWithoutSchemeAndAuthority(new Path(df2.getPath())));
}
List<Path> logPaths1 = slice1.getLogFiles()
.map(lf -> Path.getPathWithoutSchemeAndAuthority(lf.getPath())).collect(Collectors.toList());
List<Path> logPaths2 = slice2.getLogFiles()
.map(lf -> Path.getPathWithoutSchemeAndAuthority(lf.getPath())).collect(Collectors.toList());
Assert.assertEquals(logPaths1, logPaths2);
});
return slices1.size();
}).sum();
long gotSlicesCount = fileGroupsMap1.keySet().stream()
.map(k -> Pair.of(fileGroupsMap1.get(k), fileGroupsMap2.get(k))).mapToLong(e -> {
HoodieFileGroup fg1 = e.getKey();
HoodieFileGroup fg2 = e.getValue();
Assert.assertEquals(fg1.getFileGroupId(), fg2.getFileGroupId());
List<FileSlice> slices1 = fg1.getAllRawFileSlices().collect(Collectors.toList());
List<FileSlice> slices2 = fg2.getAllRawFileSlices().collect(Collectors.toList());
Assert.assertEquals(slices1.size(), slices2.size());
IntStream.range(0, slices1.size()).mapToObj(idx -> Pair.of(slices1.get(idx), slices2.get(idx)))
.forEach(e2 -> {
FileSlice slice1 = e2.getKey();
FileSlice slice2 = e2.getValue();
Assert.assertEquals(slice1.getBaseInstantTime(), slice2.getBaseInstantTime());
Assert.assertEquals(slice1.getFileId(), slice2.getFileId());
Assert.assertEquals(slice1.getDataFile().isPresent(), slice2.getDataFile().isPresent());
if (slice1.getDataFile().isPresent()) {
HoodieDataFile df1 = slice1.getDataFile().get();
HoodieDataFile df2 = slice2.getDataFile().get();
Assert.assertEquals(df1.getCommitTime(), df2.getCommitTime());
Assert.assertEquals(df1.getFileId(), df2.getFileId());
Assert.assertEquals(df1.getFileName(), df2.getFileName());
Assert.assertEquals(Path.getPathWithoutSchemeAndAuthority(new Path(df1.getPath())),
Path.getPathWithoutSchemeAndAuthority(new Path(df2.getPath())));
}
List<Path> logPaths1 = slice1.getLogFiles()
.map(lf -> Path.getPathWithoutSchemeAndAuthority(lf.getPath())).collect(Collectors.toList());
List<Path> logPaths2 = slice2.getLogFiles()
.map(lf -> Path.getPathWithoutSchemeAndAuthority(lf.getPath())).collect(Collectors.toList());
Assert.assertEquals(logPaths1, logPaths2);
});
return slices1.size();
}).sum();
Assert.assertEquals(expectedTotalFileSlices, gotSlicesCount);
// Pending Compaction Operations Check
@@ -732,14 +715,14 @@ public class IncrementalFSViewSyncTest extends HoodieCommonTestHarness {
}
private List<String> addInstant(HoodieTableMetaClient metaClient, String instant, boolean deltaCommit,
String baseInstant)
throws IOException {
String baseInstant) throws IOException {
List<Pair<String, HoodieWriteStat>> writeStats = partitions.stream().flatMap(p -> {
return fileIdsPerPartition.stream().map(f -> {
try {
File file = new File(basePath + "/" + p + "/"
+ (deltaCommit ? FSUtils.makeLogFileName(f, ".log", baseInstant,
Integer.parseInt(instant), TEST_WRITE_TOKEN) : FSUtils.makeDataFileName(instant, TEST_WRITE_TOKEN, f)));
+ (deltaCommit
? FSUtils.makeLogFileName(f, ".log", baseInstant, Integer.parseInt(instant), TEST_WRITE_TOKEN)
: FSUtils.makeDataFileName(instant, TEST_WRITE_TOKEN, f)));
file.createNewFile();
HoodieWriteStat w = new HoodieWriteStat();
w.setFileId(f);
@@ -753,10 +736,10 @@ public class IncrementalFSViewSyncTest extends HoodieCommonTestHarness {
HoodieCommitMetadata metadata = new HoodieCommitMetadata();
writeStats.forEach(e -> metadata.addWriteStat(e.getKey(), e.getValue()));
metaClient.getActiveTimeline().saveAsComplete(
new HoodieInstant(true, deltaCommit ? HoodieTimeline.DELTA_COMMIT_ACTION : HoodieTimeline.COMMIT_ACTION,
instant),
Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
metaClient.getActiveTimeline()
.saveAsComplete(new HoodieInstant(true,
deltaCommit ? HoodieTimeline.DELTA_COMMIT_ACTION : HoodieTimeline.COMMIT_ACTION, instant),
Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
// Delete pending compaction if present
metaClient.getFs().delete(new Path(metaClient.getMetaPath(),
new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, instant).getFileName()));

View File

@@ -28,8 +28,7 @@ public class RocksDBBasedIncrementalFSViewSyncTest extends IncrementalFSViewSync
@Override
protected SyncableFileSystemView getFileSystemView(HoodieTableMetaClient metaClient, HoodieTimeline timeline)
throws IOException {
return new RocksDbBasedFileSystemView(metaClient, timeline,
FileSystemViewStorageConfig.newBuilder().withRocksDBPath(folder.newFolder().getAbsolutePath())
.withIncrementalTimelineSync(true).build());
return new RocksDbBasedFileSystemView(metaClient, timeline, FileSystemViewStorageConfig.newBuilder()
.withRocksDBPath(folder.newFolder().getAbsolutePath()).withIncrementalTimelineSync(true).build());
}
}

View File

@@ -26,7 +26,6 @@ public class RocksDbBasedFileSystemViewTest extends HoodieTableFileSystemViewTes
protected SyncableFileSystemView getFileSystemView(HoodieTimeline timeline) throws IOException {
return new RocksDbBasedFileSystemView(metaClient, timeline,
FileSystemViewStorageConfig.newBuilder().withRocksDBPath(folder.newFolder().getAbsolutePath())
.build());
FileSystemViewStorageConfig.newBuilder().withRocksDBPath(folder.newFolder().getAbsolutePath()).build());
}
}

View File

@@ -24,8 +24,8 @@ import org.apache.hudi.common.table.SyncableFileSystemView;
public class SpillableMapBasedFileSystemViewTest extends HoodieTableFileSystemViewTest {
protected SyncableFileSystemView getFileSystemView(HoodieTimeline timeline) {
return new SpillableMapBasedFileSystemView(metaClient, timeline,
FileSystemViewStorageConfig.newBuilder().withStorageType(FileSystemViewStorageType.SPILLABLE_DISK)
.withMaxMemoryForView(0L).build()); // pure disk base View
return new SpillableMapBasedFileSystemView(metaClient, timeline, FileSystemViewStorageConfig.newBuilder()
// pure disk base View
.withStorageType(FileSystemViewStorageType.SPILLABLE_DISK).withMaxMemoryForView(0L).build());
}
}

View File

@@ -53,8 +53,7 @@ public class CompactionTestUtils {
private static String TEST_WRITE_TOKEN = "1-0-1";
public static Map<HoodieFileGroupId, Pair<String, HoodieCompactionOperation>> setupAndValidateCompactionOperations(
HoodieTableMetaClient metaClient, boolean inflight,
int numEntriesInPlan1, int numEntriesInPlan2,
HoodieTableMetaClient metaClient, boolean inflight, int numEntriesInPlan1, int numEntriesInPlan2,
int numEntriesInPlan3, int numEntriesInPlan4) throws IOException {
HoodieCompactionPlan plan1 = createCompactionPlan(metaClient, "000", "001", numEntriesInPlan1, true, true);
HoodieCompactionPlan plan2 = createCompactionPlan(metaClient, "002", "003", numEntriesInPlan2, false, true);
@@ -78,17 +77,16 @@ public class CompactionTestUtils {
createDeltaCommit(metaClient, "004");
createDeltaCommit(metaClient, "006");
Map<String, String> baseInstantsToCompaction =
new ImmutableMap.Builder<String, String>().put("000", "001").put("002", "003")
.put("004", "005").put("006", "007").build();
Map<String, String> baseInstantsToCompaction = new ImmutableMap.Builder<String, String>().put("000", "001")
.put("002", "003").put("004", "005").put("006", "007").build();
List<Integer> expectedNumEntries =
Arrays.asList(numEntriesInPlan1, numEntriesInPlan2, numEntriesInPlan3, numEntriesInPlan4);
List<HoodieCompactionPlan> plans = new ImmutableList.Builder<HoodieCompactionPlan>()
.add(plan1, plan2, plan3, plan4).build();
List<HoodieCompactionPlan> plans =
new ImmutableList.Builder<HoodieCompactionPlan>().add(plan1, plan2, plan3, plan4).build();
IntStream.range(0, 4).boxed().forEach(idx -> {
if (expectedNumEntries.get(idx) > 0) {
Assert.assertEquals("check if plan " + idx + " has exp entries",
expectedNumEntries.get(idx).longValue(), plans.get(idx).getOperations().size());
Assert.assertEquals("check if plan " + idx + " has exp entries", expectedNumEntries.get(idx).longValue(),
plans.get(idx).getOperations().size());
} else {
Assert.assertNull("Plan " + idx + " has null ops", plans.get(idx).getOperations());
}
@@ -108,39 +106,37 @@ public class CompactionTestUtils {
public static Map<HoodieFileGroupId, Pair<String, HoodieCompactionOperation>> generateExpectedCompactionOperations(
List<HoodieCompactionPlan> plans, Map<String, String> baseInstantsToCompaction) {
return plans.stream()
.flatMap(plan -> {
if (plan.getOperations() != null) {
return plan.getOperations().stream().map(op -> Pair.of(
new HoodieFileGroupId(op.getPartitionPath(), op.getFileId()),
return plans.stream().flatMap(plan -> {
if (plan.getOperations() != null) {
return plan.getOperations().stream()
.map(op -> Pair.of(new HoodieFileGroupId(op.getPartitionPath(), op.getFileId()),
Pair.of(baseInstantsToCompaction.get(op.getBaseInstantTime()), op)));
}
return Stream.empty();
}).collect(Collectors.toMap(Pair::getKey, Pair::getValue));
}
return Stream.empty();
}).collect(Collectors.toMap(Pair::getKey, Pair::getValue));
}
public static void scheduleCompaction(HoodieTableMetaClient metaClient,
String instantTime, HoodieCompactionPlan compactionPlan) throws IOException {
public static void scheduleCompaction(HoodieTableMetaClient metaClient, String instantTime,
HoodieCompactionPlan compactionPlan) throws IOException {
metaClient.getActiveTimeline().saveToCompactionRequested(
new HoodieInstant(State.REQUESTED, COMPACTION_ACTION, instantTime),
AvroUtils.serializeCompactionPlan(compactionPlan));
}
public static void createDeltaCommit(HoodieTableMetaClient metaClient, String instantTime) throws IOException {
metaClient.getActiveTimeline().saveAsComplete(
new HoodieInstant(State.INFLIGHT, DELTA_COMMIT_ACTION, instantTime), Option.empty());
metaClient.getActiveTimeline().saveAsComplete(new HoodieInstant(State.INFLIGHT, DELTA_COMMIT_ACTION, instantTime),
Option.empty());
}
public static void scheduleInflightCompaction(HoodieTableMetaClient metaClient, String instantTime,
HoodieCompactionPlan compactionPlan) throws IOException {
scheduleCompaction(metaClient, instantTime, compactionPlan);
metaClient.getActiveTimeline().transitionCompactionRequestedToInflight(
new HoodieInstant(State.REQUESTED, COMPACTION_ACTION, instantTime));
metaClient.getActiveTimeline()
.transitionCompactionRequestedToInflight(new HoodieInstant(State.REQUESTED, COMPACTION_ACTION, instantTime));
}
public static HoodieCompactionPlan createCompactionPlan(HoodieTableMetaClient metaClient, String instantId,
String compactionInstantId, int numFileIds, boolean createDataFile,
boolean deltaCommitsAfterCompactionRequests) {
String compactionInstantId, int numFileIds, boolean createDataFile, boolean deltaCommitsAfterCompactionRequests) {
List<HoodieCompactionOperation> ops = IntStream.range(0, numFileIds).boxed().map(idx -> {
try {
String fileId = UUID.randomUUID().toString();
@@ -153,15 +149,13 @@ public class CompactionTestUtils {
instantId, fileId, Option.of(2));
FileSlice slice = new FileSlice(DEFAULT_PARTITION_PATHS[0], instantId, fileId);
if (createDataFile) {
slice.setDataFile(new TestHoodieDataFile(metaClient.getBasePath() + "/" + DEFAULT_PARTITION_PATHS[0]
+ "/" + FSUtils.makeDataFileName(instantId, TEST_WRITE_TOKEN, fileId)));
slice.setDataFile(new TestHoodieDataFile(metaClient.getBasePath() + "/" + DEFAULT_PARTITION_PATHS[0] + "/"
+ FSUtils.makeDataFileName(instantId, TEST_WRITE_TOKEN, fileId)));
}
String logFilePath1 = HoodieTestUtils
.getLogFilePath(metaClient.getBasePath(), DEFAULT_PARTITION_PATHS[0], instantId, fileId,
Option.of(1));
String logFilePath2 = HoodieTestUtils
.getLogFilePath(metaClient.getBasePath(), DEFAULT_PARTITION_PATHS[0], instantId, fileId,
Option.of(2));
String logFilePath1 = HoodieTestUtils.getLogFilePath(metaClient.getBasePath(), DEFAULT_PARTITION_PATHS[0],
instantId, fileId, Option.of(1));
String logFilePath2 = HoodieTestUtils.getLogFilePath(metaClient.getBasePath(), DEFAULT_PARTITION_PATHS[0],
instantId, fileId, Option.of(2));
slice.addLogFile(new HoodieLogFile(new Path(logFilePath1)));
slice.addLogFile(new HoodieLogFile(new Path(logFilePath2)));
HoodieCompactionOperation op =

View File

@@ -109,11 +109,8 @@ public class SchemaTestUtil {
public static List<HoodieRecord> generateHoodieTestRecords(int from, int limit, Schema schema)
throws IOException, URISyntaxException {
List<IndexedRecord> records = generateTestRecords(from, limit);
return records.stream()
.map(s -> HoodieAvroUtils.rewriteRecord((GenericRecord) s, schema))
.map(p -> convertToHoodieRecords(p,
UUID.randomUUID().toString(), "000/00/00")).collect(
Collectors.toList());
return records.stream().map(s -> HoodieAvroUtils.rewriteRecord((GenericRecord) s, schema))
.map(p -> convertToHoodieRecords(p, UUID.randomUUID().toString(), "000/00/00")).collect(Collectors.toList());
}
private static HoodieRecord convertToHoodieRecords(IndexedRecord iRecord, String key, String partitionPath) {
@@ -124,14 +121,12 @@ public class SchemaTestUtil {
public static List<IndexedRecord> updateHoodieTestRecords(List<String> oldRecordKeys, List<IndexedRecord> newRecords,
String commitTime) throws IOException, URISyntaxException {
return newRecords.stream()
.map(p -> {
((GenericRecord) p).put(HoodieRecord.RECORD_KEY_METADATA_FIELD, oldRecordKeys.remove(0));
((GenericRecord) p).put(HoodieRecord.PARTITION_PATH_METADATA_FIELD, "0000/00/00");
((GenericRecord) p).put(HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitTime);
return p;
}).collect(
Collectors.toList());
return newRecords.stream().map(p -> {
((GenericRecord) p).put(HoodieRecord.RECORD_KEY_METADATA_FIELD, oldRecordKeys.remove(0));
((GenericRecord) p).put(HoodieRecord.PARTITION_PATH_METADATA_FIELD, "0000/00/00");
((GenericRecord) p).put(HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitTime);
return p;
}).collect(Collectors.toList());
}
@@ -139,28 +134,21 @@ public class SchemaTestUtil {
throws IOException, URISyntaxException {
List<IndexedRecord> iRecords = generateTestRecords(from, limit);
return iRecords
.stream()
.map(r -> new HoodieRecord<>(new HoodieKey(UUID.randomUUID().toString(), "0000/00/00"),
new HoodieAvroPayload(Option.of((GenericRecord) r)))).collect(Collectors.toList());
return iRecords.stream().map(r -> new HoodieRecord<>(new HoodieKey(UUID.randomUUID().toString(), "0000/00/00"),
new HoodieAvroPayload(Option.of((GenericRecord) r)))).collect(Collectors.toList());
}
public static List<HoodieRecord> updateHoodieTestRecordsWithoutHoodieMetadata(List<HoodieRecord> oldRecords,
Schema schema,
String fieldNameToUpdate, String newValue)
throws IOException, URISyntaxException {
return oldRecords
.stream()
.map(r -> {
try {
GenericRecord rec = (GenericRecord) r.getData().getInsertValue(schema).get();
rec.put(fieldNameToUpdate, newValue);
return new HoodieRecord<>(r.getKey(),
new HoodieAvroPayload(Option.of(rec)));
} catch (IOException io) {
throw new HoodieIOException("unable to get data from hoodie record", io);
}
}).collect(Collectors.toList());
Schema schema, String fieldNameToUpdate, String newValue) throws IOException, URISyntaxException {
return oldRecords.stream().map(r -> {
try {
GenericRecord rec = (GenericRecord) r.getData().getInsertValue(schema).get();
rec.put(fieldNameToUpdate, newValue);
return new HoodieRecord<>(r.getKey(), new HoodieAvroPayload(Option.of(rec)));
} catch (IOException io) {
throw new HoodieIOException("unable to get data from hoodie record", io);
}
}).collect(Collectors.toList());
}
public static Schema getEvolvedSchema() throws IOException {

View File

@@ -37,19 +37,17 @@ public class SpillableMapTestUtils {
public static List<String> upsertRecords(List<IndexedRecord> iRecords,
Map<String, HoodieRecord<? extends HoodieRecordPayload>> records) {
List<String> recordKeys = new ArrayList<>();
iRecords
.stream()
.forEach(r -> {
String key = ((GenericRecord) r).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
String partitionPath = ((GenericRecord) r).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString();
recordKeys.add(key);
HoodieRecord record = new HoodieRecord<>(new HoodieKey(key, partitionPath),
new HoodieAvroPayload(Option.of((GenericRecord) r)));
record.unseal();
record.setCurrentLocation(new HoodieRecordLocation("DUMMY_COMMIT_TIME", "DUMMY_FILE_ID"));
record.seal();
records.put(key, record);
});
iRecords.stream().forEach(r -> {
String key = ((GenericRecord) r).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
String partitionPath = ((GenericRecord) r).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString();
recordKeys.add(key);
HoodieRecord record =
new HoodieRecord<>(new HoodieKey(key, partitionPath), new HoodieAvroPayload(Option.of((GenericRecord) r)));
record.unseal();
record.setCurrentLocation(new HoodieRecordLocation("DUMMY_COMMIT_TIME", "DUMMY_FILE_ID"));
record.seal();
records.put(key, record);
});
return recordKeys;
}
}

View File

@@ -51,9 +51,7 @@ public class TestCompactionUtils extends HoodieCommonTestHarness {
private static String TEST_WRITE_TOKEN = "1-0-1";
private static final Map<String, Double> metrics =
new ImmutableMap.Builder<String, Double>()
.put("key1", 1.0)
.put("key2", 3.0).build();
new ImmutableMap.Builder<String, Double>().put("key1", 1.0).put("key2", 3.0).build();
private Function<Pair<String, FileSlice>, Map<String, Double>> metricsCaptureFn = (partitionFileSlice) -> metrics;
@Before
@@ -64,37 +62,34 @@ public class TestCompactionUtils extends HoodieCommonTestHarness {
@Test
public void testBuildFromFileSlice() {
// Empty File-Slice with no data and log files
FileSlice emptyFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0],"000", "empty1");
HoodieCompactionOperation op = CompactionUtils.buildFromFileSlice(
DEFAULT_PARTITION_PATHS[0], emptyFileSlice, Option.of(metricsCaptureFn));
FileSlice emptyFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "empty1");
HoodieCompactionOperation op =
CompactionUtils.buildFromFileSlice(DEFAULT_PARTITION_PATHS[0], emptyFileSlice, Option.of(metricsCaptureFn));
testFileSliceCompactionOpEquality(emptyFileSlice, op, DEFAULT_PARTITION_PATHS[0]);
// File Slice with data-file but no log files
FileSlice noLogFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0],"000", "noLog1");
FileSlice noLogFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "noLog1");
noLogFileSlice.setDataFile(new TestHoodieDataFile("/tmp/noLog_1_000.parquet"));
op = CompactionUtils.buildFromFileSlice(
DEFAULT_PARTITION_PATHS[0], noLogFileSlice, Option.of(metricsCaptureFn));
op = CompactionUtils.buildFromFileSlice(DEFAULT_PARTITION_PATHS[0], noLogFileSlice, Option.of(metricsCaptureFn));
testFileSliceCompactionOpEquality(noLogFileSlice, op, DEFAULT_PARTITION_PATHS[0]);
//File Slice with no data-file but log files present
FileSlice noDataFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0],"000", "noData1");
noDataFileSlice.addLogFile(new HoodieLogFile(new Path(
FSUtils.makeLogFileName("noData1", ".log", "000", 1, TEST_WRITE_TOKEN))));
noDataFileSlice.addLogFile(new HoodieLogFile(new Path(
FSUtils.makeLogFileName("noData1", ".log", "000", 2, TEST_WRITE_TOKEN))));
op = CompactionUtils.buildFromFileSlice(
DEFAULT_PARTITION_PATHS[0], noDataFileSlice, Option.of(metricsCaptureFn));
// File Slice with no data-file but log files present
FileSlice noDataFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "noData1");
noDataFileSlice.addLogFile(
new HoodieLogFile(new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 1, TEST_WRITE_TOKEN))));
noDataFileSlice.addLogFile(
new HoodieLogFile(new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 2, TEST_WRITE_TOKEN))));
op = CompactionUtils.buildFromFileSlice(DEFAULT_PARTITION_PATHS[0], noDataFileSlice, Option.of(metricsCaptureFn));
testFileSliceCompactionOpEquality(noDataFileSlice, op, DEFAULT_PARTITION_PATHS[0]);
//File Slice with data-file and log files present
FileSlice fileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0],"000", "noData1");
// File Slice with data-file and log files present
FileSlice fileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "noData1");
fileSlice.setDataFile(new TestHoodieDataFile("/tmp/noLog_1_000.parquet"));
fileSlice.addLogFile(new HoodieLogFile(new Path(
FSUtils.makeLogFileName("noData1", ".log", "000", 1, TEST_WRITE_TOKEN))));
fileSlice.addLogFile(new HoodieLogFile(new Path(
FSUtils.makeLogFileName("noData1", ".log", "000", 2, TEST_WRITE_TOKEN))));
op = CompactionUtils.buildFromFileSlice(
DEFAULT_PARTITION_PATHS[0], fileSlice, Option.of(metricsCaptureFn));
fileSlice.addLogFile(
new HoodieLogFile(new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 1, TEST_WRITE_TOKEN))));
fileSlice.addLogFile(
new HoodieLogFile(new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 2, TEST_WRITE_TOKEN))));
op = CompactionUtils.buildFromFileSlice(DEFAULT_PARTITION_PATHS[0], fileSlice, Option.of(metricsCaptureFn));
testFileSliceCompactionOpEquality(fileSlice, op, DEFAULT_PARTITION_PATHS[0]);
}
@@ -102,23 +97,23 @@ public class TestCompactionUtils extends HoodieCommonTestHarness {
* Generate input for compaction plan tests
*/
private Pair<List<Pair<String, FileSlice>>, HoodieCompactionPlan> buildCompactionPlan() {
FileSlice emptyFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0],"000", "empty1");
FileSlice fileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0],"000", "noData1");
FileSlice emptyFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "empty1");
FileSlice fileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "noData1");
fileSlice.setDataFile(new TestHoodieDataFile("/tmp/noLog_1_000.parquet"));
fileSlice.addLogFile(new HoodieLogFile(new Path(
FSUtils.makeLogFileName("noData1", ".log", "000", 1, TEST_WRITE_TOKEN))));
fileSlice.addLogFile(new HoodieLogFile(new Path(
FSUtils.makeLogFileName("noData1", ".log", "000", 2, TEST_WRITE_TOKEN))));
FileSlice noLogFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0],"000", "noLog1");
fileSlice.addLogFile(
new HoodieLogFile(new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 1, TEST_WRITE_TOKEN))));
fileSlice.addLogFile(
new HoodieLogFile(new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 2, TEST_WRITE_TOKEN))));
FileSlice noLogFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "noLog1");
noLogFileSlice.setDataFile(new TestHoodieDataFile("/tmp/noLog_1_000.parquet"));
FileSlice noDataFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0],"000", "noData1");
noDataFileSlice.addLogFile(new HoodieLogFile(new Path(
FSUtils.makeLogFileName("noData1", ".log", "000", 1, TEST_WRITE_TOKEN))));
noDataFileSlice.addLogFile(new HoodieLogFile(new Path(
FSUtils.makeLogFileName("noData1", ".log", "000", 2, TEST_WRITE_TOKEN))));
FileSlice noDataFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "noData1");
noDataFileSlice.addLogFile(
new HoodieLogFile(new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 1, TEST_WRITE_TOKEN))));
noDataFileSlice.addLogFile(
new HoodieLogFile(new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 2, TEST_WRITE_TOKEN))));
List<FileSlice> fileSliceList = Arrays.asList(emptyFileSlice, noDataFileSlice, fileSlice, noLogFileSlice);
List<Pair<String, FileSlice>> input = fileSliceList.stream().map(f -> Pair.of(DEFAULT_PARTITION_PATHS[0], f))
.collect(Collectors.toList());
List<Pair<String, FileSlice>> input =
fileSliceList.stream().map(f -> Pair.of(DEFAULT_PARTITION_PATHS[0], f)).collect(Collectors.toList());
return Pair.of(input, CompactionUtils.buildFromFileSlices(input, Option.empty(), Option.of(metricsCaptureFn)));
}
@@ -134,14 +129,13 @@ public class TestCompactionUtils extends HoodieCommonTestHarness {
Pair<List<Pair<String, FileSlice>>, HoodieCompactionPlan> inputAndPlan = buildCompactionPlan();
HoodieCompactionPlan plan = inputAndPlan.getRight();
List<HoodieCompactionOperation> originalOps = plan.getOperations();
List<HoodieCompactionOperation> regeneratedOps =
originalOps.stream().map(op -> {
// Convert to CompactionOperation
return CompactionUtils.buildCompactionOperation(op);
}).map(op2 -> {
// Convert back to HoodieCompactionOperation and check for equality
return CompactionUtils.buildHoodieCompactionOperation(op2);
}).collect(Collectors.toList());
List<HoodieCompactionOperation> regeneratedOps = originalOps.stream().map(op -> {
// Convert to CompactionOperation
return CompactionUtils.buildCompactionOperation(op);
}).map(op2 -> {
// Convert back to HoodieCompactionOperation and check for equality
return CompactionUtils.buildHoodieCompactionOperation(op2);
}).collect(Collectors.toList());
Assert.assertTrue("Transformation did get tested", originalOps.size() > 0);
Assert.assertEquals("All fields set correctly in transformations", originalOps, regeneratedOps);
}
@@ -194,24 +188,22 @@ public class TestCompactionUtils extends HoodieCommonTestHarness {
}
/**
* Validates if generated compaction plan matches with input file-slices
* Validates if generated compaction plan matches with input file-slices
*
* @param input File Slices with partition-path
* @param plan Compaction Plan
* @param plan Compaction Plan
*/
private void testFileSlicesCompactionPlanEquality(List<Pair<String, FileSlice>> input,
HoodieCompactionPlan plan) {
private void testFileSlicesCompactionPlanEquality(List<Pair<String, FileSlice>> input, HoodieCompactionPlan plan) {
Assert.assertEquals("All file-slices present", input.size(), plan.getOperations().size());
IntStream.range(0, input.size()).boxed().forEach(idx ->
testFileSliceCompactionOpEquality(input.get(idx).getValue(), plan.getOperations().get(idx),
input.get(idx).getKey()));
IntStream.range(0, input.size()).boxed().forEach(idx -> testFileSliceCompactionOpEquality(input.get(idx).getValue(),
plan.getOperations().get(idx), input.get(idx).getKey()));
}
/**
* Validates if generated compaction operation matches with input file slice and partition path
*
* @param slice File Slice
* @param op HoodieCompactionOperation
* @param slice File Slice
* @param op HoodieCompactionOperation
* @param expPartitionPath Partition path
*/
private void testFileSliceCompactionOpEquality(FileSlice slice, HoodieCompactionOperation op,

View File

@@ -53,25 +53,18 @@ public class TestDFSPropertiesConfiguration {
// create some files.
Path filePath = new Path(dfsBasePath + "/t1.props");
writePropertiesFile(filePath, new String[]{
"", "#comment", "abc",// to be ignored
"int.prop=123", "double.prop=113.4", "string.prop=str", "boolean.prop=true", "long.prop=1354354354"
});
writePropertiesFile(filePath, new String[] {"", "#comment", "abc", // to be ignored
"int.prop=123", "double.prop=113.4", "string.prop=str", "boolean.prop=true", "long.prop=1354354354"});
filePath = new Path(dfsBasePath + "/t2.props");
writePropertiesFile(filePath, new String[]{
"string.prop=ignored", "include=t1.props"
});
writePropertiesFile(filePath, new String[] {"string.prop=ignored", "include=t1.props"});
filePath = new Path(dfsBasePath + "/t3.props");
writePropertiesFile(filePath, new String[]{
"double.prop=838.3", "include = t2.props", "double.prop=243.4", "string.prop=t3.value"
});
writePropertiesFile(filePath,
new String[] {"double.prop=838.3", "include = t2.props", "double.prop=243.4", "string.prop=t3.value"});
filePath = new Path(dfsBasePath + "/t4.props");
writePropertiesFile(filePath, new String[]{
"double.prop=838.3", "include = t4.props"
});
writePropertiesFile(filePath, new String[] {"double.prop=838.3", "include = t4.props"});
}
@AfterClass
@@ -98,7 +91,9 @@ public class TestDFSPropertiesConfiguration {
try {
props.getString("invalid.key");
fail("Should error out here.");
} catch (IllegalArgumentException iae) { /* ignore */ }
} catch (IllegalArgumentException iae) {
// ignore
}
assertEquals(123, props.getInteger("int.prop"));
assertEquals(113.4, props.getDouble("double.prop"), 0.001);
@@ -133,6 +128,8 @@ public class TestDFSPropertiesConfiguration {
try {
new DFSPropertiesConfiguration(dfs, new Path(dfsBasePath + "/t4.props"));
fail("Should error out on a self-included file.");
} catch (IllegalStateException ise) { /* ignore */ }
} catch (IllegalStateException ise) {
// ignore
}
}
}

View File

@@ -74,16 +74,16 @@ public class TestFSUtils extends HoodieCommonTestHarness {
@Test
/**
* Tests if process Files return only paths excluding marker directories
* Cleaner, Rollback and compaction-scheduling logic was recursively processing all subfolders including that
* of ".hoodie" when looking for partition-paths. This causes a race when they try to list all folders (recursively)
* but the marker directory (that of compaction inside of ".hoodie" folder) is deleted underneath by compactor.
* This code tests the fix by ensuring ".hoodie" and their subfolders are never processed.
* Tests if processFiles returns only paths excluding marker directories. Cleaner, Rollback and compaction-scheduling
* logic was recursively processing all subfolders, including those of ".hoodie", when looking for partition-paths.
* This causes a race when they try to list all folders (recursively) but the marker directory (that of compaction
* inside of ".hoodie" folder) is deleted underneath by compactor. This code tests the fix by ensuring ".hoodie" and
* its subfolders are never processed.
*/
public void testProcessFiles() throws Exception {
// All directories including marker dirs.
List<String> folders = Arrays.asList("2016/04/15", "2016/05/16", ".hoodie/.temp/2/2016/04/15",
".hoodie/.temp/2/2016/05/16");
List<String> folders =
Arrays.asList("2016/04/15", "2016/05/16", ".hoodie/.temp/2/2016/04/15", ".hoodie/.temp/2/2016/05/16");
folders.stream().forEach(f -> {
try {
metaClient.getFs().mkdirs(new Path(new Path(basePath), f));
@@ -93,12 +93,9 @@ public class TestFSUtils extends HoodieCommonTestHarness {
});
// Files inside partitions and marker directories
List<String> files = Arrays.asList(
"2016/04/15/1_1-0-1_20190528120000.parquet",
"2016/05/16/2_1-0-1_20190528120000.parquet",
".hoodie/.temp/2/2016/05/16/2_1-0-1_20190528120000.parquet",
".hoodie/.temp/2/2016/04/15/1_1-0-1_20190528120000.parquet"
);
List<String> files = Arrays.asList("2016/04/15/1_1-0-1_20190528120000.parquet",
"2016/05/16/2_1-0-1_20190528120000.parquet", ".hoodie/.temp/2/2016/05/16/2_1-0-1_20190528120000.parquet",
".hoodie/.temp/2/2016/04/15/1_1-0-1_20190528120000.parquet");
files.stream().forEach(f -> {
try {
@@ -115,8 +112,8 @@ public class TestFSUtils extends HoodieCommonTestHarness {
return true;
}, true);
Assert.assertTrue("Hoodie MetaFolder MUST be skipped but got :" + collected, collected.stream()
.noneMatch(s -> s.contains(HoodieTableMetaClient.METAFOLDER_NAME)));
Assert.assertTrue("Hoodie MetaFolder MUST be skipped but got :" + collected,
collected.stream().noneMatch(s -> s.contains(HoodieTableMetaClient.METAFOLDER_NAME)));
// Check if only files are listed
Assert.assertEquals(2, collected.size());
@@ -127,8 +124,8 @@ public class TestFSUtils extends HoodieCommonTestHarness {
return true;
}, false);
Assert.assertFalse("Hoodie MetaFolder will be present :" + collected2, collected2.stream()
.noneMatch(s -> s.contains(HoodieTableMetaClient.METAFOLDER_NAME)));
Assert.assertFalse("Hoodie MetaFolder will be present :" + collected2,
collected2.stream().noneMatch(s -> s.contains(HoodieTableMetaClient.METAFOLDER_NAME)));
// Check if only files are listed including hoodie.properties
Assert.assertEquals("Collected=" + collected2, 5, collected2.size());
}
@@ -166,7 +163,7 @@ public class TestFSUtils extends HoodieCommonTestHarness {
public void testGetRelativePartitionPath() {
Path basePath = new Path("/test/apache");
Path partitionPath = new Path("/test/apache/hudi/sub");
assertEquals("hudi/sub",FSUtils.getRelativePartitionPath(basePath, partitionPath));
assertEquals("hudi/sub", FSUtils.getRelativePartitionPath(basePath, partitionPath));
}
@Test
@@ -232,9 +229,8 @@ public class TestFSUtils extends HoodieCommonTestHarness {
String log1Ver0 = makeOldLogFileName("file1", ".log", "1", 0);
String log1Ver1 = makeOldLogFileName("file1", ".log", "1", 1);
String log1base2 = makeOldLogFileName("file1", ".log", "2", 0);
List<HoodieLogFile> logFiles =
Arrays.asList(log1base2, log1Ver1, log1Ver0).stream()
.map(f -> new HoodieLogFile(f)).collect(Collectors.toList());
List<HoodieLogFile> logFiles = Arrays.asList(log1base2, log1Ver1, log1Ver0).stream().map(f -> new HoodieLogFile(f))
.collect(Collectors.toList());
logFiles.sort(HoodieLogFile.getLogFileComparator());
assertEquals(log1Ver0, logFiles.get(0).getFileName());
assertEquals(log1Ver1, logFiles.get(1).getFileName());
@@ -265,11 +261,8 @@ public class TestFSUtils extends HoodieCommonTestHarness {
assertEquals(log1base2W1, logFiles.get(5).getFileName());
}
public static String makeOldLogFileName(String fileId, String logFileExtension,
String baseCommitTime, int version) {
Pattern oldLogFilePattern =
Pattern.compile("\\.(.*)_(.*)\\.(.*)\\.([0-9]*)(\\.([0-9]*))");
return "." + String
.format("%s_%s%s.%d", fileId, baseCommitTime, logFileExtension, version);
public static String makeOldLogFileName(String fileId, String logFileExtension, String baseCommitTime, int version) {
Pattern oldLogFilePattern = Pattern.compile("\\.(.*)_(.*)\\.(.*)\\.([0-9]*)(\\.([0-9]*))");
return "." + String.format("%s_%s%s.%d", fileId, baseCommitTime, logFileExtension, version);
}
}

View File

@@ -28,10 +28,9 @@ import org.junit.Test;
public class TestHoodieAvroUtils {
private static String EXAMPLE_SCHEMA = "{\"type\": \"record\"," + "\"name\": \"testrec\"," + "\"fields\": [ "
+ "{\"name\": \"timestamp\",\"type\": \"double\"},"
+ "{\"name\": \"_row_key\", \"type\": \"string\"},"
+ "{\"name\": \"non_pii_col\", \"type\": \"string\"},"
+ "{\"name\": \"pii_col\", \"type\": \"string\", \"column_category\": \"user_profile\"}]}";
+ "{\"name\": \"timestamp\",\"type\": \"double\"}," + "{\"name\": \"_row_key\", \"type\": \"string\"},"
+ "{\"name\": \"non_pii_col\", \"type\": \"string\"},"
+ "{\"name\": \"pii_col\", \"type\": \"string\", \"column_category\": \"user_profile\"}]}";
@Test
public void testPropsPresent() {

View File

@@ -66,8 +66,8 @@ public class TestParquetUtils extends HoodieCommonTestHarness {
Collections.sort(rowKeys);
assertEquals("Did not read back the expected list of keys", rowKeys, rowKeysInFile);
BloomFilter filterInFile = ParquetUtils.readBloomFilterFromParquetMetadata(HoodieTestUtils.getDefaultHadoopConf(),
new Path(filePath));
BloomFilter filterInFile =
ParquetUtils.readBloomFilterFromParquetMetadata(HoodieTestUtils.getDefaultHadoopConf(), new Path(filePath));
for (String rowKey : rowKeys) {
assertTrue("key should be found in bloom filter", filterInFile.mightContain(rowKey));
}
@@ -89,9 +89,8 @@ public class TestParquetUtils extends HoodieCommonTestHarness {
writeParquetFile(filePath, rowKeys);
// Read and verify
Set<String> filtered = ParquetUtils.filterParquetRowKeys(HoodieTestUtils.getDefaultHadoopConf(),
new Path(filePath),
filter);
Set<String> filtered =
ParquetUtils.filterParquetRowKeys(HoodieTestUtils.getDefaultHadoopConf(), new Path(filePath), filter);
assertEquals("Filtered count does not match", filter.size(), filtered.size());
@@ -100,13 +99,12 @@ public class TestParquetUtils extends HoodieCommonTestHarness {
}
}
private void writeParquetFile(String filePath,
List<String> rowKeys) throws Exception {
private void writeParquetFile(String filePath, List<String> rowKeys) throws Exception {
// Write out a parquet file
Schema schema = HoodieAvroUtils.getRecordKeySchema();
BloomFilter filter = new BloomFilter(1000, 0.0001);
HoodieAvroWriteSupport writeSupport = new HoodieAvroWriteSupport(new AvroSchemaConverter().convert(schema), schema,
filter);
HoodieAvroWriteSupport writeSupport =
new HoodieAvroWriteSupport(new AvroSchemaConverter().convert(schema), schema, filter);
ParquetWriter writer = new ParquetWriter(new Path(filePath), writeSupport, CompressionCodecName.GZIP,
120 * 1024 * 1024, ParquetWriter.DEFAULT_PAGE_SIZE);
for (String rowKey : rowKeys) {

View File

@@ -88,13 +88,10 @@ public class TestRecord implements Serializable {
this.favoriteFloatNumber = (float) ((recordNumber + commitHashCode) / 1024.0);
this.favoriteDoubleNumber = (recordNumber + commitHashCode) / 1024.0;
this.tags = new HashMap<>();
this.tags.put("mapItem1",
new TestMapItemRecord("item" + recordNumber, "item" + recordNumber + commitTimeSuffix));
this.tags.put("mapItem2",
new TestMapItemRecord("item2" + recordNumber, "item2" + recordNumber + commitTimeSuffix));
this.tags.put("mapItem1", new TestMapItemRecord("item" + recordNumber, "item" + recordNumber + commitTimeSuffix));
this.tags.put("mapItem2", new TestMapItemRecord("item2" + recordNumber, "item2" + recordNumber + commitTimeSuffix));
this.testNestedRecord = new TestNestedRecord(false, "UserId" + recordNumber + commitTimeSuffix);
this.stringArray = new String[]{"stringArray0" + commitTimeSuffix,
"stringArray1" + commitTimeSuffix};
this.stringArray = new String[] {"stringArray0" + commitTimeSuffix, "stringArray1" + commitTimeSuffix};
}
public String toJsonString() throws IOException {

View File

@@ -42,7 +42,7 @@ public class TestRocksDBManager {
@BeforeClass
public static void setUpClass() {
dbManager = new RocksDBDAO("/dummy/path",
FileSystemViewStorageConfig.newBuilder().build().newBuilder().build().getRocksdbBasePath());
FileSystemViewStorageConfig.newBuilder().build().newBuilder().build().getRocksdbBasePath());
}
@AfterClass
@@ -121,8 +121,8 @@ public class TestRocksDBManager {
prefixes.stream().forEach(prefix -> {
List<Pair<String, Payload>> gotPayloads =
dbManager.<Payload>prefixSearch(family, prefix).collect(Collectors.toList());
Assert.assertEquals("Size check for prefix (" + prefix + ") and family (" + family + ")",
0, gotPayloads.size());
Assert.assertEquals("Size check for prefix (" + prefix + ") and family (" + family + ")", 0,
gotPayloads.size());
});
});
@@ -170,10 +170,8 @@ public class TestRocksDBManager {
return false;
}
Payload payload = (Payload) o;
return Objects.equals(prefix, payload.prefix)
&& Objects.equals(key, payload.key)
&& Objects.equals(val, payload.val)
&& Objects.equals(family, payload.family);
return Objects.equals(prefix, payload.prefix) && Objects.equals(key, payload.key)
&& Objects.equals(val, payload.val) && Objects.equals(family, payload.family);
}
@Override

View File

@@ -71,7 +71,7 @@ public class TestSerializationUtils {
@Override
public boolean equals(Object obj) {
if (!(obj instanceof NonSerializableClass)) {
if (!(obj instanceof NonSerializableClass)) {
return false;
}
final NonSerializableClass other = (NonSerializableClass) obj;

View File

@@ -79,8 +79,7 @@ public class TestDiskBasedMap extends HoodieCommonTestHarness {
@Test
public void testSimpleInsertWithoutHoodieMetadata() throws IOException, URISyntaxException {
DiskBasedMap records = new DiskBasedMap<>(basePath);
List<HoodieRecord> hoodieRecords = SchemaTestUtil
.generateHoodieTestRecordsWithoutHoodieMetadata(0, 1000);
List<HoodieRecord> hoodieRecords = SchemaTestUtil.generateHoodieTestRecordsWithoutHoodieMetadata(0, 1000);
Set<String> recordKeys = new HashSet<>();
// insert generated records into the map
hoodieRecords.stream().forEach(r -> {
@@ -113,12 +112,10 @@ public class TestDiskBasedMap extends HoodieCommonTestHarness {
assertTrue(fileSize > 0);
// generate updates from inserts
List<IndexedRecord> updatedRecords =
SchemaTestUtil
.updateHoodieTestRecords(recordKeys, SchemaTestUtil.generateHoodieTestRecords(0, 100),
HoodieActiveTimeline.createNewCommitTime());
String newCommitTime = ((GenericRecord) updatedRecords.get(0))
.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString();
List<IndexedRecord> updatedRecords = SchemaTestUtil.updateHoodieTestRecords(recordKeys,
SchemaTestUtil.generateHoodieTestRecords(0, 100), HoodieActiveTimeline.createNewCommitTime());
String newCommitTime =
((GenericRecord) updatedRecords.get(0)).get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString();
// perform upserts
recordKeys = SpillableMapTestUtils.upsertRecords(updatedRecords, records);
@@ -133,8 +130,8 @@ public class TestDiskBasedMap extends HoodieCommonTestHarness {
assert recordKeys.contains(rec.getRecordKey());
try {
IndexedRecord indexedRecord = (IndexedRecord) rec.getData().getInsertValue(schema).get();
String latestCommitTime = ((GenericRecord) indexedRecord)
.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString();
String latestCommitTime =
((GenericRecord) indexedRecord).get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString();
assertEquals(latestCommitTime, newCommitTime);
} catch (IOException io) {
throw new UncheckedIOException(io);
@@ -149,15 +146,14 @@ public class TestDiskBasedMap extends HoodieCommonTestHarness {
// Test sizeEstimator without hoodie metadata fields
List<HoodieRecord> hoodieRecords = SchemaTestUtil.generateHoodieTestRecords(0, 1, schema);
long payloadSize = SpillableMapUtils.computePayloadSize(hoodieRecords.remove(0),
new HoodieRecordSizeEstimator(schema));
long payloadSize =
SpillableMapUtils.computePayloadSize(hoodieRecords.remove(0), new HoodieRecordSizeEstimator(schema));
assertTrue(payloadSize > 0);
// Test sizeEstimator with hoodie metadata fields
schema = HoodieAvroUtils.addMetadataFields(schema);
hoodieRecords = SchemaTestUtil.generateHoodieTestRecords(0, 1, schema);
payloadSize = SpillableMapUtils.computePayloadSize(hoodieRecords.remove(0),
new HoodieRecordSizeEstimator(schema));
payloadSize = SpillableMapUtils.computePayloadSize(hoodieRecords.remove(0), new HoodieRecordSizeEstimator(schema));
assertTrue(payloadSize > 0);
// Following tests payloads without an Avro Schema in the Record
@@ -165,24 +161,21 @@ public class TestDiskBasedMap extends HoodieCommonTestHarness {
// Test sizeEstimator without hoodie metadata fields and without schema object in the payload
schema = SchemaTestUtil.getSimpleSchema();
List<IndexedRecord> indexedRecords = SchemaTestUtil.generateHoodieTestRecords(0, 1);
hoodieRecords = indexedRecords.stream()
.map(r -> new HoodieRecord(new HoodieKey(UUID.randomUUID().toString(), "0000/00/00"),
hoodieRecords =
indexedRecords.stream().map(r -> new HoodieRecord(new HoodieKey(UUID.randomUUID().toString(), "0000/00/00"),
new AvroBinaryTestPayload(Option.of((GenericRecord) r)))).collect(Collectors.toList());
payloadSize = SpillableMapUtils.computePayloadSize(hoodieRecords.remove(0),
new HoodieRecordSizeEstimator(schema));
payloadSize = SpillableMapUtils.computePayloadSize(hoodieRecords.remove(0), new HoodieRecordSizeEstimator(schema));
assertTrue(payloadSize > 0);
// Test sizeEstimator with hoodie metadata fields and without schema object in the payload
final Schema simpleSchemaWithMetadata = HoodieAvroUtils
.addMetadataFields(SchemaTestUtil.getSimpleSchema());
final Schema simpleSchemaWithMetadata = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getSimpleSchema());
indexedRecords = SchemaTestUtil.generateHoodieTestRecords(0, 1);
hoodieRecords = indexedRecords.stream()
.map(r -> new HoodieRecord(new HoodieKey(UUID.randomUUID().toString(), "0000/00/00"),
new AvroBinaryTestPayload(Option
.of(HoodieAvroUtils.rewriteRecord((GenericRecord) r, simpleSchemaWithMetadata)))))
new AvroBinaryTestPayload(
Option.of(HoodieAvroUtils.rewriteRecord((GenericRecord) r, simpleSchemaWithMetadata)))))
.collect(Collectors.toList());
payloadSize = SpillableMapUtils.computePayloadSize(hoodieRecords.remove(0),
new HoodieRecordSizeEstimator(schema));
payloadSize = SpillableMapUtils.computePayloadSize(hoodieRecords.remove(0), new HoodieRecordSizeEstimator(schema));
assertTrue(payloadSize > 0);
}
@@ -195,8 +188,7 @@ public class TestDiskBasedMap extends HoodieCommonTestHarness {
// Test sizeEstimatorPerformance with simpleSchema
Schema schema = SchemaTestUtil.getSimpleSchema();
List<HoodieRecord> hoodieRecords = SchemaTestUtil.generateHoodieTestRecords(0, 1, schema);
HoodieRecordSizeEstimator sizeEstimator =
new HoodieRecordSizeEstimator(schema);
HoodieRecordSizeEstimator sizeEstimator = new HoodieRecordSizeEstimator(schema);
HoodieRecord record = hoodieRecords.remove(0);
long startTime = System.currentTimeMillis();
SpillableMapUtils.computePayloadSize(record, sizeEstimator);

View File

@@ -64,8 +64,7 @@ public class TestExternalSpillableMap extends HoodieCommonTestHarness {
Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getSimpleSchema());
String payloadClazz = HoodieAvroPayload.class.getName();
ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> records =
new ExternalSpillableMap<>(16L, basePath,
new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(schema)); //16B
new ExternalSpillableMap<>(16L, basePath, new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(schema)); // 16B
List<IndexedRecord> iRecords = SchemaTestUtil.generateHoodieTestRecords(0, 100);
List<String> recordKeys = SpillableMapTestUtils.upsertRecords(iRecords, records);
@@ -86,8 +85,7 @@ public class TestExternalSpillableMap extends HoodieCommonTestHarness {
String payloadClazz = HoodieAvroPayload.class.getName();
ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> records =
new ExternalSpillableMap<>(16L, basePath,
new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(schema)); //16B
new ExternalSpillableMap<>(16L, basePath, new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(schema)); // 16B
List<IndexedRecord> iRecords = SchemaTestUtil.generateHoodieTestRecords(0, 100);
List<String> recordKeys = SpillableMapTestUtils.upsertRecords(iRecords, records);
@@ -124,8 +122,7 @@ public class TestExternalSpillableMap extends HoodieCommonTestHarness {
String payloadClazz = HoodieAvroPayload.class.getName();
ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> records =
new ExternalSpillableMap<>(16L, basePath,
new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(schema)); //16B
new ExternalSpillableMap<>(16L, basePath, new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(schema)); // 16B
List<IndexedRecord> iRecords = SchemaTestUtil.generateHoodieTestRecords(0, 100);
// insert a bunch of records so that values spill to disk too
@@ -178,9 +175,8 @@ public class TestExternalSpillableMap extends HoodieCommonTestHarness {
public void simpleTestWithException() throws IOException, URISyntaxException {
Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getSimpleSchema());
ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> records =
new ExternalSpillableMap<>(16L, failureOutputPath,
new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(schema)); //16B
ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> records = new ExternalSpillableMap<>(16L,
failureOutputPath, new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(schema)); // 16B
List<IndexedRecord> iRecords = SchemaTestUtil.generateHoodieTestRecords(0, 100);
List<String> recordKeys = SpillableMapTestUtils.upsertRecords(iRecords, records);
@@ -198,8 +194,7 @@ public class TestExternalSpillableMap extends HoodieCommonTestHarness {
String payloadClazz = HoodieAvroPayload.class.getName();
ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> records =
new ExternalSpillableMap<>(16L, basePath,
new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(schema)); //16B
new ExternalSpillableMap<>(16L, basePath, new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(schema)); // 16B
List<String> recordKeys = new ArrayList<>();
// Ensure we spill to disk
@@ -218,8 +213,8 @@ public class TestExternalSpillableMap extends HoodieCommonTestHarness {
List<String> keysToBeUpdated = new ArrayList<>();
keysToBeUpdated.add(key);
// Update the commitTime for this record
List<IndexedRecord> updatedRecords = SchemaTestUtil
.updateHoodieTestRecords(keysToBeUpdated, recordsToUpdate, newCommitTime);
List<IndexedRecord> updatedRecords =
SchemaTestUtil.updateHoodieTestRecords(keysToBeUpdated, recordsToUpdate, newCommitTime);
// Upsert this updated record
SpillableMapTestUtils.upsertRecords(updatedRecords, records);
GenericRecord gRecord = (GenericRecord) records.get(key).getData().getInsertValue(schema).get();
@@ -251,8 +246,7 @@ public class TestExternalSpillableMap extends HoodieCommonTestHarness {
String payloadClazz = HoodieAvroPayload.class.getName();
ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> records =
new ExternalSpillableMap<>(16L, basePath,
new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(schema)); //16B
new ExternalSpillableMap<>(16L, basePath, new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(schema)); // 16B
List<String> recordKeys = new ArrayList<>();
// Ensure we spill to disk
@@ -290,8 +284,8 @@ public class TestExternalSpillableMap extends HoodieCommonTestHarness {
key = recordKeys.get(recordKeys.size() - 1);
record = records.get(key);
// Get the field we want to update
fieldName = schema.getFields().stream().filter(field -> field.schema().getType() == Schema.Type.STRING)
.findAny().get().name();
fieldName = schema.getFields().stream().filter(field -> field.schema().getType() == Schema.Type.STRING).findAny()
.get().name();
// Use a new value to update this field
newValue = "update2";
recordsToUpdate = new ArrayList<>();
@@ -311,6 +305,5 @@ public class TestExternalSpillableMap extends HoodieCommonTestHarness {
// TODO : come up with a performance eval test for spillableMap
@Test
public void testLargeInsertUpsert() {
}
public void testLargeInsertUpsert() {}
}