1
0

Async Compaction Main API changes

This commit is contained in:
Balaji Varadarajan
2018-05-23 23:09:25 -07:00
committed by vinoth chandar
parent 9b78523d62
commit 2f8ce93030
18 changed files with 878 additions and 267 deletions

View File

@@ -41,6 +41,7 @@ import com.uber.hoodie.common.table.HoodieTimeline;
import com.uber.hoodie.common.table.TableFileSystemView;
import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline;
import com.uber.hoodie.common.table.timeline.HoodieInstant;
import com.uber.hoodie.common.table.timeline.HoodieInstant.State;
import com.uber.hoodie.common.table.view.HoodieTableFileSystemView;
import com.uber.hoodie.config.HoodieCompactionConfig;
import com.uber.hoodie.config.HoodieIndexConfig;
@@ -202,7 +203,7 @@ public class TestMergeOnReadTable {
commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant();
assertFalse(commit.isPresent());
String compactionCommitTime = client.startCompaction();
String compactionCommitTime = client.scheduleCompaction(Optional.empty()).get().toString();
client.compact(compactionCommitTime);
allFiles = HoodieTestUtils.listAllDataFilesInPath(dfs, cfg.getBasePath());
@@ -522,14 +523,15 @@ public class TestMergeOnReadTable {
metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
String compactionCommit = client.startCompaction();
JavaRDD<WriteStatus> writeStatus = client.compact(compactionCommit);
client.commitCompaction(compactionCommit, writeStatus);
String compactionInstantTime = client.scheduleCompaction(Optional.empty()).get().toString();
JavaRDD<WriteStatus> ws = client.compact(compactionInstantTime);
client.commitCompaction(compactionInstantTime, ws, Optional.empty());
allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath());
metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
hoodieTable = HoodieTable.getHoodieTable(metaClient, cfg, jsc);
roView = new HoodieTableFileSystemView(metaClient, metaClient.getCommitsTimeline(), allFiles);
List<HoodieDataFile> dataFiles2 = roView.getLatestDataFiles().collect(Collectors.toList());
final String compactedCommitTime = metaClient.getActiveTimeline().reload().getCommitsTimeline().lastInstant().get()
.getTimestamp();
@@ -679,12 +681,13 @@ public class TestMergeOnReadTable {
}
}
// Do a compaction
metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
table = HoodieTable.getHoodieTable(metaClient, config, jsc);
// Mark 2nd delta-instant as completed
metaClient.getActiveTimeline().saveAsComplete(
new HoodieInstant(State.INFLIGHT, HoodieTimeline.DELTA_COMMIT_ACTION, newCommitTime), Optional.empty());
String commitTime = writeClient.startCompaction();
JavaRDD<WriteStatus> result = writeClient.compact(commitTime);
// Do a compaction
String compactionInstantTime = writeClient.scheduleCompaction(Optional.empty()).get().toString();
JavaRDD<WriteStatus> result = writeClient.compact(compactionInstantTime);
// Verify that recently written compacted data file has no log file
metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
@@ -741,9 +744,9 @@ public class TestMergeOnReadTable {
Assert.assertTrue(totalUpsertTime > 0);
// Do a compaction
String commitTime = writeClient.startCompaction();
statuses = writeClient.compact(commitTime);
writeClient.commitCompaction(commitTime, statuses);
String compactionInstantTime = writeClient.scheduleCompaction(Optional.empty()).get().toString();
statuses = writeClient.compact(compactionInstantTime);
writeClient.commitCompaction(compactionInstantTime, statuses, Optional.empty());
// total time taken for scanning log files should be greater than 0
long timeTakenForScanner = statuses.map(writeStatus -> writeStatus.getStat().getRuntimeStats().getTotalScanTime())
.reduce((a, b) -> a + b).longValue();
@@ -782,11 +785,11 @@ public class TestMergeOnReadTable {
Assert.assertTrue(numLogFiles > 0);
// Do a compaction
String commitTime = writeClient.startCompaction();
String commitTime = writeClient.scheduleCompaction(Optional.empty()).get().toString();
statuses = writeClient.compact(commitTime);
Assert.assertTrue(statuses.map(status -> status.getStat().getPath().contains("parquet")).count() == numLogFiles);
Assert.assertEquals(statuses.count(), numLogFiles);
writeClient.commitCompaction(commitTime, statuses);
writeClient.commitCompaction(commitTime, statuses, Optional.empty());
}
@Test
@@ -824,6 +827,8 @@ public class TestMergeOnReadTable {
writeClient.commit(newCommitTime, statuses);
// rollback a successful commit
// Sleep for small interval to force a new rollback start time.
Thread.sleep(5);
writeClient.rollback(newCommitTime);
final HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
HoodieTable table = HoodieTable.getHoodieTable(metaClient, config, jsc);
@@ -875,12 +880,12 @@ public class TestMergeOnReadTable {
Assert.assertTrue(numLogFiles > 0);
// Do a compaction
newCommitTime = writeClient.startCompaction();
newCommitTime = writeClient.scheduleCompaction(Optional.empty()).get().toString();
statuses = writeClient.compact(newCommitTime);
// Ensure all log files have been compacted into parquet files
Assert.assertTrue(statuses.map(status -> status.getStat().getPath().contains("parquet")).count() == numLogFiles);
Assert.assertEquals(statuses.count(), numLogFiles);
writeClient.commitCompaction(newCommitTime, statuses);
writeClient.commitCompaction(newCommitTime, statuses, Optional.empty());
// Trigger a rollback of compaction
writeClient.rollback(newCommitTime);
for (String partitionPath : dataGen.getPartitionPaths()) {