Take 2: Refactor hoodie-common and create right abstractions for Hoodie Storage V2.0
- Refactored timelines to be a single timeline for all active events and one for archived events. CommitTimeline and other timelines can be inferred by applying a filter on the activeTimelime - Introduced HoodieInstant to abstract different types of action, commit time and if isInFlight - Implemented other review comments
This commit is contained in:
@@ -16,6 +16,7 @@
|
|||||||
|
|
||||||
package com.uber.hoodie.cli;
|
package com.uber.hoodie.cli;
|
||||||
|
|
||||||
|
import com.uber.hoodie.common.table.HoodieTableConfig;
|
||||||
import org.springframework.core.Ordered;
|
import org.springframework.core.Ordered;
|
||||||
import org.springframework.core.annotation.Order;
|
import org.springframework.core.annotation.Order;
|
||||||
import org.springframework.shell.plugin.support.DefaultPromptProvider;
|
import org.springframework.shell.plugin.support.DefaultPromptProvider;
|
||||||
@@ -27,17 +28,18 @@ public class HoodiePrompt extends DefaultPromptProvider {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getPrompt() {
|
public String getPrompt() {
|
||||||
|
String tableName = HoodieCLI.tableMetadata.getTableConfig().getTableName();
|
||||||
switch (HoodieCLI.state) {
|
switch (HoodieCLI.state) {
|
||||||
case INIT:
|
case INIT:
|
||||||
return "hoodie->";
|
return "hoodie->";
|
||||||
case DATASET:
|
case DATASET:
|
||||||
return "hoodie:" + HoodieCLI.tableMetadata.getTableConfig().getTableName() + "->";
|
return "hoodie:" + tableName + "->";
|
||||||
case SYNC:
|
case SYNC:
|
||||||
return "hoodie:" + HoodieCLI.tableMetadata.getTableConfig().getTableName() + " <==> "
|
return "hoodie:" + tableName + " <==> "
|
||||||
+ HoodieCLI.syncTableMetadata.getTableConfig().getTableName() + "->";
|
+ HoodieCLI.syncTableMetadata.getTableConfig().getTableName() + "->";
|
||||||
}
|
}
|
||||||
if (HoodieCLI.tableMetadata != null)
|
if (HoodieCLI.tableMetadata != null)
|
||||||
return "hoodie:" + HoodieCLI.tableMetadata.getTableConfig().getTableName() + "->";
|
return "hoodie:" + tableName + "->";
|
||||||
return "hoodie->";
|
return "hoodie->";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -24,6 +24,8 @@ import com.uber.hoodie.common.model.HoodieCommitMetadata;
|
|||||||
import com.uber.hoodie.common.model.HoodieWriteStat;
|
import com.uber.hoodie.common.model.HoodieWriteStat;
|
||||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||||
|
import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline;
|
||||||
|
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||||
import com.uber.hoodie.common.util.NumericUtils;
|
import com.uber.hoodie.common.util.NumericUtils;
|
||||||
|
|
||||||
import org.apache.spark.launcher.SparkLauncher;
|
import org.apache.spark.launcher.SparkLauncher;
|
||||||
@@ -38,6 +40,7 @@ import java.util.ArrayList;
|
|||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Optional;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
@Component
|
@Component
|
||||||
@@ -67,15 +70,16 @@ public class CommitsCommand implements CommandMarker {
|
|||||||
@CliOption(key = {
|
@CliOption(key = {
|
||||||
"limit"}, mandatory = false, help = "Limit commits", unspecifiedDefaultValue = "10")
|
"limit"}, mandatory = false, help = "Limit commits", unspecifiedDefaultValue = "10")
|
||||||
final Integer limit) throws IOException {
|
final Integer limit) throws IOException {
|
||||||
HoodieTimeline timeline = HoodieCLI.tableMetadata.getActiveCommitTimeline();
|
HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline();
|
||||||
List<String> commits = timeline.getInstants().collect(Collectors.toList());
|
HoodieTimeline timeline = activeTimeline.getCommitTimeline().filterCompletedInstants();
|
||||||
|
List<HoodieInstant> commits = timeline.getInstants().collect(Collectors.toList());
|
||||||
String[][] rows = new String[commits.size()][];
|
String[][] rows = new String[commits.size()][];
|
||||||
Collections.reverse(commits);
|
Collections.reverse(commits);
|
||||||
for (int i = 0; i < commits.size(); i++) {
|
for (int i = 0; i < commits.size(); i++) {
|
||||||
String commit = commits.get(i);
|
HoodieInstant commit = commits.get(i);
|
||||||
HoodieCommitMetadata commitMetadata =
|
HoodieCommitMetadata commitMetadata =
|
||||||
HoodieCommitMetadata.fromBytes(timeline.readInstantDetails(commit).get());
|
HoodieCommitMetadata.fromBytes(timeline.getInstantDetails(commit).get());
|
||||||
rows[i] = new String[] {commit,
|
rows[i] = new String[] {commit.getTimestamp(),
|
||||||
NumericUtils.humanReadableByteCount(commitMetadata.fetchTotalBytesWritten()),
|
NumericUtils.humanReadableByteCount(commitMetadata.fetchTotalBytesWritten()),
|
||||||
String.valueOf(commitMetadata.fetchTotalFilesInsert()),
|
String.valueOf(commitMetadata.fetchTotalFilesInsert()),
|
||||||
String.valueOf(commitMetadata.fetchTotalFilesUpdated()),
|
String.valueOf(commitMetadata.fetchTotalFilesUpdated()),
|
||||||
@@ -104,10 +108,14 @@ public class CommitsCommand implements CommandMarker {
|
|||||||
final String commitTime,
|
final String commitTime,
|
||||||
@CliOption(key = {"sparkProperties"}, help = "Spark Properites File Path")
|
@CliOption(key = {"sparkProperties"}, help = "Spark Properites File Path")
|
||||||
final String sparkPropertiesPath) throws Exception {
|
final String sparkPropertiesPath) throws Exception {
|
||||||
if (!HoodieCLI.tableMetadata.getActiveCommitTimeline().containsInstant(commitTime)) {
|
HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline();
|
||||||
return "Commit " + commitTime + " not found in Commits " + HoodieCLI.tableMetadata
|
HoodieTimeline timeline = activeTimeline.getCommitTimeline().filterCompletedInstants();
|
||||||
.getActiveCommitTimeline().getInstants().collect(Collectors.toList());
|
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime);
|
||||||
|
|
||||||
|
if (!timeline.containsInstant(commitInstant)) {
|
||||||
|
return "Commit " + commitTime + " not found in Commits " + timeline;
|
||||||
}
|
}
|
||||||
|
|
||||||
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
|
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
|
||||||
sparkLauncher.addAppArgs(SparkMain.SparkCommand.ROLLBACK.toString(),
|
sparkLauncher.addAppArgs(SparkMain.SparkCommand.ROLLBACK.toString(),
|
||||||
commitTime,
|
commitTime,
|
||||||
@@ -127,13 +135,15 @@ public class CommitsCommand implements CommandMarker {
|
|||||||
public String showCommitPartitions(
|
public String showCommitPartitions(
|
||||||
@CliOption(key = {"commit"}, help = "Commit to show")
|
@CliOption(key = {"commit"}, help = "Commit to show")
|
||||||
final String commitTime) throws Exception {
|
final String commitTime) throws Exception {
|
||||||
HoodieTimeline timeline = HoodieCLI.tableMetadata.getActiveCommitTimeline();
|
HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline();
|
||||||
if (!timeline.containsInstant(commitTime)) {
|
HoodieTimeline timeline = activeTimeline.getCommitTimeline().filterCompletedInstants();
|
||||||
return "Commit " + commitTime + " not found in Commits " + HoodieCLI.tableMetadata
|
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime);
|
||||||
.getActiveCommitTimeline().getInstants().collect(Collectors.toList());
|
|
||||||
|
if (!timeline.containsInstant(commitInstant)) {
|
||||||
|
return "Commit " + commitTime + " not found in Commits " + timeline;
|
||||||
}
|
}
|
||||||
HoodieCommitMetadata meta =
|
HoodieCommitMetadata meta =
|
||||||
HoodieCommitMetadata.fromBytes(timeline.readInstantDetails(commitTime).get());
|
HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(commitInstant).get());
|
||||||
List<String[]> rows = new ArrayList<String[]>();
|
List<String[]> rows = new ArrayList<String[]>();
|
||||||
for (Map.Entry<String, List<HoodieWriteStat>> entry : meta.getPartitionToWriteStats()
|
for (Map.Entry<String, List<HoodieWriteStat>> entry : meta.getPartitionToWriteStats()
|
||||||
.entrySet()) {
|
.entrySet()) {
|
||||||
@@ -173,13 +183,15 @@ public class CommitsCommand implements CommandMarker {
|
|||||||
public String showCommitFiles(
|
public String showCommitFiles(
|
||||||
@CliOption(key = {"commit"}, help = "Commit to show")
|
@CliOption(key = {"commit"}, help = "Commit to show")
|
||||||
final String commitTime) throws Exception {
|
final String commitTime) throws Exception {
|
||||||
HoodieTimeline timeline = HoodieCLI.tableMetadata.getActiveCommitTimeline();
|
HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline();
|
||||||
if (!timeline.containsInstant(commitTime)) {
|
HoodieTimeline timeline = activeTimeline.getCommitTimeline().filterCompletedInstants();
|
||||||
return "Commit " + commitTime + " not found in Commits " + HoodieCLI.tableMetadata
|
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime);
|
||||||
.getActiveCommitTimeline().getInstants().collect(Collectors.toList());
|
|
||||||
|
if (!timeline.containsInstant(commitInstant)) {
|
||||||
|
return "Commit " + commitTime + " not found in Commits " + timeline;
|
||||||
}
|
}
|
||||||
HoodieCommitMetadata meta =
|
HoodieCommitMetadata meta =
|
||||||
HoodieCommitMetadata.fromBytes(timeline.readInstantDetails(commitTime).get());
|
HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(commitInstant).get());
|
||||||
List<String[]> rows = new ArrayList<String[]>();
|
List<String[]> rows = new ArrayList<String[]>();
|
||||||
for (Map.Entry<String, List<HoodieWriteStat>> entry : meta.getPartitionToWriteStats()
|
for (Map.Entry<String, List<HoodieWriteStat>> entry : meta.getPartitionToWriteStats()
|
||||||
.entrySet()) {
|
.entrySet()) {
|
||||||
@@ -208,26 +220,26 @@ public class CommitsCommand implements CommandMarker {
|
|||||||
@CliOption(key = {"path"}, help = "Path of the dataset to compare to")
|
@CliOption(key = {"path"}, help = "Path of the dataset to compare to")
|
||||||
final String path) throws Exception {
|
final String path) throws Exception {
|
||||||
HoodieTableMetaClient target = new HoodieTableMetaClient(HoodieCLI.fs, path);
|
HoodieTableMetaClient target = new HoodieTableMetaClient(HoodieCLI.fs, path);
|
||||||
HoodieTimeline targetTimeline = target.getActiveCommitTimeline();
|
HoodieTimeline targetTimeline = target.getActiveTimeline().getCommitTimeline().filterCompletedInstants();;
|
||||||
HoodieTableMetaClient source = HoodieCLI.tableMetadata;
|
HoodieTableMetaClient source = HoodieCLI.tableMetadata;
|
||||||
HoodieTimeline sourceTimeline = source.getActiveCommitTimeline();
|
HoodieTimeline sourceTimeline = source.getActiveTimeline().getCommitTimeline().filterCompletedInstants();;
|
||||||
String targetLatestCommit =
|
String targetLatestCommit =
|
||||||
targetTimeline.getInstants().iterator().hasNext() ? "0" : targetTimeline.lastInstant().get();
|
targetTimeline.getInstants().iterator().hasNext() ? "0" : targetTimeline.lastInstant().get().getTimestamp();
|
||||||
String sourceLatestCommit =
|
String sourceLatestCommit =
|
||||||
sourceTimeline.getInstants().iterator().hasNext() ? "0" : sourceTimeline.lastInstant().get();
|
sourceTimeline.getInstants().iterator().hasNext() ? "0" : sourceTimeline.lastInstant().get().getTimestamp();
|
||||||
|
|
||||||
if (sourceLatestCommit != null && sourceTimeline
|
if (sourceLatestCommit != null && sourceTimeline
|
||||||
.compareInstants(targetLatestCommit, sourceLatestCommit, HoodieTimeline.GREATER)) {
|
.compareTimestamps(targetLatestCommit, sourceLatestCommit, HoodieTimeline.GREATER)) {
|
||||||
// source is behind the target
|
// source is behind the target
|
||||||
List<String> commitsToCatchup =
|
List<String> commitsToCatchup =
|
||||||
targetTimeline.findInstantsAfter(sourceLatestCommit, Integer.MAX_VALUE)
|
targetTimeline.findInstantsAfter(sourceLatestCommit, Integer.MAX_VALUE)
|
||||||
.collect(Collectors.toList());
|
.getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList());
|
||||||
return "Source " + source.getTableConfig().getTableName() + " is behind by " + commitsToCatchup.size()
|
return "Source " + source.getTableConfig().getTableName() + " is behind by "
|
||||||
+ " commits. Commits to catch up - " + commitsToCatchup;
|
+ commitsToCatchup.size() + " commits. Commits to catch up - " + commitsToCatchup;
|
||||||
} else {
|
} else {
|
||||||
List<String> commitsToCatchup =
|
List<String> commitsToCatchup =
|
||||||
sourceTimeline.findInstantsAfter(targetLatestCommit, Integer.MAX_VALUE)
|
sourceTimeline.findInstantsAfter(targetLatestCommit, Integer.MAX_VALUE)
|
||||||
.collect(Collectors.toList());
|
.getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList());
|
||||||
return "Source " + source.getTableConfig().getTableName() + " is ahead by "
|
return "Source " + source.getTableConfig().getTableName() + " is ahead by "
|
||||||
+ commitsToCatchup.size() + " commits. Commits to catch up - " + commitsToCatchup;
|
+ commitsToCatchup.size() + " commits. Commits to catch up - " + commitsToCatchup;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ import com.uber.hoodie.cli.utils.HiveUtil;
|
|||||||
import com.uber.hoodie.cli.HoodieCLI;
|
import com.uber.hoodie.cli.HoodieCLI;
|
||||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||||
|
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||||
import org.springframework.shell.core.CommandMarker;
|
import org.springframework.shell.core.CommandMarker;
|
||||||
import org.springframework.shell.core.annotation.CliAvailabilityIndicator;
|
import org.springframework.shell.core.annotation.CliAvailabilityIndicator;
|
||||||
import org.springframework.shell.core.annotation.CliCommand;
|
import org.springframework.shell.core.annotation.CliCommand;
|
||||||
@@ -60,9 +61,9 @@ public class HoodieSyncCommand implements CommandMarker {
|
|||||||
"hivePass"}, mandatory = true, unspecifiedDefaultValue = "", help = "hive password to connect to")
|
"hivePass"}, mandatory = true, unspecifiedDefaultValue = "", help = "hive password to connect to")
|
||||||
final String hivePass) throws Exception {
|
final String hivePass) throws Exception {
|
||||||
HoodieTableMetaClient target = HoodieCLI.syncTableMetadata;
|
HoodieTableMetaClient target = HoodieCLI.syncTableMetadata;
|
||||||
HoodieTimeline targetTimeline = target.getActiveCommitTimeline();
|
HoodieTimeline targetTimeline = target.getActiveTimeline().getCommitTimeline();
|
||||||
HoodieTableMetaClient source = HoodieCLI.tableMetadata;
|
HoodieTableMetaClient source = HoodieCLI.tableMetadata;
|
||||||
HoodieTimeline sourceTimeline = source.getActiveCommitTimeline();
|
HoodieTimeline sourceTimeline = source.getActiveTimeline().getCommitTimeline();
|
||||||
long sourceCount = 0;
|
long sourceCount = 0;
|
||||||
long targetCount = 0;
|
long targetCount = 0;
|
||||||
if ("complete".equals(mode)) {
|
if ("complete".equals(mode)) {
|
||||||
@@ -74,36 +75,40 @@ public class HoodieSyncCommand implements CommandMarker {
|
|||||||
}
|
}
|
||||||
|
|
||||||
String targetLatestCommit =
|
String targetLatestCommit =
|
||||||
targetTimeline.getInstants().iterator().hasNext() ? "0" : targetTimeline.lastInstant().get();
|
targetTimeline.getInstants().iterator().hasNext() ? "0" : targetTimeline.lastInstant().get().getTimestamp();
|
||||||
String sourceLatestCommit =
|
String sourceLatestCommit =
|
||||||
sourceTimeline.getInstants().iterator().hasNext() ? "0" : sourceTimeline.lastInstant().get();
|
sourceTimeline.getInstants().iterator().hasNext() ? "0" : sourceTimeline.lastInstant().get().getTimestamp();
|
||||||
|
|
||||||
if (sourceLatestCommit != null && sourceTimeline
|
if (sourceLatestCommit != null && sourceTimeline
|
||||||
.compareInstants(targetLatestCommit, sourceLatestCommit, HoodieTimeline.GREATER)) {
|
.compareTimestamps(targetLatestCommit, sourceLatestCommit, HoodieTimeline.GREATER)) {
|
||||||
// source is behind the target
|
// source is behind the target
|
||||||
List<String> commitsToCatchup =
|
List<HoodieInstant> commitsToCatchup =
|
||||||
targetTimeline.findInstantsAfter(sourceLatestCommit, Integer.MAX_VALUE)
|
targetTimeline.findInstantsAfter(sourceLatestCommit, Integer.MAX_VALUE).getInstants()
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
if (commitsToCatchup.isEmpty()) {
|
if (commitsToCatchup.isEmpty()) {
|
||||||
return "Count difference now is (count(" + target.getTableConfig().getTableName()
|
return "Count difference now is (count(" + target.getTableConfig().getTableName()
|
||||||
+ ") - count(" + source.getTableConfig().getTableName() + ") == " + (targetCount
|
+ ") - count(" + source.getTableConfig().getTableName() + ") == " + (targetCount
|
||||||
- sourceCount);
|
- sourceCount);
|
||||||
} else {
|
} else {
|
||||||
long newInserts = CommitUtil.countNewRecords(target, commitsToCatchup);
|
long newInserts = CommitUtil.countNewRecords(target,
|
||||||
|
commitsToCatchup.stream().map(HoodieInstant::getTimestamp)
|
||||||
|
.collect(Collectors.toList()));
|
||||||
return "Count difference now is (count(" + target.getTableConfig().getTableName()
|
return "Count difference now is (count(" + target.getTableConfig().getTableName()
|
||||||
+ ") - count(" + source.getTableConfig().getTableName() + ") == " + (targetCount
|
+ ") - count(" + source.getTableConfig().getTableName() + ") == " + (targetCount
|
||||||
- sourceCount) + ". Catch up count is " + newInserts;
|
- sourceCount) + ". Catch up count is " + newInserts;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
List<String> commitsToCatchup =
|
List<HoodieInstant> commitsToCatchup =
|
||||||
sourceTimeline.findInstantsAfter(targetLatestCommit, Integer.MAX_VALUE)
|
sourceTimeline.findInstantsAfter(targetLatestCommit, Integer.MAX_VALUE).getInstants()
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
if (commitsToCatchup.isEmpty()) {
|
if (commitsToCatchup.isEmpty()) {
|
||||||
return "Count difference now is (count(" + source.getTableConfig().getTableName()
|
return "Count difference now is (count(" + source.getTableConfig().getTableName()
|
||||||
+ ") - count(" + target.getTableConfig().getTableName() + ") == " + (sourceCount
|
+ ") - count(" + target.getTableConfig().getTableName() + ") == " + (sourceCount
|
||||||
- targetCount);
|
- targetCount);
|
||||||
} else {
|
} else {
|
||||||
long newInserts = CommitUtil.countNewRecords(source, commitsToCatchup);
|
long newInserts = CommitUtil.countNewRecords(source,
|
||||||
|
commitsToCatchup.stream().map(HoodieInstant::getTimestamp)
|
||||||
|
.collect(Collectors.toList()));
|
||||||
return "Count difference now is (count(" + source.getTableConfig().getTableName()
|
return "Count difference now is (count(" + source.getTableConfig().getTableName()
|
||||||
+ ") - count(" + target.getTableConfig().getTableName() + ") == " + (sourceCount
|
+ ") - count(" + target.getTableConfig().getTableName() + ") == " + (sourceCount
|
||||||
- targetCount) + ". Catch up count is " + newInserts;
|
- targetCount) + ". Catch up count is " + newInserts;
|
||||||
|
|||||||
@@ -24,6 +24,8 @@ import com.uber.hoodie.cli.HoodieCLI;
|
|||||||
import com.uber.hoodie.cli.HoodiePrintHelper;
|
import com.uber.hoodie.cli.HoodiePrintHelper;
|
||||||
import com.uber.hoodie.common.model.HoodieCommitMetadata;
|
import com.uber.hoodie.common.model.HoodieCommitMetadata;
|
||||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||||
|
import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline;
|
||||||
|
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||||
import com.uber.hoodie.common.util.FSUtils;
|
import com.uber.hoodie.common.util.FSUtils;
|
||||||
import com.uber.hoodie.common.util.NumericUtils;
|
import com.uber.hoodie.common.util.NumericUtils;
|
||||||
|
|
||||||
@@ -53,21 +55,22 @@ public class StatsCommand implements CommandMarker {
|
|||||||
long totalRecordsUpserted = 0;
|
long totalRecordsUpserted = 0;
|
||||||
long totalRecordsWritten = 0;
|
long totalRecordsWritten = 0;
|
||||||
|
|
||||||
HoodieTimeline timeline = HoodieCLI.tableMetadata.getActiveCommitTimeline();
|
HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline();
|
||||||
|
HoodieTimeline timeline = activeTimeline.getCommitTimeline().filterCompletedInstants();
|
||||||
|
|
||||||
String[][] rows = new String[new Long(timeline.getTotalInstants()).intValue() + 1][];
|
String[][] rows = new String[new Long(timeline.countInstants()).intValue() + 1][];
|
||||||
int i = 0;
|
int i = 0;
|
||||||
DecimalFormat df = new DecimalFormat("#.00");
|
DecimalFormat df = new DecimalFormat("#.00");
|
||||||
for (String commitTime : timeline.getInstants().collect(
|
for (HoodieInstant commitTime : timeline.getInstants().collect(
|
||||||
Collectors.toList())) {
|
Collectors.toList())) {
|
||||||
String waf = "0";
|
String waf = "0";
|
||||||
HoodieCommitMetadata commit = HoodieCommitMetadata.fromBytes(timeline.readInstantDetails(commitTime).get());
|
HoodieCommitMetadata commit = HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(commitTime).get());
|
||||||
if (commit.fetchTotalUpdateRecordsWritten() > 0) {
|
if (commit.fetchTotalUpdateRecordsWritten() > 0) {
|
||||||
waf = df.format(
|
waf = df.format(
|
||||||
(float) commit.fetchTotalRecordsWritten() / commit
|
(float) commit.fetchTotalRecordsWritten() / commit
|
||||||
.fetchTotalUpdateRecordsWritten());
|
.fetchTotalUpdateRecordsWritten());
|
||||||
}
|
}
|
||||||
rows[i++] = new String[] {commitTime,
|
rows[i++] = new String[] {commitTime.getTimestamp(),
|
||||||
String.valueOf(commit.fetchTotalUpdateRecordsWritten()),
|
String.valueOf(commit.fetchTotalUpdateRecordsWritten()),
|
||||||
String.valueOf(commit.fetchTotalRecordsWritten()), waf};
|
String.valueOf(commit.fetchTotalRecordsWritten()), waf};
|
||||||
totalRecordsUpserted += commit.fetchTotalUpdateRecordsWritten();
|
totalRecordsUpserted += commit.fetchTotalUpdateRecordsWritten();
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ package com.uber.hoodie.cli.utils;
|
|||||||
import com.uber.hoodie.common.model.HoodieCommitMetadata;
|
import com.uber.hoodie.common.model.HoodieCommitMetadata;
|
||||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||||
|
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
@@ -27,10 +28,11 @@ public class CommitUtil {
|
|||||||
public static long countNewRecords(HoodieTableMetaClient target, List<String> commitsToCatchup)
|
public static long countNewRecords(HoodieTableMetaClient target, List<String> commitsToCatchup)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
long totalNew = 0;
|
long totalNew = 0;
|
||||||
HoodieTimeline timeline = target.getActiveCommitTimeline();
|
HoodieTimeline timeline = target.getActiveTimeline().reload().getCommitTimeline().filterCompletedInstants();
|
||||||
timeline = timeline.reload();
|
|
||||||
for(String commit:commitsToCatchup) {
|
for(String commit:commitsToCatchup) {
|
||||||
HoodieCommitMetadata c = HoodieCommitMetadata.fromBytes(timeline.readInstantDetails(commit).get());
|
HoodieCommitMetadata c = HoodieCommitMetadata.fromBytes(timeline
|
||||||
|
.getInstantDetails(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commit))
|
||||||
|
.get());
|
||||||
totalNew += c.fetchTotalRecordsWritten() - c.fetchTotalUpdateRecordsWritten();
|
totalNew += c.fetchTotalRecordsWritten() - c.fetchTotalUpdateRecordsWritten();
|
||||||
}
|
}
|
||||||
return totalNew;
|
return totalNew;
|
||||||
|
|||||||
@@ -78,7 +78,7 @@ class DedupeSparkJob (basePath: String,
|
|||||||
val fsView = new ReadOptimizedTableView(fs, metadata)
|
val fsView = new ReadOptimizedTableView(fs, metadata)
|
||||||
|
|
||||||
val allFiles = fs.listStatus(new org.apache.hadoop.fs.Path(s"${basePath}/${duplicatedPartitionPath}"))
|
val allFiles = fs.listStatus(new org.apache.hadoop.fs.Path(s"${basePath}/${duplicatedPartitionPath}"))
|
||||||
val latestFiles:java.util.List[HoodieDataFile] = fsView.streamLatestVersions(allFiles).collect(Collectors.toList[HoodieDataFile]())
|
val latestFiles:java.util.List[HoodieDataFile] = fsView.getLatestVersions(allFiles).collect(Collectors.toList[HoodieDataFile]())
|
||||||
val filteredStatuses = latestFiles.map(f => f.getPath)
|
val filteredStatuses = latestFiles.map(f => f.getPath)
|
||||||
LOG.info(s" List of files under partition: ${} => ${filteredStatuses.mkString(" ")}")
|
LOG.info(s" List of files under partition: ${} => ${filteredStatuses.mkString(" ")}")
|
||||||
|
|
||||||
@@ -129,7 +129,7 @@ class DedupeSparkJob (basePath: String,
|
|||||||
val fsView = new ReadOptimizedTableView(fs, metadata)
|
val fsView = new ReadOptimizedTableView(fs, metadata)
|
||||||
|
|
||||||
val allFiles = fs.listStatus(new Path(s"${basePath}/${duplicatedPartitionPath}"))
|
val allFiles = fs.listStatus(new Path(s"${basePath}/${duplicatedPartitionPath}"))
|
||||||
val latestFiles:java.util.List[HoodieDataFile] = fsView.streamLatestVersions(allFiles).collect(Collectors.toList[HoodieDataFile]())
|
val latestFiles:java.util.List[HoodieDataFile] = fsView.getLatestVersions(allFiles).collect(Collectors.toList[HoodieDataFile]())
|
||||||
|
|
||||||
val fileNameToPathMap = latestFiles.map(f => (f.getFileId, new Path(f.getPath))).toMap
|
val fileNameToPathMap = latestFiles.map(f => (f.getFileId, new Path(f.getPath))).toMap
|
||||||
val dupeFixPlan = planDuplicateFix()
|
val dupeFixPlan = planDuplicateFix()
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ import com.uber.hoodie.common.model.HoodieRecord;
|
|||||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||||
import com.uber.hoodie.common.table.TableFileSystemView;
|
import com.uber.hoodie.common.table.TableFileSystemView;
|
||||||
|
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||||
import com.uber.hoodie.common.table.view.ReadOptimizedTableView;
|
import com.uber.hoodie.common.table.view.ReadOptimizedTableView;
|
||||||
import com.uber.hoodie.common.util.FSUtils;
|
import com.uber.hoodie.common.util.FSUtils;
|
||||||
import com.uber.hoodie.config.HoodieWriteConfig;
|
import com.uber.hoodie.config.HoodieWriteConfig;
|
||||||
@@ -89,8 +90,10 @@ public class HoodieReadClient implements Serializable {
|
|||||||
this.jsc = jsc;
|
this.jsc = jsc;
|
||||||
this.fs = FSUtils.getFs();
|
this.fs = FSUtils.getFs();
|
||||||
this.metaClient = new HoodieTableMetaClient(fs, basePath, true);
|
this.metaClient = new HoodieTableMetaClient(fs, basePath, true);
|
||||||
this.commitTimeline = metaClient.getActiveCommitTimeline();
|
this.commitTimeline =
|
||||||
this.index = new HoodieBloomIndex(HoodieWriteConfig.newBuilder().withPath(basePath).build(), jsc);
|
metaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants();
|
||||||
|
this.index =
|
||||||
|
new HoodieBloomIndex(HoodieWriteConfig.newBuilder().withPath(basePath).build(), jsc);
|
||||||
this.sqlContextOpt = Optional.absent();
|
this.sqlContextOpt = Optional.absent();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -191,7 +194,7 @@ public class HoodieReadClient implements Serializable {
|
|||||||
+ metaClient.getBasePath());
|
+ metaClient.getBasePath());
|
||||||
}
|
}
|
||||||
|
|
||||||
List<HoodieDataFile> latestFiles = fileSystemView.streamLatestVersions(fs.globStatus(new Path(path))).collect(
|
List<HoodieDataFile> latestFiles = fileSystemView.getLatestVersions(fs.globStatus(new Path(path))).collect(
|
||||||
Collectors.toList());
|
Collectors.toList());
|
||||||
for (HoodieDataFile file : latestFiles) {
|
for (HoodieDataFile file : latestFiles) {
|
||||||
filteredPaths.add(file.getPath());
|
filteredPaths.add(file.getPath());
|
||||||
@@ -212,17 +215,17 @@ public class HoodieReadClient implements Serializable {
|
|||||||
*/
|
*/
|
||||||
public Dataset<Row> readSince(String lastCommitTimestamp) {
|
public Dataset<Row> readSince(String lastCommitTimestamp) {
|
||||||
|
|
||||||
List<String> commitsToReturn =
|
List<HoodieInstant> commitsToReturn =
|
||||||
commitTimeline.findInstantsAfter(lastCommitTimestamp, Integer.MAX_VALUE)
|
commitTimeline.findInstantsAfter(lastCommitTimestamp, Integer.MAX_VALUE)
|
||||||
.collect(Collectors.toList());
|
.getInstants().collect(Collectors.toList());
|
||||||
//TODO: we can potentially trim this down to only affected partitions, using CommitMetadata
|
//TODO: we can potentially trim this down to only affected partitions, using CommitMetadata
|
||||||
try {
|
try {
|
||||||
|
|
||||||
// Go over the commit metadata, and obtain the new files that need to be read.
|
// Go over the commit metadata, and obtain the new files that need to be read.
|
||||||
HashMap<String, String> fileIdToFullPath = new HashMap<>();
|
HashMap<String, String> fileIdToFullPath = new HashMap<>();
|
||||||
for (String commit: commitsToReturn) {
|
for (HoodieInstant commit: commitsToReturn) {
|
||||||
HoodieCommitMetadata metadata =
|
HoodieCommitMetadata metadata =
|
||||||
HoodieCommitMetadata.fromBytes(commitTimeline.readInstantDetails(commit).get());
|
HoodieCommitMetadata.fromBytes(commitTimeline.getInstantDetails(commit).get());
|
||||||
// get files from each commit, and replace any previous versions
|
// get files from each commit, and replace any previous versions
|
||||||
fileIdToFullPath.putAll(metadata.getFileIdAndFullPaths());
|
fileIdToFullPath.putAll(metadata.getFileIdAndFullPaths());
|
||||||
}
|
}
|
||||||
@@ -240,13 +243,15 @@ public class HoodieReadClient implements Serializable {
|
|||||||
*/
|
*/
|
||||||
public Dataset<Row> readCommit(String commitTime) {
|
public Dataset<Row> readCommit(String commitTime) {
|
||||||
assertSqlContext();
|
assertSqlContext();
|
||||||
if (!commitTimeline.containsInstant(commitTime)) {
|
HoodieInstant commitInstant =
|
||||||
|
new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime);
|
||||||
|
if (!commitTimeline.containsInstant(commitInstant)) {
|
||||||
new HoodieException("No commit exists at " + commitTime);
|
new HoodieException("No commit exists at " + commitTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
HoodieCommitMetadata commitMetdata =
|
HoodieCommitMetadata commitMetdata =
|
||||||
HoodieCommitMetadata.fromBytes(commitTimeline.readInstantDetails(commitTime).get());
|
HoodieCommitMetadata.fromBytes(commitTimeline.getInstantDetails(commitInstant).get());
|
||||||
Collection<String> paths = commitMetdata.getFileIdAndFullPaths().values();
|
Collection<String> paths = commitMetdata.getFileIdAndFullPaths().values();
|
||||||
return sqlContextOpt.get().read()
|
return sqlContextOpt.get().read()
|
||||||
.parquet(paths.toArray(new String[paths.size()]))
|
.parquet(paths.toArray(new String[paths.size()]))
|
||||||
@@ -298,13 +303,14 @@ public class HoodieReadClient implements Serializable {
|
|||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public List<String> listCommitsSince(String commitTimestamp) {
|
public List<String> listCommitsSince(String commitTimestamp) {
|
||||||
return commitTimeline.findInstantsAfter(commitTimestamp, Integer.MAX_VALUE).collect(Collectors.toList());
|
return commitTimeline.findInstantsAfter(commitTimestamp, Integer.MAX_VALUE).getInstants()
|
||||||
|
.map(HoodieInstant::getTimestamp).collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the last successful commit (a successful write operation) into a Hoodie table.
|
* Returns the last successful commit (a successful write operation) into a Hoodie table.
|
||||||
*/
|
*/
|
||||||
public String latestCommit() {
|
public String latestCommit() {
|
||||||
return commitTimeline.lastInstant().get();
|
return commitTimeline.lastInstant().get().getTimestamp();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -25,6 +25,8 @@ import com.uber.hoodie.common.model.HoodieRecordPayload;
|
|||||||
import com.uber.hoodie.common.model.HoodieWriteStat;
|
import com.uber.hoodie.common.model.HoodieWriteStat;
|
||||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||||
|
import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline;
|
||||||
|
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||||
import com.uber.hoodie.common.util.FSUtils;
|
import com.uber.hoodie.common.util.FSUtils;
|
||||||
import com.uber.hoodie.config.HoodieWriteConfig;
|
import com.uber.hoodie.config.HoodieWriteConfig;
|
||||||
import com.uber.hoodie.exception.HoodieCommitException;
|
import com.uber.hoodie.exception.HoodieCommitException;
|
||||||
@@ -302,7 +304,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
|
|||||||
logger.info("Comitting " + commitTime);
|
logger.info("Comitting " + commitTime);
|
||||||
HoodieTableMetaClient metaClient =
|
HoodieTableMetaClient metaClient =
|
||||||
new HoodieTableMetaClient(fs, config.getBasePath(), true);
|
new HoodieTableMetaClient(fs, config.getBasePath(), true);
|
||||||
HoodieTimeline commitTimeline = metaClient.getActiveCommitTimeline();
|
HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline();
|
||||||
|
|
||||||
List<Tuple2<String, HoodieWriteStat>> stats =
|
List<Tuple2<String, HoodieWriteStat>> stats =
|
||||||
writeStatuses.mapToPair(new PairFunction<WriteStatus, String, HoodieWriteStat>() {
|
writeStatuses.mapToPair(new PairFunction<WriteStatus, String, HoodieWriteStat>() {
|
||||||
@@ -319,7 +321,8 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
commitTimeline.saveInstantAsComplete(commitTime,
|
activeTimeline.saveAsComplete(
|
||||||
|
new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, commitTime),
|
||||||
Optional.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
|
Optional.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
|
||||||
// Save was a success
|
// Save was a success
|
||||||
// We cannot have unbounded commit files. Archive commits if we have to archive
|
// We cannot have unbounded commit files. Archive commits if we have to archive
|
||||||
@@ -356,17 +359,19 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
|
|||||||
final Timer.Context context = metrics.getRollbackCtx();
|
final Timer.Context context = metrics.getRollbackCtx();
|
||||||
HoodieTableMetaClient metaClient =
|
HoodieTableMetaClient metaClient =
|
||||||
new HoodieTableMetaClient(fs, config.getBasePath(), true);
|
new HoodieTableMetaClient(fs, config.getBasePath(), true);
|
||||||
HoodieTimeline commitTimeline = metaClient.getActiveCommitTimeline();
|
HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline();
|
||||||
|
HoodieTimeline inflightTimeline = activeTimeline.getCommitTimeline().filterInflights();
|
||||||
|
HoodieTimeline commitTimeline = activeTimeline.getCommitTimeline().filterCompletedInstants();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
if (commitTimeline.lastInstant().isPresent()
|
if (commitTimeline.lastInstant().isPresent()
|
||||||
&& commitTimeline.findInstantsAfter(commitTime, Integer.MAX_VALUE).count() > 0) {
|
&& !commitTimeline.findInstantsAfter(commitTime, Integer.MAX_VALUE).empty()) {
|
||||||
throw new HoodieRollbackException("Found commits after time :" + commitTime +
|
throw new HoodieRollbackException("Found commits after time :" + commitTime +
|
||||||
", please rollback greater commits first");
|
", please rollback greater commits first");
|
||||||
}
|
}
|
||||||
|
|
||||||
List<String> inflights =
|
List<String> inflights = inflightTimeline.getInstants().map(HoodieInstant::getTimestamp)
|
||||||
commitTimeline.getInflightInstants().collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
if (!inflights.isEmpty() && inflights.indexOf(commitTime) != inflights.size() - 1) {
|
if (!inflights.isEmpty() && inflights.indexOf(commitTime) != inflights.size() - 1) {
|
||||||
throw new HoodieRollbackException(
|
throw new HoodieRollbackException(
|
||||||
"Found in-flight commits after time :" + commitTime +
|
"Found in-flight commits after time :" + commitTime +
|
||||||
@@ -374,10 +379,12 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (inflights.contains(commitTime) || (commitTimeline.lastInstant().isPresent()
|
if (inflights.contains(commitTime) || (commitTimeline.lastInstant().isPresent()
|
||||||
&& commitTimeline.lastInstant().get().equals(commitTime))) {
|
&& commitTimeline.lastInstant().get().getTimestamp().equals(commitTime))) {
|
||||||
// 1. Atomically unpublish this commit
|
// 1. Atomically unpublish this commit
|
||||||
if(commitTimeline.containsInstant(commitTime)) {
|
if(!inflights.contains(commitTime)) {
|
||||||
commitTimeline.revertInstantToInflight(commitTime);
|
// This is completed commit, first revert it to inflight to unpublish data
|
||||||
|
activeTimeline.revertToInflight(
|
||||||
|
new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime));
|
||||||
}
|
}
|
||||||
// 2. Revert the index changes
|
// 2. Revert the index changes
|
||||||
logger.info("Clean out index changes at time: " + commitTime);
|
logger.info("Clean out index changes at time: " + commitTime);
|
||||||
@@ -415,7 +422,8 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
|
|||||||
});
|
});
|
||||||
// 4. Remove commit
|
// 4. Remove commit
|
||||||
logger.info("Clean out metadata files at time: " + commitTime);
|
logger.info("Clean out metadata files at time: " + commitTime);
|
||||||
commitTimeline.removeInflightFromTimeline(commitTime);
|
activeTimeline.deleteInflight(
|
||||||
|
new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, commitTime));
|
||||||
|
|
||||||
if (context != null) {
|
if (context != null) {
|
||||||
long durationInMs = metrics.getDurationInMs(context.stop());
|
long durationInMs = metrics.getDurationInMs(context.stop());
|
||||||
@@ -446,7 +454,6 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
|
|||||||
final Timer.Context context = metrics.getCleanCtx();
|
final Timer.Context context = metrics.getCleanCtx();
|
||||||
HoodieTableMetaClient metaClient =
|
HoodieTableMetaClient metaClient =
|
||||||
new HoodieTableMetaClient(fs, config.getBasePath(), true);
|
new HoodieTableMetaClient(fs, config.getBasePath(), true);
|
||||||
HoodieTimeline commitTimeline = metaClient.getActiveCommitTimeline();
|
|
||||||
|
|
||||||
List<String> partitionsToClean = FSUtils.getAllPartitionPaths(fs, metaClient.getBasePath());
|
List<String> partitionsToClean = FSUtils.getAllPartitionPaths(fs, metaClient.getBasePath());
|
||||||
// shuffle to distribute cleaning work across partitions evenly
|
// shuffle to distribute cleaning work across partitions evenly
|
||||||
@@ -497,8 +504,9 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
|
|||||||
logger.info("Generate a new commit time " + commitTime);
|
logger.info("Generate a new commit time " + commitTime);
|
||||||
HoodieTableMetaClient metaClient =
|
HoodieTableMetaClient metaClient =
|
||||||
new HoodieTableMetaClient(fs, config.getBasePath(), true);
|
new HoodieTableMetaClient(fs, config.getBasePath(), true);
|
||||||
HoodieTimeline commitTimeline = metaClient.getActiveCommitTimeline();
|
HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline();
|
||||||
commitTimeline.saveInstantAsInflight(commitTime);
|
activeTimeline.createInflight(
|
||||||
|
new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, commitTime));
|
||||||
}
|
}
|
||||||
|
|
||||||
public static SparkConf registerClasses(SparkConf conf) {
|
public static SparkConf registerClasses(SparkConf conf) {
|
||||||
@@ -534,14 +542,17 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Cleanup all inflight commits
|
* Cleanup all inflight commits
|
||||||
|
*
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
private void rollbackInflightCommits() {
|
private void rollbackInflightCommits() {
|
||||||
HoodieTableMetaClient metaClient =
|
HoodieTableMetaClient metaClient =
|
||||||
new HoodieTableMetaClient(fs, config.getBasePath(), true);
|
new HoodieTableMetaClient(fs, config.getBasePath(), true);
|
||||||
HoodieTimeline commitTimeline = metaClient.getActiveCommitTimeline();
|
HoodieTimeline inflightTimeline =
|
||||||
|
metaClient.getActiveTimeline().getCommitTimeline().filterInflights();
|
||||||
|
|
||||||
List<String> commits = commitTimeline.getInflightInstants().collect(Collectors.toList());
|
List<String> commits = inflightTimeline.getInstants().map(HoodieInstant::getTimestamp)
|
||||||
|
.collect(Collectors.toList());
|
||||||
Collections.reverse(commits);
|
Collections.reverse(commits);
|
||||||
for (String commit : commits) {
|
for (String commit : commits) {
|
||||||
rollback(commit);
|
rollback(commit);
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ package com.uber.hoodie.index;
|
|||||||
import com.google.common.base.Optional;
|
import com.google.common.base.Optional;
|
||||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||||
|
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||||
import com.uber.hoodie.config.HoodieWriteConfig;
|
import com.uber.hoodie.config.HoodieWriteConfig;
|
||||||
import com.uber.hoodie.WriteStatus;
|
import com.uber.hoodie.WriteStatus;
|
||||||
import com.uber.hoodie.common.model.HoodieKey;
|
import com.uber.hoodie.common.model.HoodieKey;
|
||||||
@@ -128,9 +129,12 @@ public class HBaseIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
|
|||||||
String fileId =
|
String fileId =
|
||||||
Bytes.toString(result.getValue(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN));
|
Bytes.toString(result.getValue(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN));
|
||||||
|
|
||||||
HoodieTimeline commitTimeline = metaClient.getActiveCommitTimeline();
|
HoodieTimeline commitTimeline =
|
||||||
|
metaClient.getActiveTimeline().getCommitTimeline()
|
||||||
|
.filterCompletedInstants();
|
||||||
// if the last commit ts for this row is less than the system commit ts
|
// if the last commit ts for this row is less than the system commit ts
|
||||||
if (commitTimeline.hasInstants() && commitTimeline.containsInstant(commitTs)) {
|
if (!commitTimeline.empty() && commitTimeline.containsInstant(
|
||||||
|
new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTs))) {
|
||||||
rec.setCurrentLocation(new HoodieRecordLocation(commitTs, fileId));
|
rec.setCurrentLocation(new HoodieRecordLocation(commitTs, fileId));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -22,6 +22,7 @@ import com.google.common.base.Optional;
|
|||||||
import com.uber.hoodie.common.model.HoodieDataFile;
|
import com.uber.hoodie.common.model.HoodieDataFile;
|
||||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||||
import com.uber.hoodie.common.table.TableFileSystemView;
|
import com.uber.hoodie.common.table.TableFileSystemView;
|
||||||
|
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||||
import com.uber.hoodie.common.table.view.ReadOptimizedTableView;
|
import com.uber.hoodie.common.table.view.ReadOptimizedTableView;
|
||||||
import com.uber.hoodie.config.HoodieWriteConfig;
|
import com.uber.hoodie.config.HoodieWriteConfig;
|
||||||
import com.uber.hoodie.WriteStatus;
|
import com.uber.hoodie.WriteStatus;
|
||||||
@@ -218,13 +219,13 @@ public class HoodieBloomIndex<T extends HoodieRecordPayload> extends HoodieIndex
|
|||||||
public Iterable<Tuple2<String, String>> call(String partitionPath) {
|
public Iterable<Tuple2<String, String>> call(String partitionPath) {
|
||||||
FileSystem fs = FSUtils.getFs();
|
FileSystem fs = FSUtils.getFs();
|
||||||
TableFileSystemView view = new ReadOptimizedTableView(fs, metaClient);
|
TableFileSystemView view = new ReadOptimizedTableView(fs, metaClient);
|
||||||
java.util.Optional<String> latestCommitTime =
|
java.util.Optional<HoodieInstant> latestCommitTime =
|
||||||
metaClient.getActiveCommitTimeline().lastInstant();
|
metaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants().lastInstant();
|
||||||
List<Tuple2<String, String>> list = new ArrayList<>();
|
List<Tuple2<String, String>> list = new ArrayList<>();
|
||||||
if (latestCommitTime.isPresent()) {
|
if (latestCommitTime.isPresent()) {
|
||||||
List<HoodieDataFile> filteredFiles =
|
List<HoodieDataFile> filteredFiles =
|
||||||
view.streamLatestVersionInPartition(partitionPath,
|
view.getLatestVersionInPartition(partitionPath,
|
||||||
latestCommitTime.get()).collect(Collectors.toList());
|
latestCommitTime.get().getTimestamp()).collect(Collectors.toList());
|
||||||
for (HoodieDataFile file : filteredFiles) {
|
for (HoodieDataFile file : filteredFiles) {
|
||||||
list.add(new Tuple2<>(partitionPath, file.getFileName()));
|
list.add(new Tuple2<>(partitionPath, file.getFileName()));
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ import com.uber.hoodie.common.model.HoodieDataFile;
|
|||||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||||
import com.uber.hoodie.common.table.TableFileSystemView;
|
import com.uber.hoodie.common.table.TableFileSystemView;
|
||||||
|
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||||
import com.uber.hoodie.common.table.view.ReadOptimizedTableView;
|
import com.uber.hoodie.common.table.view.ReadOptimizedTableView;
|
||||||
import com.uber.hoodie.config.HoodieWriteConfig;
|
import com.uber.hoodie.config.HoodieWriteConfig;
|
||||||
import com.uber.hoodie.common.util.FSUtils;
|
import com.uber.hoodie.common.util.FSUtils;
|
||||||
@@ -38,55 +39,56 @@ import java.util.stream.Collectors;
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Cleaner is responsible for garbage collecting older files in a given partition path, such that
|
* Cleaner is responsible for garbage collecting older files in a given partition path, such that
|
||||||
*
|
* <p>
|
||||||
* 1) It provides sufficient time for existing queries running on older versions, to finish
|
* 1) It provides sufficient time for existing queries running on older versions, to finish
|
||||||
*
|
* <p>
|
||||||
* 2) It bounds the growth of the files in the file system
|
* 2) It bounds the growth of the files in the file system
|
||||||
*
|
* <p>
|
||||||
* TODO: Should all cleaning be done based on {@link com.uber.hoodie.common.model.HoodieCommitMetadata}
|
* TODO: Should all cleaning be done based on {@link com.uber.hoodie.common.model.HoodieCommitMetadata}
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
public class HoodieCleaner {
|
public class HoodieCleaner {
|
||||||
private static Logger logger = LogManager.getLogger(HoodieCleaner.class);
|
private static Logger logger = LogManager.getLogger(HoodieCleaner.class);
|
||||||
|
|
||||||
|
|
||||||
public enum CleaningPolicy {
|
public enum CleaningPolicy {
|
||||||
KEEP_LATEST_FILE_VERSIONS,
|
KEEP_LATEST_FILE_VERSIONS,
|
||||||
KEEP_LATEST_COMMITS
|
KEEP_LATEST_COMMITS
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private final TableFileSystemView fileSystemView;
|
private final TableFileSystemView fileSystemView;
|
||||||
private final HoodieTimeline commitTimeline;
|
private final HoodieTimeline commitTimeline;
|
||||||
private HoodieTableMetaClient metaClient;
|
private HoodieTableMetaClient metaClient;
|
||||||
private HoodieWriteConfig config;
|
private HoodieWriteConfig config;
|
||||||
private FileSystem fs;
|
private FileSystem fs;
|
||||||
|
|
||||||
public HoodieCleaner(HoodieTableMetaClient metaClient,
|
public HoodieCleaner(HoodieTableMetaClient metaClient, HoodieWriteConfig config,
|
||||||
HoodieWriteConfig config,
|
|
||||||
FileSystem fs) {
|
FileSystem fs) {
|
||||||
this.metaClient = metaClient;
|
this.metaClient = metaClient;
|
||||||
this.fileSystemView = new ReadOptimizedTableView(fs, metaClient);
|
this.fileSystemView = new ReadOptimizedTableView(fs, metaClient);
|
||||||
this.commitTimeline = metaClient.getActiveCommitTimeline();
|
this.commitTimeline =
|
||||||
|
metaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants();
|
||||||
this.config = config;
|
this.config = config;
|
||||||
this.fs = fs;
|
this.fs = fs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
|
||||||
* Selects the older versions of files for cleaning, such that it bounds the number of versions of each file.
|
* Selects the older versions of files for cleaning, such that it bounds the number of versions of each file.
|
||||||
* This policy is useful, if you are simply interested in querying the table, and you don't want too many
|
* This policy is useful, if you are simply interested in querying the table, and you don't want too many
|
||||||
* versions for a single file (i.e run it with versionsRetained = 1)
|
* versions for a single file (i.e run it with versionsRetained = 1)
|
||||||
*
|
*
|
||||||
*
|
|
||||||
* @param partitionPath
|
* @param partitionPath
|
||||||
* @return
|
* @return
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
private List<String> getFilesToCleanKeepingLatestVersions(String partitionPath) throws IOException {
|
private List<String> getFilesToCleanKeepingLatestVersions(String partitionPath)
|
||||||
logger.info("Cleaning "+ partitionPath+", retaining latest "+ config.getCleanerFileVersionsRetained()+" file versions. ");
|
throws IOException {
|
||||||
List<List<HoodieDataFile>> fileVersions = fileSystemView.streamEveryVersionInPartition(partitionPath).collect(
|
logger.info("Cleaning " + partitionPath + ", retaining latest " + config
|
||||||
Collectors.toList());
|
.getCleanerFileVersionsRetained() + " file versions. ");
|
||||||
|
List<List<HoodieDataFile>> fileVersions =
|
||||||
|
fileSystemView.getEveryVersionInPartition(partitionPath)
|
||||||
|
.collect(Collectors.toList());
|
||||||
List<String> deletePaths = new ArrayList<>();
|
List<String> deletePaths = new ArrayList<>();
|
||||||
|
|
||||||
for (List<HoodieDataFile> versionsForFileId : fileVersions) {
|
for (List<HoodieDataFile> versionsForFileId : fileVersions) {
|
||||||
@@ -99,9 +101,7 @@ public class HoodieCleaner {
|
|||||||
}
|
}
|
||||||
// Delete the remaining files
|
// Delete the remaining files
|
||||||
while (commitItr.hasNext()) {
|
while (commitItr.hasNext()) {
|
||||||
deletePaths.add(String.format("%s/%s/%s",
|
deletePaths.add(String.format("%s/%s/%s", config.getBasePath(), partitionPath,
|
||||||
config.getBasePath(),
|
|
||||||
partitionPath,
|
|
||||||
commitItr.next().getFileName()));
|
commitItr.next().getFileName()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -111,16 +111,16 @@ public class HoodieCleaner {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Selects the versions for file for cleaning, such that it
|
* Selects the versions for file for cleaning, such that it
|
||||||
*
|
* <p>
|
||||||
* - Leaves the latest version of the file untouched
|
* - Leaves the latest version of the file untouched
|
||||||
* - For older versions,
|
* - For older versions,
|
||||||
* - It leaves all the commits untouched which has occured in last <code>config.getCleanerCommitsRetained()</code> commits
|
* - It leaves all the commits untouched which has occured in last <code>config.getCleanerCommitsRetained()</code> commits
|
||||||
* - It leaves ONE commit before this window. We assume that the max(query execution time) == commit_batch_time * config.getCleanerCommitsRetained(). This is 12 hours by default.
|
* - It leaves ONE commit before this window. We assume that the max(query execution time) == commit_batch_time * config.getCleanerCommitsRetained(). This is 12 hours by default.
|
||||||
* This is essential to leave the file used by the query thats running for the max time.
|
* This is essential to leave the file used by the query thats running for the max time.
|
||||||
*
|
* <p>
|
||||||
* This provides the effect of having lookback into all changes that happened in the last X
|
* This provides the effect of having lookback into all changes that happened in the last X
|
||||||
* commits. (eg: if you retain 24 commits, and commit batch time is 30 mins, then you have 12 hrs of lookback)
|
* commits. (eg: if you retain 24 commits, and commit batch time is 30 mins, then you have 12 hrs of lookback)
|
||||||
*
|
* <p>
|
||||||
* This policy is the default.
|
* This policy is the default.
|
||||||
*
|
*
|
||||||
* @param partitionPath
|
* @param partitionPath
|
||||||
@@ -135,11 +135,12 @@ public class HoodieCleaner {
|
|||||||
List<String> deletePaths = new ArrayList<>();
|
List<String> deletePaths = new ArrayList<>();
|
||||||
|
|
||||||
// determine if we have enough commits, to start cleaning.
|
// determine if we have enough commits, to start cleaning.
|
||||||
if (commitTimeline.getTotalInstants() > commitsRetained) {
|
if (commitTimeline.countInstants() > commitsRetained) {
|
||||||
String earliestCommitToRetain =
|
HoodieInstant earliestCommitToRetain =
|
||||||
commitTimeline.nthInstant(commitTimeline.getTotalInstants() - commitsRetained).get();
|
commitTimeline.nthInstant(commitTimeline.countInstants() - commitsRetained).get();
|
||||||
List<List<HoodieDataFile>> fileVersions =
|
List<List<HoodieDataFile>> fileVersions =
|
||||||
fileSystemView.streamEveryVersionInPartition(partitionPath).collect(Collectors.toList());
|
fileSystemView.getEveryVersionInPartition(partitionPath)
|
||||||
|
.collect(Collectors.toList());
|
||||||
for (List<HoodieDataFile> fileList : fileVersions) {
|
for (List<HoodieDataFile> fileList : fileVersions) {
|
||||||
String lastVersion = FSUtils.getCommitTime(fileList.get(0).getFileName());
|
String lastVersion = FSUtils.getCommitTime(fileList.get(0).getFileName());
|
||||||
String lastVersionBeforeEarliestCommitToRetain =
|
String lastVersionBeforeEarliestCommitToRetain =
|
||||||
@@ -160,7 +161,8 @@ public class HoodieCleaner {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Always keep the last commit
|
// Always keep the last commit
|
||||||
if (commitTimeline.compareInstants(earliestCommitToRetain, fileCommitTime,
|
if (commitTimeline
|
||||||
|
.compareTimestamps(earliestCommitToRetain.getTimestamp(), fileCommitTime,
|
||||||
HoodieTimeline.GREATER)) {
|
HoodieTimeline.GREATER)) {
|
||||||
// this is a commit, that should be cleaned.
|
// this is a commit, that should be cleaned.
|
||||||
deletePaths.add(String
|
deletePaths.add(String
|
||||||
@@ -177,10 +179,12 @@ public class HoodieCleaner {
|
|||||||
/**
|
/**
|
||||||
* Gets the latest version < commitTime. This version file could still be used by queries.
|
* Gets the latest version < commitTime. This version file could still be used by queries.
|
||||||
*/
|
*/
|
||||||
private String getLatestVersionBeforeCommit(List<HoodieDataFile> fileList, String commitTime) {
|
private String getLatestVersionBeforeCommit(List<HoodieDataFile> fileList,
|
||||||
|
HoodieInstant commitTime) {
|
||||||
for (HoodieDataFile file : fileList) {
|
for (HoodieDataFile file : fileList) {
|
||||||
String fileCommitTime = FSUtils.getCommitTime(file.getFileName());
|
String fileCommitTime = FSUtils.getCommitTime(file.getFileName());
|
||||||
if (commitTimeline.compareInstants(commitTime, fileCommitTime, HoodieTimeline.GREATER)) {
|
if (commitTimeline.compareTimestamps(commitTime.getTimestamp(), fileCommitTime,
|
||||||
|
HoodieTimeline.GREATER)) {
|
||||||
// fileList is sorted on the reverse, so the first commit we find <= commitTime is the one we want
|
// fileList is sorted on the reverse, so the first commit we find <= commitTime is the one we want
|
||||||
return fileCommitTime;
|
return fileCommitTime;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -19,8 +19,8 @@ package com.uber.hoodie.io;
|
|||||||
import com.uber.hoodie.common.model.HoodieCommitMetadata;
|
import com.uber.hoodie.common.model.HoodieCommitMetadata;
|
||||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||||
import com.uber.hoodie.common.table.timeline.HoodieActiveCommitTimeline;
|
import com.uber.hoodie.common.table.timeline.HoodieArchivedTimeline;
|
||||||
import com.uber.hoodie.common.table.timeline.HoodieArchivedCommitTimeline;
|
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||||
import com.uber.hoodie.config.HoodieWriteConfig;
|
import com.uber.hoodie.config.HoodieWriteConfig;
|
||||||
import com.uber.hoodie.common.file.HoodieAppendLog;
|
import com.uber.hoodie.common.file.HoodieAppendLog;
|
||||||
import com.uber.hoodie.exception.HoodieCommitException;
|
import com.uber.hoodie.exception.HoodieCommitException;
|
||||||
@@ -34,6 +34,7 @@ import org.apache.log4j.Logger;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Optional;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
@@ -47,11 +48,10 @@ public class HoodieCommitArchiveLog {
|
|||||||
private final FileSystem fs;
|
private final FileSystem fs;
|
||||||
private final HoodieWriteConfig config;
|
private final HoodieWriteConfig config;
|
||||||
|
|
||||||
public HoodieCommitArchiveLog(HoodieWriteConfig config,
|
public HoodieCommitArchiveLog(HoodieWriteConfig config, FileSystem fs) {
|
||||||
FileSystem fs) {
|
|
||||||
this.fs = fs;
|
this.fs = fs;
|
||||||
this.config = config;
|
this.config = config;
|
||||||
this.archiveFilePath = HoodieArchivedCommitTimeline
|
this.archiveFilePath = HoodieArchivedTimeline
|
||||||
.getArchiveLogPath(config.getBasePath() + "/" + HoodieTableMetaClient.METAFOLDER_NAME);
|
.getArchiveLogPath(config.getBasePath() + "/" + HoodieTableMetaClient.METAFOLDER_NAME);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -59,7 +59,7 @@ public class HoodieCommitArchiveLog {
|
|||||||
* Check if commits need to be archived. If yes, archive commits.
|
* Check if commits need to be archived. If yes, archive commits.
|
||||||
*/
|
*/
|
||||||
public boolean archiveIfRequired() {
|
public boolean archiveIfRequired() {
|
||||||
List<String> commitsToArchive = getCommitsToArchive().collect(Collectors.toList());
|
List<HoodieInstant> commitsToArchive = getCommitsToArchive().collect(Collectors.toList());
|
||||||
if (commitsToArchive.iterator().hasNext()) {
|
if (commitsToArchive.iterator().hasNext()) {
|
||||||
log.info("Archiving commits " + commitsToArchive);
|
log.info("Archiving commits " + commitsToArchive);
|
||||||
archive(commitsToArchive);
|
archive(commitsToArchive);
|
||||||
@@ -70,41 +70,42 @@ public class HoodieCommitArchiveLog {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private Stream<String> getCommitsToArchive() {
|
private Stream<HoodieInstant> getCommitsToArchive() {
|
||||||
int maxCommitsToKeep = config.getMaxCommitsToKeep();
|
int maxCommitsToKeep = config.getMaxCommitsToKeep();
|
||||||
int minCommitsToKeep = config.getMinCommitsToKeep();
|
int minCommitsToKeep = config.getMinCommitsToKeep();
|
||||||
|
|
||||||
HoodieTableMetaClient metaClient =
|
HoodieTableMetaClient metaClient =
|
||||||
new HoodieTableMetaClient(fs, config.getBasePath(), true);
|
new HoodieTableMetaClient(fs, config.getBasePath(), true);
|
||||||
HoodieTimeline commitTimeline = metaClient.getActiveCommitTimeline();
|
HoodieTimeline commitTimeline =
|
||||||
|
metaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants();
|
||||||
|
|
||||||
if (commitTimeline.hasInstants() && commitTimeline.getTotalInstants() > maxCommitsToKeep) {
|
if (!commitTimeline.empty() && commitTimeline.countInstants() > maxCommitsToKeep) {
|
||||||
// Actually do the commits
|
// Actually do the commits
|
||||||
return commitTimeline.getInstants()
|
return commitTimeline.getInstants()
|
||||||
.limit(commitTimeline.getTotalInstants() - minCommitsToKeep);
|
.limit(commitTimeline.countInstants() - minCommitsToKeep);
|
||||||
}
|
}
|
||||||
return Stream.empty();
|
return Stream.empty();
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean deleteCommits(List<String> commitsToArchive) {
|
private boolean deleteCommits(List<HoodieInstant> commitsToArchive) {
|
||||||
log.info("Deleting commits " + commitsToArchive);
|
log.info("Deleting commits " + commitsToArchive);
|
||||||
HoodieTableMetaClient metaClient =
|
HoodieTableMetaClient metaClient =
|
||||||
new HoodieTableMetaClient(fs, config.getBasePath(), true);
|
new HoodieTableMetaClient(fs, config.getBasePath(), true);
|
||||||
HoodieTimeline commitTimeline = metaClient.getActiveCommitTimeline();
|
HoodieTimeline commitTimeline =
|
||||||
|
metaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants();
|
||||||
|
|
||||||
boolean success = true;
|
boolean success = true;
|
||||||
for(String commitToArchive:commitsToArchive) {
|
for (HoodieInstant commitToArchive : commitsToArchive) {
|
||||||
Path commitFile = new Path(metaClient.getMetaPath(),
|
Path commitFile =
|
||||||
((HoodieActiveCommitTimeline) commitTimeline)
|
new Path(metaClient.getMetaPath(), commitToArchive.getFileName());
|
||||||
.getCompletedFileName(commitToArchive));
|
|
||||||
try {
|
try {
|
||||||
if (fs.exists(commitFile)) {
|
if (fs.exists(commitFile)) {
|
||||||
success &= fs.delete(commitFile, false);
|
success &= fs.delete(commitFile, false);
|
||||||
log.info("Archived and deleted commit file " + commitFile);
|
log.info("Archived and deleted commit file " + commitFile);
|
||||||
}
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new HoodieIOException(
|
throw new HoodieIOException("Failed to delete archived commit " + commitToArchive,
|
||||||
"Failed to delete archived commit " + commitToArchive, e);
|
e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return success;
|
return success;
|
||||||
@@ -120,18 +121,19 @@ public class HoodieCommitArchiveLog {
|
|||||||
.compression(HoodieAppendLog.CompressionType.RECORD, new BZip2Codec()));
|
.compression(HoodieAppendLog.CompressionType.RECORD, new BZip2Codec()));
|
||||||
}
|
}
|
||||||
|
|
||||||
private void archive(List<String> commits) throws HoodieCommitException {
|
private void archive(List<HoodieInstant> commits) throws HoodieCommitException {
|
||||||
HoodieTableMetaClient metaClient =
|
HoodieTableMetaClient metaClient =
|
||||||
new HoodieTableMetaClient(fs, config.getBasePath(), true);
|
new HoodieTableMetaClient(fs, config.getBasePath(), true);
|
||||||
HoodieTimeline commitTimeline = metaClient.getActiveCommitTimeline();
|
HoodieTimeline commitTimeline =
|
||||||
|
metaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants();
|
||||||
|
|
||||||
HoodieAppendLog.Writer writer = null;
|
HoodieAppendLog.Writer writer = null;
|
||||||
try {
|
try {
|
||||||
writer = openWriter();
|
writer = openWriter();
|
||||||
for (String commitTime : commits) {
|
for (HoodieInstant commitTime : commits) {
|
||||||
Text k = new Text(commitTime);
|
Text k = new Text(commitTime.getTimestamp());
|
||||||
HoodieCommitMetadata commitMetadata = HoodieCommitMetadata
|
HoodieCommitMetadata commitMetadata = HoodieCommitMetadata
|
||||||
.fromBytes(commitTimeline.readInstantDetails(commitTime).get());
|
.fromBytes(commitTimeline.getInstantDetails(commitTime).get());
|
||||||
Text v = new Text(commitMetadata.toJsonString());
|
Text v = new Text(commitMetadata.toJsonString());
|
||||||
writer.append(k, v);
|
writer.append(k, v);
|
||||||
log.info("Wrote " + k);
|
log.info("Wrote " + k);
|
||||||
|
|||||||
@@ -50,7 +50,8 @@ public abstract class HoodieIOHandle<T extends HoodieRecordPayload> {
|
|||||||
this.config = config;
|
this.config = config;
|
||||||
this.fs = FSUtils.getFs();
|
this.fs = FSUtils.getFs();
|
||||||
this.metaClient = metaClient;
|
this.metaClient = metaClient;
|
||||||
this.hoodieTimeline = metaClient.getActiveCommitTimeline();
|
this.hoodieTimeline =
|
||||||
|
metaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants();
|
||||||
this.fileSystemView = new ReadOptimizedTableView(fs, metaClient);
|
this.fileSystemView = new ReadOptimizedTableView(fs, metaClient);
|
||||||
this.schema =
|
this.schema =
|
||||||
HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(config.getSchema()));
|
HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(config.getSchema()));
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ import com.uber.hoodie.common.model.HoodieDataFile;
|
|||||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||||
import com.uber.hoodie.common.table.TableFileSystemView;
|
import com.uber.hoodie.common.table.TableFileSystemView;
|
||||||
|
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||||
import com.uber.hoodie.common.table.view.ReadOptimizedTableView;
|
import com.uber.hoodie.common.table.view.ReadOptimizedTableView;
|
||||||
import com.uber.hoodie.config.HoodieWriteConfig;
|
import com.uber.hoodie.config.HoodieWriteConfig;
|
||||||
import com.uber.hoodie.WriteStatus;
|
import com.uber.hoodie.WriteStatus;
|
||||||
@@ -291,13 +292,15 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
|
|||||||
FileSystem fs = FSUtils.getFs();
|
FileSystem fs = FSUtils.getFs();
|
||||||
List<SmallFile> smallFileLocations = new ArrayList<>();
|
List<SmallFile> smallFileLocations = new ArrayList<>();
|
||||||
|
|
||||||
HoodieTimeline commitTimeline = metaClient.getActiveCommitTimeline();
|
HoodieTimeline commitTimeline =
|
||||||
|
metaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants();
|
||||||
TableFileSystemView fileSystemView = new ReadOptimizedTableView(fs, metaClient);
|
TableFileSystemView fileSystemView = new ReadOptimizedTableView(fs, metaClient);
|
||||||
|
|
||||||
if (commitTimeline.hasInstants()) { // if we have some commits
|
if (!commitTimeline.empty()) { // if we have some commits
|
||||||
String latestCommitTime = commitTimeline.lastInstant().get();
|
HoodieInstant latestCommitTime = commitTimeline.lastInstant().get();
|
||||||
List<HoodieDataFile> allFiles = fileSystemView.streamLatestVersionInPartition(partitionPath, latestCommitTime).collect(
|
List<HoodieDataFile> allFiles = fileSystemView
|
||||||
Collectors.toList());
|
.getLatestVersionInPartition(partitionPath, latestCommitTime.getTimestamp())
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
for (HoodieDataFile file : allFiles) {
|
for (HoodieDataFile file : allFiles) {
|
||||||
if (file.getFileSize() < config.getParquetSmallFileLimit()) {
|
if (file.getFileSize() < config.getParquetSmallFileLimit()) {
|
||||||
@@ -322,12 +325,13 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
|
|||||||
*/
|
*/
|
||||||
private long averageBytesPerRecord() {
|
private long averageBytesPerRecord() {
|
||||||
long avgSize = 0L;
|
long avgSize = 0L;
|
||||||
HoodieTimeline commitTimeline = metaClient.getActiveCommitTimeline();
|
HoodieTimeline commitTimeline =
|
||||||
|
metaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants();
|
||||||
try {
|
try {
|
||||||
if (commitTimeline.hasInstants()) {
|
if (!commitTimeline.empty()) {
|
||||||
String latestCommitTime = commitTimeline.lastInstant().get();
|
HoodieInstant latestCommitTime = commitTimeline.lastInstant().get();
|
||||||
HoodieCommitMetadata commitMetadata = HoodieCommitMetadata
|
HoodieCommitMetadata commitMetadata = HoodieCommitMetadata
|
||||||
.fromBytes(commitTimeline.readInstantDetails(latestCommitTime).get());
|
.fromBytes(commitTimeline.getInstantDetails(latestCommitTime).get());
|
||||||
avgSize = (long) Math.ceil(
|
avgSize = (long) Math.ceil(
|
||||||
(1.0 * commitMetadata.fetchTotalBytesWritten()) / commitMetadata
|
(1.0 * commitMetadata.fetchTotalBytesWritten()) / commitMetadata
|
||||||
.fetchTotalRecordsWritten());
|
.fetchTotalRecordsWritten());
|
||||||
|
|||||||
@@ -29,6 +29,7 @@ import com.uber.hoodie.common.model.HoodieWriteStat;
|
|||||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||||
import com.uber.hoodie.common.table.TableFileSystemView;
|
import com.uber.hoodie.common.table.TableFileSystemView;
|
||||||
|
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||||
import com.uber.hoodie.common.table.view.ReadOptimizedTableView;
|
import com.uber.hoodie.common.table.view.ReadOptimizedTableView;
|
||||||
import com.uber.hoodie.common.util.FSUtils;
|
import com.uber.hoodie.common.util.FSUtils;
|
||||||
import com.uber.hoodie.common.util.ParquetUtils;
|
import com.uber.hoodie.common.util.ParquetUtils;
|
||||||
@@ -41,7 +42,6 @@ import com.uber.hoodie.index.HoodieIndex;
|
|||||||
import com.uber.hoodie.io.HoodieCleaner;
|
import com.uber.hoodie.io.HoodieCleaner;
|
||||||
|
|
||||||
import org.apache.avro.generic.GenericRecord;
|
import org.apache.avro.generic.GenericRecord;
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
@@ -61,10 +61,8 @@ import java.util.HashMap;
|
|||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.LinkedHashMap;
|
import java.util.LinkedHashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.SortedMap;
|
|
||||||
import java.util.TreeSet;
|
import java.util.TreeSet;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
@@ -293,15 +291,16 @@ public class TestHoodieClient implements Serializable {
|
|||||||
assertNoWriteErrors(statuses);
|
assertNoWriteErrors(statuses);
|
||||||
|
|
||||||
HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, basePath);
|
HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, basePath);
|
||||||
HoodieTimeline timeline = metadata.getActiveCommitTimeline();
|
HoodieTimeline timeline =
|
||||||
|
metadata.getActiveTimeline().getCommitTimeline();
|
||||||
|
|
||||||
TableFileSystemView fsView = new ReadOptimizedTableView(fs, metadata);
|
TableFileSystemView fsView = new ReadOptimizedTableView(fs, metadata);
|
||||||
// Need to ensure the following
|
// Need to ensure the following
|
||||||
for (String partitionPath : dataGen.getPartitionPaths()) {
|
for (String partitionPath : dataGen.getPartitionPaths()) {
|
||||||
// compute all the versions of all files, from time 0
|
// compute all the versions of all files, from time 0
|
||||||
HashMap<String, TreeSet<String>> fileIdToVersions = new HashMap<>();
|
HashMap<String, TreeSet<String>> fileIdToVersions = new HashMap<>();
|
||||||
for (String entry : timeline.getInstants().collect(Collectors.toList())) {
|
for (HoodieInstant entry : timeline.getInstants().collect(Collectors.toList())) {
|
||||||
HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(timeline.readInstantDetails(entry).get());
|
HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(timeline.getInstantDetails(entry).get());
|
||||||
|
|
||||||
for (HoodieWriteStat wstat : commitMetadata.getWriteStats(partitionPath)) {
|
for (HoodieWriteStat wstat : commitMetadata.getWriteStats(partitionPath)) {
|
||||||
if (!fileIdToVersions.containsKey(wstat.getFileId())) {
|
if (!fileIdToVersions.containsKey(wstat.getFileId())) {
|
||||||
@@ -312,7 +311,7 @@ public class TestHoodieClient implements Serializable {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
List<List<HoodieDataFile>> fileVersions = fsView.streamEveryVersionInPartition(partitionPath).collect(Collectors.toList());
|
List<List<HoodieDataFile>> fileVersions = fsView.getEveryVersionInPartition(partitionPath).collect(Collectors.toList());
|
||||||
for (List<HoodieDataFile> entry : fileVersions) {
|
for (List<HoodieDataFile> entry : fileVersions) {
|
||||||
// No file has no more than max versions
|
// No file has no more than max versions
|
||||||
String fileId = entry.iterator().next().getFileId();
|
String fileId = entry.iterator().next().getFileId();
|
||||||
@@ -373,13 +372,14 @@ public class TestHoodieClient implements Serializable {
|
|||||||
assertNoWriteErrors(statuses);
|
assertNoWriteErrors(statuses);
|
||||||
|
|
||||||
HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, basePath);
|
HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, basePath);
|
||||||
HoodieTimeline activeTimeline = metadata.getActiveCommitTimeline();
|
HoodieTimeline activeTimeline = metadata.getActiveTimeline().getCommitTimeline().filterCompletedInstants();
|
||||||
Optional<String> earliestRetainedCommit = activeTimeline.nthFromLastInstant(maxCommits - 1);
|
Optional<HoodieInstant>
|
||||||
Set<String> acceptableCommits =
|
earliestRetainedCommit = activeTimeline.nthFromLastInstant(maxCommits - 1);
|
||||||
|
Set<HoodieInstant> acceptableCommits =
|
||||||
activeTimeline.getInstants().collect(Collectors.toSet());
|
activeTimeline.getInstants().collect(Collectors.toSet());
|
||||||
if (earliestRetainedCommit.isPresent()) {
|
if (earliestRetainedCommit.isPresent()) {
|
||||||
acceptableCommits.removeAll(
|
acceptableCommits.removeAll(
|
||||||
activeTimeline.findInstantsInRange("000", earliestRetainedCommit.get())
|
activeTimeline.findInstantsInRange("000", earliestRetainedCommit.get().getTimestamp()).getInstants()
|
||||||
.collect(Collectors.toSet()));
|
.collect(Collectors.toSet()));
|
||||||
acceptableCommits.add(earliestRetainedCommit.get());
|
acceptableCommits.add(earliestRetainedCommit.get());
|
||||||
}
|
}
|
||||||
@@ -387,16 +387,16 @@ public class TestHoodieClient implements Serializable {
|
|||||||
TableFileSystemView fsView = new ReadOptimizedTableView(fs, metadata);
|
TableFileSystemView fsView = new ReadOptimizedTableView(fs, metadata);
|
||||||
// Need to ensure the following
|
// Need to ensure the following
|
||||||
for (String partitionPath : dataGen.getPartitionPaths()) {
|
for (String partitionPath : dataGen.getPartitionPaths()) {
|
||||||
List<List<HoodieDataFile>> fileVersions = fsView.streamEveryVersionInPartition(partitionPath).collect(Collectors.toList());
|
List<List<HoodieDataFile>> fileVersions = fsView.getEveryVersionInPartition(partitionPath).collect(Collectors.toList());
|
||||||
for (List<HoodieDataFile> entry : fileVersions) {
|
for (List<HoodieDataFile> entry : fileVersions) {
|
||||||
Set<String> commitTimes = new HashSet<>();
|
Set<String> commitTimes = new HashSet<>();
|
||||||
for(HoodieDataFile value:entry) {
|
for(HoodieDataFile value:entry) {
|
||||||
System.out.println("Data File - " + value);
|
System.out.println("Data File - " + value);
|
||||||
commitTimes.add(value.getCommitTime());
|
commitTimes.add(value.getCommitTime());
|
||||||
}
|
}
|
||||||
System.out.println("Existing commits " + activeTimeline.getInstants().collect(Collectors.toList()));
|
|
||||||
assertEquals("Only contain acceptable versions of file should be present",
|
assertEquals("Only contain acceptable versions of file should be present",
|
||||||
acceptableCommits, commitTimes);
|
acceptableCommits.stream().map(HoodieInstant::getTimestamp)
|
||||||
|
.collect(Collectors.toSet()), commitTimes);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -637,9 +637,8 @@ public class TestHoodieClient implements Serializable {
|
|||||||
|
|
||||||
assertEquals("2 files needs to be committed.", 2, statuses.size());
|
assertEquals("2 files needs to be committed.", 2, statuses.size());
|
||||||
HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, basePath);
|
HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, basePath);
|
||||||
HoodieTimeline activeTimeline = metadata.getActiveCommitTimeline();
|
|
||||||
TableFileSystemView fileSystemView = new ReadOptimizedTableView(fs, metadata);
|
TableFileSystemView fileSystemView = new ReadOptimizedTableView(fs, metadata);
|
||||||
List<HoodieDataFile> files = fileSystemView.streamLatestVersionInPartition(TEST_PARTITION_PATH, commitTime3).collect(
|
List<HoodieDataFile> files = fileSystemView.getLatestVersionInPartition(TEST_PARTITION_PATH, commitTime3).collect(
|
||||||
Collectors.toList());
|
Collectors.toList());
|
||||||
int numTotalInsertsInCommit3 = 0;
|
int numTotalInsertsInCommit3 = 0;
|
||||||
for (HoodieDataFile file: files) {
|
for (HoodieDataFile file: files) {
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ package com.uber.hoodie.common;
|
|||||||
import com.uber.hoodie.WriteStatus;
|
import com.uber.hoodie.WriteStatus;
|
||||||
import com.uber.hoodie.common.model.HoodieRecord;
|
import com.uber.hoodie.common.model.HoodieRecord;
|
||||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||||
|
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||||
import com.uber.hoodie.common.util.FSUtils;
|
import com.uber.hoodie.common.util.FSUtils;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
@@ -60,11 +61,11 @@ public class HoodieClientTestUtils {
|
|||||||
|
|
||||||
|
|
||||||
public static void fakeCommitFile(String basePath, String commitTime) throws IOException {
|
public static void fakeCommitFile(String basePath, String commitTime) throws IOException {
|
||||||
fakeMetaFile(basePath, commitTime, HoodieTableMetaClient.COMMIT_EXTENSION);
|
fakeMetaFile(basePath, commitTime, HoodieTimeline.COMMIT_EXTENSION);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void fakeInFlightFile(String basePath, String commitTime) throws IOException {
|
public static void fakeInFlightFile(String basePath, String commitTime) throws IOException {
|
||||||
fakeMetaFile(basePath, commitTime, HoodieTableMetaClient.INFLIGHT_FILE_SUFFIX);
|
fakeMetaFile(basePath, commitTime, HoodieTimeline.INFLIGHT_EXTENSION);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void fakeDataFile(String basePath, String partitionPath, String commitTime, String fileId) throws Exception {
|
public static void fakeDataFile(String basePath, String partitionPath, String commitTime, String fileId) throws Exception {
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ import com.uber.hoodie.common.model.HoodieCommitMetadata;
|
|||||||
import com.uber.hoodie.common.model.HoodieKey;
|
import com.uber.hoodie.common.model.HoodieKey;
|
||||||
import com.uber.hoodie.common.model.HoodieRecord;
|
import com.uber.hoodie.common.model.HoodieRecord;
|
||||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||||
|
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||||
import com.uber.hoodie.common.util.FSUtils;
|
import com.uber.hoodie.common.util.FSUtils;
|
||||||
import com.uber.hoodie.common.util.HoodieAvroUtils;
|
import com.uber.hoodie.common.util.HoodieAvroUtils;
|
||||||
|
|
||||||
@@ -142,7 +143,7 @@ public class HoodieTestDataGenerator {
|
|||||||
|
|
||||||
public static void createCommitFile(String basePath, String commitTime) throws IOException {
|
public static void createCommitFile(String basePath, String commitTime) throws IOException {
|
||||||
Path commitFile =
|
Path commitFile =
|
||||||
new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + HoodieTableMetaClient.makeCommitFileName(commitTime));
|
new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + HoodieTimeline.makeCommitFileName(commitTime));
|
||||||
FileSystem fs = FSUtils.getFs();
|
FileSystem fs = FSUtils.getFs();
|
||||||
FSDataOutputStream os = fs.create(commitFile, true);
|
FSDataOutputStream os = fs.create(commitFile, true);
|
||||||
HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata();
|
HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata();
|
||||||
|
|||||||
@@ -17,6 +17,7 @@
|
|||||||
package com.uber.hoodie.func;
|
package com.uber.hoodie.func;
|
||||||
|
|
||||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||||
|
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||||
import com.uber.hoodie.config.HoodieWriteConfig;
|
import com.uber.hoodie.config.HoodieWriteConfig;
|
||||||
import com.uber.hoodie.WriteStatus;
|
import com.uber.hoodie.WriteStatus;
|
||||||
import com.uber.hoodie.common.TestRawTripPayload;
|
import com.uber.hoodie.common.TestRawTripPayload;
|
||||||
@@ -79,7 +80,7 @@ public class TestUpdateMapFunction {
|
|||||||
rowChange3));
|
rowChange3));
|
||||||
Iterator<List<WriteStatus>> insertResult = table.handleInsert(records.iterator());
|
Iterator<List<WriteStatus>> insertResult = table.handleInsert(records.iterator());
|
||||||
Path commitFile =
|
Path commitFile =
|
||||||
new Path(config.getBasePath() + "/.hoodie/" + HoodieTableMetaClient.makeCommitFileName("100"));
|
new Path(config.getBasePath() + "/.hoodie/" + HoodieTimeline.makeCommitFileName("100"));
|
||||||
FSUtils.getFs().create(commitFile);
|
FSUtils.getFs().create(commitFile);
|
||||||
|
|
||||||
// Now try an update with an evolved schema
|
// Now try an update with an evolved schema
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ package com.uber.hoodie.io;
|
|||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||||
|
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||||
import com.uber.hoodie.config.HoodieWriteConfig;
|
import com.uber.hoodie.config.HoodieWriteConfig;
|
||||||
import com.uber.hoodie.common.HoodieTestDataGenerator;
|
import com.uber.hoodie.common.HoodieTestDataGenerator;
|
||||||
import com.uber.hoodie.common.model.HoodieCommitMetadata;
|
import com.uber.hoodie.common.model.HoodieCommitMetadata;
|
||||||
@@ -78,15 +79,16 @@ public class TestHoodieCommitArchiveLog {
|
|||||||
HoodieTestDataGenerator.createCommitFile(basePath, "102");
|
HoodieTestDataGenerator.createCommitFile(basePath, "102");
|
||||||
HoodieTestDataGenerator.createCommitFile(basePath, "103");
|
HoodieTestDataGenerator.createCommitFile(basePath, "103");
|
||||||
|
|
||||||
HoodieTimeline timeline = metadata.getActiveCommitTimeline();
|
HoodieTimeline timeline =
|
||||||
|
metadata.getActiveTimeline().getCommitTimeline().filterCompletedInstants();
|
||||||
|
|
||||||
assertEquals("Loaded 4 commits and the count should match", 4,
|
assertEquals("Loaded 4 commits and the count should match", 4, timeline.countInstants());
|
||||||
timeline.getTotalInstants());
|
|
||||||
boolean result = archiveLog.archiveIfRequired();
|
boolean result = archiveLog.archiveIfRequired();
|
||||||
assertTrue(result);
|
assertTrue(result);
|
||||||
timeline = timeline.reload();
|
timeline =
|
||||||
|
metadata.getActiveTimeline().reload().getCommitTimeline().filterCompletedInstants();
|
||||||
assertEquals("Should not archive commits when maxCommitsToKeep is 5", 4,
|
assertEquals("Should not archive commits when maxCommitsToKeep is 5", 4,
|
||||||
timeline.getTotalInstants());
|
timeline.countInstants());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@@ -104,19 +106,21 @@ public class TestHoodieCommitArchiveLog {
|
|||||||
HoodieTestDataGenerator.createCommitFile(basePath, "104");
|
HoodieTestDataGenerator.createCommitFile(basePath, "104");
|
||||||
HoodieTestDataGenerator.createCommitFile(basePath, "105");
|
HoodieTestDataGenerator.createCommitFile(basePath, "105");
|
||||||
|
|
||||||
HoodieTimeline timeline = metadata.getActiveCommitTimeline();
|
HoodieTimeline timeline =
|
||||||
List<String> originalCommits = timeline.getInstants().collect(
|
metadata.getActiveTimeline().getCommitTimeline().filterCompletedInstants();
|
||||||
Collectors.toList());
|
List<HoodieInstant> originalCommits = timeline.getInstants().collect(Collectors.toList());
|
||||||
|
|
||||||
assertEquals("Loaded 6 commits and the count should match", 6, timeline.getTotalInstants());
|
assertEquals("Loaded 6 commits and the count should match", 6, timeline.countInstants());
|
||||||
boolean result = archiveLog.archiveIfRequired();
|
boolean result = archiveLog.archiveIfRequired();
|
||||||
assertTrue(result);
|
assertTrue(result);
|
||||||
timeline = timeline.reload();
|
timeline =
|
||||||
|
metadata.getActiveTimeline().reload().getCommitTimeline().filterCompletedInstants();
|
||||||
assertEquals(
|
assertEquals(
|
||||||
"Should archive commits when maxCommitsToKeep is 5 and now the commits length should be minCommitsToKeep which is 2",
|
"Should archive commits when maxCommitsToKeep is 5 and now the commits length should be minCommitsToKeep which is 2",
|
||||||
2, timeline.getTotalInstants());
|
2, timeline.countInstants());
|
||||||
assertEquals("Archive should not archive the last 2 commits",
|
assertEquals("Archive should not archive the last 2 commits",
|
||||||
Lists.newArrayList("104", "105"), timeline.getInstants().collect(Collectors.toList()));
|
Lists.newArrayList("104", "105"),
|
||||||
|
timeline.getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList()));
|
||||||
|
|
||||||
// Remove all the commits from the original commits, make it ready to be checked against the read map
|
// Remove all the commits from the original commits, make it ready to be checked against the read map
|
||||||
timeline.getInstants().forEach(originalCommits::remove);
|
timeline.getInstants().forEach(originalCommits::remove);
|
||||||
@@ -134,7 +138,8 @@ public class TestHoodieCommitArchiveLog {
|
|||||||
|
|
||||||
assertEquals(
|
assertEquals(
|
||||||
"Read commits map should match the originalCommits - commitsLoadedAfterArchival",
|
"Read commits map should match the originalCommits - commitsLoadedAfterArchival",
|
||||||
originalCommits, new ArrayList<>(readCommits.keySet()));
|
originalCommits.stream().map(HoodieInstant::getTimestamp).collect(Collectors.toList()),
|
||||||
|
new ArrayList<>(readCommits.keySet()));
|
||||||
reader.close();
|
reader.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -153,15 +158,21 @@ public class TestHoodieCommitArchiveLog {
|
|||||||
HoodieTestDataGenerator.createCommitFile(basePath, "104");
|
HoodieTestDataGenerator.createCommitFile(basePath, "104");
|
||||||
HoodieTestDataGenerator.createCommitFile(basePath, "105");
|
HoodieTestDataGenerator.createCommitFile(basePath, "105");
|
||||||
|
|
||||||
HoodieTimeline timeline = metadata.getActiveCommitTimeline();
|
HoodieTimeline timeline =
|
||||||
assertEquals("Loaded 6 commits and the count should match", 6, timeline.getTotalInstants());
|
metadata.getActiveTimeline().getCommitTimeline().filterCompletedInstants();
|
||||||
|
assertEquals("Loaded 6 commits and the count should match", 6, timeline.countInstants());
|
||||||
boolean result = archiveLog.archiveIfRequired();
|
boolean result = archiveLog.archiveIfRequired();
|
||||||
assertTrue(result);
|
assertTrue(result);
|
||||||
timeline = timeline.reload();
|
timeline =
|
||||||
assertTrue("Archived commits should always be safe", timeline.containsOrBeforeTimelineStarts("100"));
|
metadata.getActiveTimeline().reload().getCommitTimeline().filterCompletedInstants();
|
||||||
assertTrue("Archived commits should always be safe", timeline.containsOrBeforeTimelineStarts("101"));
|
assertTrue("Archived commits should always be safe",
|
||||||
assertTrue("Archived commits should always be safe", timeline.containsOrBeforeTimelineStarts("102"));
|
timeline.containsOrBeforeTimelineStarts("100"));
|
||||||
assertTrue("Archived commits should always be safe", timeline.containsOrBeforeTimelineStarts("103"));
|
assertTrue("Archived commits should always be safe",
|
||||||
|
timeline.containsOrBeforeTimelineStarts("101"));
|
||||||
|
assertTrue("Archived commits should always be safe",
|
||||||
|
timeline.containsOrBeforeTimelineStarts("102"));
|
||||||
|
assertTrue("Archived commits should always be safe",
|
||||||
|
timeline.containsOrBeforeTimelineStarts("103"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -184,8 +184,8 @@ public class TestCopyOnWriteTable {
|
|||||||
if (file.getName().endsWith(".parquet")) {
|
if (file.getName().endsWith(".parquet")) {
|
||||||
if (FSUtils.getFileId(file.getName())
|
if (FSUtils.getFileId(file.getName())
|
||||||
.equals(FSUtils.getFileId(parquetFile.getName())) && metadata
|
.equals(FSUtils.getFileId(parquetFile.getName())) && metadata
|
||||||
.getActiveCommitTimeline()
|
.getActiveTimeline().getCommitTimeline()
|
||||||
.compareInstants(FSUtils.getCommitTime(file.getName()),
|
.compareTimestamps(FSUtils.getCommitTime(file.getName()),
|
||||||
FSUtils.getCommitTime(parquetFile.getName()), HoodieTimeline.GREATER)) {
|
FSUtils.getCommitTime(parquetFile.getName()), HoodieTimeline.GREATER)) {
|
||||||
updatedParquetFile = file;
|
updatedParquetFile = file;
|
||||||
break;
|
break;
|
||||||
|
|||||||
@@ -16,12 +16,12 @@
|
|||||||
|
|
||||||
package com.uber.hoodie.common.model;
|
package com.uber.hoodie.common.model;
|
||||||
|
|
||||||
public enum HoodieStorageType {
|
public enum HoodieFileFormat {
|
||||||
PARQUET(".parquet");
|
PARQUET(".parquet");
|
||||||
|
|
||||||
private final String extension;
|
private final String extension;
|
||||||
|
|
||||||
HoodieStorageType(String extension) {
|
HoodieFileFormat(String extension) {
|
||||||
this.extension = extension;
|
this.extension = extension;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -16,9 +16,8 @@
|
|||||||
|
|
||||||
package com.uber.hoodie.common.table;
|
package com.uber.hoodie.common.table;
|
||||||
|
|
||||||
import com.uber.hoodie.common.model.HoodieStorageType;
|
import com.uber.hoodie.common.model.HoodieFileFormat;
|
||||||
import com.uber.hoodie.common.model.HoodieTableType;
|
import com.uber.hoodie.common.model.HoodieTableType;
|
||||||
import com.uber.hoodie.common.util.FSUtils;
|
|
||||||
import com.uber.hoodie.exception.HoodieIOException;
|
import com.uber.hoodie.exception.HoodieIOException;
|
||||||
import org.apache.hadoop.fs.FSDataInputStream;
|
import org.apache.hadoop.fs.FSDataInputStream;
|
||||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
@@ -46,10 +45,10 @@ public class HoodieTableConfig implements Serializable {
|
|||||||
public static final String HOODIE_PROPERTIES_FILE = "hoodie.properties";
|
public static final String HOODIE_PROPERTIES_FILE = "hoodie.properties";
|
||||||
public static final String HOODIE_TABLE_NAME_PROP_NAME = "hoodie.table.name";
|
public static final String HOODIE_TABLE_NAME_PROP_NAME = "hoodie.table.name";
|
||||||
public static final String HOODIE_TABLE_TYPE_PROP_NAME = "hoodie.table.type";
|
public static final String HOODIE_TABLE_TYPE_PROP_NAME = "hoodie.table.type";
|
||||||
public static final String HOODIE_RO_STORAGE_FORMAT_PROP_NAME =
|
public static final String HOODIE_RO_FILE_FORMAT_PROP_NAME =
|
||||||
"hoodie.table.ro.storage.format";
|
"hoodie.table.ro.file.format";
|
||||||
public static final HoodieTableType DEFAULT_TABLE_TYPE = HoodieTableType.COPY_ON_WRITE;
|
public static final HoodieTableType DEFAULT_TABLE_TYPE = HoodieTableType.COPY_ON_WRITE;
|
||||||
public static final HoodieStorageType DEFAULT_RO_STORAGE_FORMAT = HoodieStorageType.PARQUET;
|
public static final HoodieFileFormat DEFAULT_RO_FILE_FORMAT = HoodieFileFormat.PARQUET;
|
||||||
private Properties props;
|
private Properties props;
|
||||||
|
|
||||||
public HoodieTableConfig(FileSystem fs, String metaPath) {
|
public HoodieTableConfig(FileSystem fs, String metaPath) {
|
||||||
@@ -127,12 +126,12 @@ public class HoodieTableConfig implements Serializable {
|
|||||||
/**
|
/**
|
||||||
* Get the Read Optimized Storage Format
|
* Get the Read Optimized Storage Format
|
||||||
*
|
*
|
||||||
* @return HoodieStorageType for the Read Optimized Storage format
|
* @return HoodieFileFormat for the Read Optimized Storage format
|
||||||
*/
|
*/
|
||||||
public HoodieStorageType getROStorageFormat() {
|
public HoodieFileFormat getROFileFormat() {
|
||||||
if (props.contains(HOODIE_RO_STORAGE_FORMAT_PROP_NAME)) {
|
if (props.contains(HOODIE_RO_FILE_FORMAT_PROP_NAME)) {
|
||||||
return HoodieStorageType.valueOf(props.getProperty(HOODIE_RO_STORAGE_FORMAT_PROP_NAME));
|
return HoodieFileFormat.valueOf(props.getProperty(HOODIE_RO_FILE_FORMAT_PROP_NAME));
|
||||||
}
|
}
|
||||||
return DEFAULT_RO_STORAGE_FORMAT;
|
return DEFAULT_RO_FILE_FORMAT;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,10 +17,8 @@
|
|||||||
package com.uber.hoodie.common.table;
|
package com.uber.hoodie.common.table;
|
||||||
|
|
||||||
import com.uber.hoodie.common.model.HoodieTableType;
|
import com.uber.hoodie.common.model.HoodieTableType;
|
||||||
import com.uber.hoodie.common.table.timeline.HoodieActiveCommitTimeline;
|
import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline;
|
||||||
import com.uber.hoodie.common.table.timeline.HoodieArchivedCommitTimeline;
|
import com.uber.hoodie.common.table.timeline.HoodieArchivedTimeline;
|
||||||
import com.uber.hoodie.common.table.timeline.HoodieCleanerTimeline;
|
|
||||||
import com.uber.hoodie.common.table.timeline.HoodieSavePointTimeline;
|
|
||||||
import com.uber.hoodie.common.util.FSUtils;
|
import com.uber.hoodie.common.util.FSUtils;
|
||||||
import com.uber.hoodie.exception.DatasetNotFoundException;
|
import com.uber.hoodie.exception.DatasetNotFoundException;
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
@@ -30,8 +28,13 @@ import org.apache.hadoop.fs.PathFilter;
|
|||||||
import org.apache.log4j.LogManager;
|
import org.apache.log4j.LogManager;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
|
import java.io.Externalizable;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.ObjectInput;
|
||||||
|
import java.io.ObjectInputStream;
|
||||||
|
import java.io.ObjectOutput;
|
||||||
|
import java.io.ObjectOutputStream;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.Properties;
|
import java.util.Properties;
|
||||||
@@ -50,28 +53,22 @@ import java.util.Properties;
|
|||||||
public class HoodieTableMetaClient implements Serializable {
|
public class HoodieTableMetaClient implements Serializable {
|
||||||
private final transient static Logger log = LogManager.getLogger(HoodieTableMetaClient.class);
|
private final transient static Logger log = LogManager.getLogger(HoodieTableMetaClient.class);
|
||||||
public static String METAFOLDER_NAME = ".hoodie";
|
public static String METAFOLDER_NAME = ".hoodie";
|
||||||
public static String COMMIT_EXTENSION = ".commit";
|
|
||||||
public static String CLEAN_EXTENSION = ".clean";
|
|
||||||
public static String SAVEPOINT_EXTENSION = ".savepoint";
|
|
||||||
public static String INFLIGHT_FILE_SUFFIX = ".inflight";
|
|
||||||
|
|
||||||
private String basePath;
|
private String basePath;
|
||||||
private transient FileSystem fs;
|
private transient FileSystem fs;
|
||||||
private String metaPath;
|
private String metaPath;
|
||||||
private HoodieTableType tableType;
|
private HoodieTableType tableType;
|
||||||
private HoodieTableConfig tableConfig;
|
private HoodieTableConfig tableConfig;
|
||||||
private HoodieTimeline activeCommitTimeline;
|
private HoodieActiveTimeline activeTimeline;
|
||||||
private HoodieTimeline archivedCommitTimeline;
|
private HoodieArchivedTimeline archivedTimeline;
|
||||||
private HoodieTimeline savePointTimeline;
|
|
||||||
private HoodieTimeline cleanerTimeline;
|
|
||||||
|
|
||||||
public HoodieTableMetaClient(FileSystem fs, String basePath) throws DatasetNotFoundException {
|
public HoodieTableMetaClient(FileSystem fs, String basePath) throws DatasetNotFoundException {
|
||||||
// Do not load any timeline by default
|
// Do not load any timeline by default
|
||||||
this(fs, basePath, false);
|
this(fs, basePath, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
public HoodieTableMetaClient(FileSystem fs, String basePath,
|
public HoodieTableMetaClient(FileSystem fs, String basePath, boolean loadActiveTimelineOnLoad)
|
||||||
boolean loadActiveCommitTimelineOnLoad) throws DatasetNotFoundException {
|
throws DatasetNotFoundException {
|
||||||
log.info("Loading HoodieTableMetaClient from " + basePath);
|
log.info("Loading HoodieTableMetaClient from " + basePath);
|
||||||
this.basePath = basePath;
|
this.basePath = basePath;
|
||||||
this.fs = fs;
|
this.fs = fs;
|
||||||
@@ -82,14 +79,15 @@ public class HoodieTableMetaClient implements Serializable {
|
|||||||
this.tableConfig = new HoodieTableConfig(fs, metaPath);
|
this.tableConfig = new HoodieTableConfig(fs, metaPath);
|
||||||
this.tableType = tableConfig.getTableType();
|
this.tableType = tableConfig.getTableType();
|
||||||
log.info("Finished Loading Table of type " + tableType + " from " + basePath);
|
log.info("Finished Loading Table of type " + tableType + " from " + basePath);
|
||||||
if (loadActiveCommitTimelineOnLoad) {
|
if (loadActiveTimelineOnLoad) {
|
||||||
log.info("Loading Active commit timeline for " + basePath);
|
log.info("Loading Active commit timeline for " + basePath);
|
||||||
getActiveCommitTimeline();
|
getActiveTimeline();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* For serailizing and de-serializing
|
* For serailizing and de-serializing
|
||||||
|
*
|
||||||
* @deprecated
|
* @deprecated
|
||||||
*/
|
*/
|
||||||
public HoodieTableMetaClient() {
|
public HoodieTableMetaClient() {
|
||||||
@@ -97,6 +95,7 @@ public class HoodieTableMetaClient implements Serializable {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* This method is only used when this object is deserialized in a spark executor.
|
* This method is only used when this object is deserialized in a spark executor.
|
||||||
|
*
|
||||||
* @deprecated
|
* @deprecated
|
||||||
*/
|
*/
|
||||||
private void readObject(java.io.ObjectInputStream in)
|
private void readObject(java.io.ObjectInputStream in)
|
||||||
@@ -105,6 +104,11 @@ public class HoodieTableMetaClient implements Serializable {
|
|||||||
this.fs = FSUtils.getFs();
|
this.fs = FSUtils.getFs();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void writeObject(java.io.ObjectOutputStream out)
|
||||||
|
throws IOException {
|
||||||
|
out.defaultWriteObject();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return Base path
|
* @return Base path
|
||||||
*/
|
*/
|
||||||
@@ -134,16 +138,16 @@ public class HoodieTableMetaClient implements Serializable {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the active commits as a timeline
|
* Get the active instants as a timeline
|
||||||
*
|
*
|
||||||
* @return Active commit timeline
|
* @return Active instants timeline
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
public synchronized HoodieTimeline getActiveCommitTimeline() {
|
public synchronized HoodieActiveTimeline getActiveTimeline() {
|
||||||
if (activeCommitTimeline == null) {
|
if (activeTimeline == null) {
|
||||||
activeCommitTimeline = new HoodieActiveCommitTimeline(fs, metaPath);
|
activeTimeline = new HoodieActiveTimeline(fs, metaPath);
|
||||||
}
|
}
|
||||||
return activeCommitTimeline;
|
return activeTimeline;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -153,40 +157,13 @@ public class HoodieTableMetaClient implements Serializable {
|
|||||||
* @return Active commit timeline
|
* @return Active commit timeline
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
public HoodieTimeline getArchivedCommitTimeline() {
|
public synchronized HoodieArchivedTimeline getArchivedTimeline() {
|
||||||
if (archivedCommitTimeline == null) {
|
if (archivedTimeline == null) {
|
||||||
archivedCommitTimeline = new HoodieArchivedCommitTimeline(fs, metaPath);
|
archivedTimeline = new HoodieArchivedTimeline(fs, metaPath);
|
||||||
}
|
}
|
||||||
return archivedCommitTimeline;
|
return archivedTimeline;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the save points as a timeline.
|
|
||||||
*
|
|
||||||
* @return Savepoint timeline
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
public HoodieTimeline getSavePointsTimeline() {
|
|
||||||
if (savePointTimeline == null) {
|
|
||||||
savePointTimeline = new HoodieSavePointTimeline(fs, metaPath);
|
|
||||||
}
|
|
||||||
return savePointTimeline;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the cleaner activity as a timeline.
|
|
||||||
*
|
|
||||||
* @return Cleaner activity
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
public HoodieTimeline getCleanerTimeline() {
|
|
||||||
if (cleanerTimeline == null) {
|
|
||||||
cleanerTimeline = new HoodieCleanerTimeline(fs, metaPath);
|
|
||||||
}
|
|
||||||
return cleanerTimeline;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Helper method to initialize a given path as a hoodie dataset with configs passed in as as Properties
|
* Helper method to initialize a given path as a hoodie dataset with configs passed in as as Properties
|
||||||
*
|
*
|
||||||
@@ -215,34 +192,6 @@ public class HoodieTableMetaClient implements Serializable {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// HELPER METHODS TO CREATE META FILE NAMES
|
// HELPER METHODS TO CREATE META FILE NAMES
|
||||||
public static String makeCommitFileName(String commitTime) {
|
|
||||||
return commitTime + COMMIT_EXTENSION;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static String makeInflightCommitFileName(String commitTime) {
|
|
||||||
return commitTime + INFLIGHT_FILE_SUFFIX;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static String makeCleanerFileName(String instant) {
|
|
||||||
return instant + CLEAN_EXTENSION;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static String makeInflightCleanerFileName(String instant) {
|
|
||||||
return instant + CLEAN_EXTENSION + INFLIGHT_FILE_SUFFIX;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static String makeInflightSavePointFileName(String commitTime) {
|
|
||||||
return commitTime + SAVEPOINT_EXTENSION + INFLIGHT_FILE_SUFFIX;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static String makeSavePointFileName(String commitTime) {
|
|
||||||
return commitTime + SAVEPOINT_EXTENSION;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static String getCommitFromCommitFile(String commitFileName) {
|
|
||||||
return commitFileName.split("\\.")[0];
|
|
||||||
}
|
|
||||||
|
|
||||||
public static FileStatus[] scanFiles(FileSystem fs, Path metaPath, PathFilter nameFilter)
|
public static FileStatus[] scanFiles(FileSystem fs, Path metaPath, PathFilter nameFilter)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
return fs.listStatus(metaPath, nameFilter);
|
return fs.listStatus(metaPath, nameFilter);
|
||||||
|
|||||||
@@ -17,153 +17,125 @@
|
|||||||
package com.uber.hoodie.common.table;
|
package com.uber.hoodie.common.table;
|
||||||
|
|
||||||
import com.uber.hoodie.common.table.timeline.HoodieDefaultTimeline;
|
import com.uber.hoodie.common.table.timeline.HoodieDefaultTimeline;
|
||||||
|
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||||
|
import com.uber.hoodie.common.util.FSUtils;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.function.BiPredicate;
|
import java.util.function.BiPredicate;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* HoodieTimeline allows representation of meta-data events as a timeline.
|
* HoodieTimeline is a view of meta-data instants in the hoodie dataset.
|
||||||
* Instants are specific points in time represented as strings.
|
* Instants are specific points in time represented as HoodieInstant.
|
||||||
* in this format YYYYMMDDHHmmSS. e.g. 20170101193218
|
|
||||||
* Any operation on the timeline starts with the inflight instant and then when complete marks
|
|
||||||
* the completed instant and removes the inflight instant.
|
|
||||||
* Completed instants are plainly referred to as just instants
|
|
||||||
* <p>
|
* <p>
|
||||||
* Timelines as immutable once created. Any operation to change the timeline (like create/delete instants)
|
* Timelines are immutable once created and operations create new instance of
|
||||||
* will not be reflected unless explicitly reloaded using the reload()
|
* timelines which filter on the instants and this can be chained.
|
||||||
*
|
*
|
||||||
* @see com.uber.hoodie.common.table.HoodieTableMetaClient
|
* @see com.uber.hoodie.common.table.HoodieTableMetaClient
|
||||||
* @see HoodieDefaultTimeline
|
* @see HoodieDefaultTimeline
|
||||||
|
* @see HoodieInstant
|
||||||
* @since 0.3.0
|
* @since 0.3.0
|
||||||
*/
|
*/
|
||||||
public interface HoodieTimeline extends Serializable {
|
public interface HoodieTimeline extends Serializable {
|
||||||
|
String COMMIT_ACTION = "commit";
|
||||||
|
String CLEAN_ACTION = "clean";
|
||||||
|
String SAVEPOINT_ACTION = "savepoint";
|
||||||
|
String INFLIGHT_EXTENSION = ".inflight";
|
||||||
|
String COMMIT_EXTENSION = "." + COMMIT_ACTION;
|
||||||
|
String CLEAN_EXTENSION = "." + CLEAN_ACTION;
|
||||||
|
String SAVEPOINT_EXTENSION = "." + SAVEPOINT_ACTION;
|
||||||
|
//this is to preserve backwards compatibility on commit in-flight filenames
|
||||||
|
String INFLIGHT_COMMIT_EXTENSION = INFLIGHT_EXTENSION;
|
||||||
|
String INFLIGHT_CLEAN_EXTENSION = "." + CLEAN_ACTION + INFLIGHT_EXTENSION;
|
||||||
|
String INFLIGHT_SAVEPOINT_EXTENSION = "." + SAVEPOINT_ACTION + INFLIGHT_EXTENSION;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Find all the completed instants after startTs and before or on endTs
|
* Filter this timeline to just include the in-flights
|
||||||
|
*
|
||||||
|
* @return New instance of HoodieTimeline with just in-flights
|
||||||
|
*/
|
||||||
|
HoodieTimeline filterInflights();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Filter this timeline to just include the completed instants
|
||||||
|
*
|
||||||
|
* @return New instance of HoodieTimeline with just completed instants
|
||||||
|
*/
|
||||||
|
HoodieTimeline filterCompletedInstants();
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new Timeline with instants after startTs and before or on endTs
|
||||||
*
|
*
|
||||||
* @param startTs
|
* @param startTs
|
||||||
* @param endTs
|
* @param endTs
|
||||||
* @return Stream of instants
|
|
||||||
*/
|
*/
|
||||||
Stream<String> findInstantsInRange(String startTs, String endTs);
|
HoodieTimeline findInstantsInRange(String startTs, String endTs);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Find all the completed instants after startTs
|
* Create a new Timeline with all the instants after startTs
|
||||||
*
|
*
|
||||||
* @param commitTime
|
* @param commitTime
|
||||||
* @param numCommits
|
* @param numCommits
|
||||||
* @return Stream of instants
|
|
||||||
*/
|
*/
|
||||||
Stream<String> findInstantsAfter(String commitTime, int numCommits);
|
HoodieTimeline findInstantsAfter(String commitTime, int numCommits);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* If the timeline has any completed instants
|
* If the timeline has any instants
|
||||||
*
|
*
|
||||||
* @return true if timeline is not empty
|
* @return true if timeline is empty
|
||||||
*/
|
*/
|
||||||
boolean hasInstants();
|
boolean empty();
|
||||||
|
|
||||||
/**
|
|
||||||
* If the timeline has any in-complete instants
|
|
||||||
*
|
|
||||||
* @return true if timeline has any in-complete instants
|
|
||||||
*/
|
|
||||||
boolean hasInflightInstants();
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return total number of completed instants
|
* @return total number of completed instants
|
||||||
*/
|
*/
|
||||||
int getTotalInstants();
|
int countInstants();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return first completed instant if available
|
* @return first completed instant if available
|
||||||
*/
|
*/
|
||||||
Optional<String> firstInstant();
|
Optional<HoodieInstant> firstInstant();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param n
|
* @param n
|
||||||
* @return nth completed instant from the first completed instant
|
* @return nth completed instant from the first completed instant
|
||||||
*/
|
*/
|
||||||
Optional<String> nthInstant(int n);
|
Optional<HoodieInstant> nthInstant(int n);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return last completed instant if available
|
* @return last completed instant if available
|
||||||
*/
|
*/
|
||||||
Optional<String> lastInstant();
|
Optional<HoodieInstant> lastInstant();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param n
|
* @param n
|
||||||
* @return nth completed instant going back from the last completed instant
|
* @return nth completed instant going back from the last completed instant
|
||||||
*/
|
*/
|
||||||
Optional<String> nthFromLastInstant(int n);
|
Optional<HoodieInstant> nthFromLastInstant(int n);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return true if the passed instant is present as a completed instant on the timeline
|
* @return true if the passed instant is present as a completed instant on the timeline
|
||||||
*/
|
*/
|
||||||
boolean containsInstant(String instant);
|
boolean containsInstant(HoodieInstant instant);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return true if the passed instant is present as a completed instant on the timeline or
|
* @return true if the passed instant is present as a completed instant on the timeline or
|
||||||
* if the instant is before the first completed instant in the timeline
|
* if the instant is before the first completed instant in the timeline
|
||||||
*/
|
*/
|
||||||
boolean containsOrBeforeTimelineStarts(String instant);
|
boolean containsOrBeforeTimelineStarts(String ts);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return Get the stream of completed instants
|
* @return Get the stream of completed instants
|
||||||
*/
|
*/
|
||||||
Stream<String> getInstants();
|
Stream<HoodieInstant> getInstants();
|
||||||
|
|
||||||
/**
|
|
||||||
* @return Get the stream of in-flight instants
|
|
||||||
*/
|
|
||||||
Stream<String> getInflightInstants();
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return true if the passed in instant is before the first completed instant in the timeline
|
* @return true if the passed in instant is before the first completed instant in the timeline
|
||||||
*/
|
*/
|
||||||
boolean isInstantBeforeTimelineStarts(String instant);
|
boolean isBeforeTimelineStarts(String ts);
|
||||||
|
|
||||||
/**
|
|
||||||
* Register the passed in instant as a in-flight
|
|
||||||
*
|
|
||||||
* @param instant
|
|
||||||
*/
|
|
||||||
void saveInstantAsInflight(String instant);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Register the passed in instant as a completed instant.
|
|
||||||
* It needs to have a corresponding in-flight instant, otherwise it will fail.
|
|
||||||
* Pass a optional byte[] to save with the instant.
|
|
||||||
*
|
|
||||||
* @param instant
|
|
||||||
* @param data
|
|
||||||
*/
|
|
||||||
void saveInstantAsComplete(String instant, Optional<byte[]> data);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Un-Register a completed instant as in-flight. This is usually atomic way to
|
|
||||||
* revert the effects of a operation on hoodie datasets
|
|
||||||
*
|
|
||||||
* @param instant
|
|
||||||
*/
|
|
||||||
void revertInstantToInflight(String instant);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Remove the in-flight instant from the timeline
|
|
||||||
*
|
|
||||||
* @param instant
|
|
||||||
*/
|
|
||||||
void removeInflightFromTimeline(String instant);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Reload the timeline. Timelines are immutable once created.
|
|
||||||
*
|
|
||||||
* @return
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
HoodieTimeline reload() throws IOException;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Read the completed instant details
|
* Read the completed instant details
|
||||||
@@ -171,7 +143,7 @@ public interface HoodieTimeline extends Serializable {
|
|||||||
* @param instant
|
* @param instant
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
Optional<byte[]> readInstantDetails(String instant);
|
Optional<byte[]> getInstantDetails(HoodieInstant instant);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Helper methods to compare instants
|
* Helper methods to compare instants
|
||||||
@@ -183,8 +155,55 @@ public interface HoodieTimeline extends Serializable {
|
|||||||
(commit1, commit2) -> commit1.compareTo(commit2) <= 0;
|
(commit1, commit2) -> commit1.compareTo(commit2) <= 0;
|
||||||
BiPredicate<String, String> LESSER = (commit1, commit2) -> commit1.compareTo(commit2) < 0;
|
BiPredicate<String, String> LESSER = (commit1, commit2) -> commit1.compareTo(commit2) < 0;
|
||||||
|
|
||||||
default boolean compareInstants(String commit1, String commit2,
|
default boolean compareTimestamps(String commit1, String commit2,
|
||||||
BiPredicate<String, String> predicateToApply) {
|
BiPredicate<String, String> predicateToApply) {
|
||||||
return predicateToApply.test(commit1, commit2);
|
return predicateToApply.test(commit1, commit2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static HoodieInstant getCompletedInstant(final HoodieInstant instant) {
|
||||||
|
return new HoodieInstant(false, instant.getAction(), instant.getTimestamp());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static HoodieInstant getInflightInstant(final HoodieInstant instant) {
|
||||||
|
return new HoodieInstant(true, instant.getAction(), instant.getTimestamp());
|
||||||
|
}
|
||||||
|
|
||||||
|
static String makeCommitFileName(String commitTime) {
|
||||||
|
return commitTime + HoodieTimeline.COMMIT_EXTENSION;
|
||||||
|
}
|
||||||
|
|
||||||
|
static String makeInflightCommitFileName(String commitTime) {
|
||||||
|
return commitTime + HoodieTimeline.INFLIGHT_COMMIT_EXTENSION;
|
||||||
|
}
|
||||||
|
|
||||||
|
static String makeCleanerFileName(String instant) {
|
||||||
|
return instant + HoodieTimeline.CLEAN_EXTENSION;
|
||||||
|
}
|
||||||
|
|
||||||
|
static String makeInflightCleanerFileName(String instant) {
|
||||||
|
return instant + HoodieTimeline.INFLIGHT_CLEAN_EXTENSION;
|
||||||
|
}
|
||||||
|
|
||||||
|
static String makeInflightSavePointFileName(String commitTime) {
|
||||||
|
return commitTime + HoodieTimeline.INFLIGHT_SAVEPOINT_EXTENSION;
|
||||||
|
}
|
||||||
|
|
||||||
|
static String makeSavePointFileName(String commitTime) {
|
||||||
|
return commitTime + HoodieTimeline.SAVEPOINT_EXTENSION;
|
||||||
|
}
|
||||||
|
|
||||||
|
static String getCommitFromCommitFile(String commitFileName) {
|
||||||
|
return commitFileName.split("\\.")[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
static String makeFileNameAsComplete(String fileName) {
|
||||||
|
return fileName.replace(HoodieTimeline.INFLIGHT_EXTENSION, "");
|
||||||
|
}
|
||||||
|
|
||||||
|
static String makeFileNameAsInflight(String fileName) {
|
||||||
|
return fileName + HoodieTimeline.INFLIGHT_EXTENSION;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -51,7 +51,7 @@ public interface TableFileSystemView {
|
|||||||
* @param maxCommitTime
|
* @param maxCommitTime
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
Stream<HoodieDataFile> streamLatestVersionInPartition(String partitionPathStr,
|
Stream<HoodieDataFile> getLatestVersionInPartition(String partitionPathStr,
|
||||||
String maxCommitTime);
|
String maxCommitTime);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -60,7 +60,7 @@ public interface TableFileSystemView {
|
|||||||
* @param partitionPath
|
* @param partitionPath
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
Stream<List<HoodieDataFile>> streamEveryVersionInPartition(String partitionPath);
|
Stream<List<HoodieDataFile>> getEveryVersionInPartition(String partitionPath);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Stream all the versions from the passed in fileStatus[] with commit times containing in commitsToReturn.
|
* Stream all the versions from the passed in fileStatus[] with commit times containing in commitsToReturn.
|
||||||
@@ -69,7 +69,7 @@ public interface TableFileSystemView {
|
|||||||
* @param commitsToReturn
|
* @param commitsToReturn
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
Stream<HoodieDataFile> streamLatestVersionInRange(FileStatus[] fileStatuses,
|
Stream<HoodieDataFile> getLatestVersionInRange(FileStatus[] fileStatuses,
|
||||||
List<String> commitsToReturn);
|
List<String> commitsToReturn);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -79,15 +79,15 @@ public interface TableFileSystemView {
|
|||||||
* @param maxCommitToReturn
|
* @param maxCommitToReturn
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
Stream<HoodieDataFile> streamLatestVersionsBeforeOrOn(FileStatus[] fileStatuses,
|
Stream<HoodieDataFile> getLatestVersionsBeforeOrOn(FileStatus[] fileStatuses,
|
||||||
String maxCommitToReturn);
|
String maxCommitToReturn);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Stream latest versions from the passed in FileStatus[].
|
* Stream latest versions from the passed in FileStatus[].
|
||||||
* Similar to calling streamLatestVersionsBeforeOrOn(fileStatuses, currentTimeAsCommitTime)
|
* Similar to calling getLatestVersionsBeforeOrOn(fileStatuses, currentTimeAsCommitTime)
|
||||||
*
|
*
|
||||||
* @param fileStatuses
|
* @param fileStatuses
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
Stream<HoodieDataFile> streamLatestVersions(FileStatus[] fileStatuses);
|
Stream<HoodieDataFile> getLatestVersions(FileStatus[] fileStatuses);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,77 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package com.uber.hoodie.common.table.timeline;
|
|
||||||
|
|
||||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
|
||||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
|
||||||
import com.uber.hoodie.exception.HoodieIOException;
|
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Active commit timeline. Much optimized for reading than the ArchivedTimeline.
|
|
||||||
*/
|
|
||||||
public class HoodieActiveCommitTimeline extends HoodieDefaultTimeline {
|
|
||||||
public HoodieActiveCommitTimeline(FileSystem fs, String metaPath) {
|
|
||||||
super(fs, metaPath);
|
|
||||||
String completedInstantExtension = HoodieTableMetaClient.COMMIT_EXTENSION;
|
|
||||||
String inflightInstantExtension = INFLIGHT_EXTENSION;
|
|
||||||
|
|
||||||
FileStatus[] fileStatuses;
|
|
||||||
try {
|
|
||||||
fileStatuses = HoodieTableMetaClient.scanFiles(fs, new Path(metaPath),
|
|
||||||
path -> path.toString().endsWith(completedInstantExtension) || path.toString()
|
|
||||||
.endsWith(inflightInstantExtension));
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new HoodieIOException("Failed to scan metadata", e);
|
|
||||||
}
|
|
||||||
this.instants = Arrays.stream(fileStatuses)
|
|
||||||
.filter(status -> status.getPath().getName().endsWith(completedInstantExtension))
|
|
||||||
.map(fileStatus -> fileStatus.getPath().getName().replaceAll(completedInstantExtension, ""))
|
|
||||||
.sorted().collect(Collectors.toList());
|
|
||||||
this.inflights = Arrays.stream(fileStatuses).filter(
|
|
||||||
status -> status.getPath().getName().endsWith(inflightInstantExtension)).map(
|
|
||||||
fileStatus -> fileStatus.getPath().getName()
|
|
||||||
.replaceAll(inflightInstantExtension, "")).sorted()
|
|
||||||
.collect(Collectors.toList());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getInflightFileName(String instant) {
|
|
||||||
return HoodieTableMetaClient.makeInflightCommitFileName(instant);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getCompletedFileName(String instant) {
|
|
||||||
return HoodieTableMetaClient.makeCommitFileName(instant);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected String getTimelineName() {
|
|
||||||
return "commit";
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public HoodieTimeline reload() throws IOException {
|
|
||||||
return new HoodieActiveCommitTimeline(fs, metaPath);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -0,0 +1,252 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.uber.hoodie.common.table.timeline;
|
||||||
|
|
||||||
|
import com.google.common.io.Closeables;
|
||||||
|
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||||
|
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||||
|
import com.uber.hoodie.common.util.FSUtils;
|
||||||
|
import com.uber.hoodie.exception.HoodieIOException;
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.hadoop.fs.FSDataInputStream;
|
||||||
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.log4j.LogManager;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.function.Function;
|
||||||
|
import java.util.function.Predicate;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Represents the Active Timeline for the HoodieDataset. Instants for the last 12 hours (configurable)
|
||||||
|
* is in the ActiveTimeline and the rest are Archived. ActiveTimeline is a special timeline
|
||||||
|
* that allows for creation of instants on the timeline.
|
||||||
|
* <p></p>
|
||||||
|
* The timeline is not automatically reloaded on any mutation operation, clients have to manually call reload()
|
||||||
|
* so that they can chain multiple mutations to the timeline and then call reload() once.
|
||||||
|
* <p></p>
|
||||||
|
* This class can be serialized and de-serialized and on de-serialization the FileSystem is re-initialized.
|
||||||
|
*/
|
||||||
|
public class HoodieActiveTimeline extends HoodieDefaultTimeline {
|
||||||
|
private final transient static Logger log = LogManager.getLogger(HoodieActiveTimeline.class);
|
||||||
|
private String metaPath;
|
||||||
|
private transient FileSystem fs;
|
||||||
|
|
||||||
|
|
||||||
|
protected HoodieActiveTimeline(FileSystem fs, String metaPath, String[] includedExtensions) {
|
||||||
|
// Filter all the filter in the metapath and include only the extensions passed and
|
||||||
|
// convert them into HoodieInstant
|
||||||
|
try {
|
||||||
|
this.instants =
|
||||||
|
Arrays.stream(HoodieTableMetaClient.scanFiles(fs, new Path(metaPath), path -> {
|
||||||
|
// Include only the meta files with extensions that needs to be included
|
||||||
|
String extension = FSUtils.getFileExtension(path.getName());
|
||||||
|
return Arrays.stream(includedExtensions).anyMatch(Predicate.isEqual(extension));
|
||||||
|
})).sorted(Comparator.comparing(
|
||||||
|
// Sort the meta-data by the instant time (first part of the file name)
|
||||||
|
fileStatus -> FSUtils.getInstantTime(fileStatus.getPath().getName())))
|
||||||
|
// create HoodieInstantMarkers from FileStatus, which extracts properties
|
||||||
|
.map(HoodieInstant::new).collect(Collectors.toList());
|
||||||
|
log.info("Loaded instants " + instants);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new HoodieIOException("Failed to scan metadata", e);
|
||||||
|
}
|
||||||
|
this.fs = fs;
|
||||||
|
this.metaPath = metaPath;
|
||||||
|
// multiple casts will make this lambda serializable - http://docs.oracle.com/javase/specs/jls/se8/html/jls-15.html#jls-15.16
|
||||||
|
this.details = (Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails;
|
||||||
|
}
|
||||||
|
|
||||||
|
public HoodieActiveTimeline(FileSystem fs, String metaPath) {
|
||||||
|
this(fs, metaPath,
|
||||||
|
new String[] {COMMIT_EXTENSION, INFLIGHT_COMMIT_EXTENSION, SAVEPOINT_EXTENSION,
|
||||||
|
INFLIGHT_SAVEPOINT_EXTENSION, CLEAN_EXTENSION, INFLIGHT_CLEAN_EXTENSION});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* For serialization and de-serialization only.
|
||||||
|
* @deprecated
|
||||||
|
*/
|
||||||
|
public HoodieActiveTimeline() {
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This method is only used when this object is deserialized in a spark executor.
|
||||||
|
*
|
||||||
|
* @deprecated
|
||||||
|
*/
|
||||||
|
private void readObject(java.io.ObjectInputStream in)
|
||||||
|
throws IOException, ClassNotFoundException {
|
||||||
|
in.defaultReadObject();
|
||||||
|
this.fs = FSUtils.getFs();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get only the commits (inflight and completed) in the active timeline
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public HoodieTimeline getCommitTimeline() {
|
||||||
|
return new HoodieDefaultTimeline(filterInstantsByAction(COMMIT_ACTION),
|
||||||
|
(Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get only the cleaner action (inflight and completed) in the active timeline
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public HoodieTimeline getCleanerTimeline() {
|
||||||
|
return new HoodieDefaultTimeline(filterInstantsByAction(CLEAN_ACTION),
|
||||||
|
(Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get only the save point action (inflight and completed) in the active timeline
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public HoodieTimeline getSavePointTimeline() {
|
||||||
|
return new HoodieDefaultTimeline(filterInstantsByAction(SAVEPOINT_ACTION),
|
||||||
|
(Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
protected Stream<HoodieInstant> filterInstantsByAction(String action) {
|
||||||
|
return instants.stream().filter(s -> s.getAction().equals(action));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void createInflight(HoodieInstant instant) {
|
||||||
|
log.info("Creating a new in-flight instant " + instant);
|
||||||
|
// Create the in-flight file
|
||||||
|
createFileInMetaPath(instant.getFileName(), Optional.empty());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void saveAsComplete(HoodieInstant instant, Optional<byte[]> data) {
|
||||||
|
log.info("Marking instant complete " + instant);
|
||||||
|
moveInflightToComplete(instant, HoodieTimeline.getCompletedInstant(instant), data);
|
||||||
|
log.info("Completed " + instant);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void revertToInflight(HoodieInstant instant) {
|
||||||
|
log.info("Reverting instant to inflight " + instant);
|
||||||
|
moveCompleteToInflight(instant, HoodieTimeline.getInflightInstant(instant));
|
||||||
|
log.info("Reverted " + instant + " to inflight");
|
||||||
|
}
|
||||||
|
|
||||||
|
public void deleteInflight(HoodieInstant instant) {
|
||||||
|
log.info("Deleting in-flight " + instant);
|
||||||
|
Path inFlightCommitFilePath = new Path(metaPath, instant.getFileName());
|
||||||
|
try {
|
||||||
|
boolean result = fs.delete(inFlightCommitFilePath, false);
|
||||||
|
if (result) {
|
||||||
|
log.info("Removed in-flight " + instant);
|
||||||
|
} else {
|
||||||
|
throw new HoodieIOException("Could not delete in-flight instant " + instant);
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new HoodieIOException(
|
||||||
|
"Could not remove inflight commit " + inFlightCommitFilePath, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Optional<byte[]> getInstantDetails(HoodieInstant instant) {
|
||||||
|
Path detailPath = new Path(metaPath, instant.getFileName());
|
||||||
|
return readDataFromPath(detailPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
protected void moveInflightToComplete(HoodieInstant inflight, HoodieInstant completed,
|
||||||
|
Optional<byte[]> data) {
|
||||||
|
Path commitFilePath = new Path(metaPath, completed.getFileName());
|
||||||
|
try {
|
||||||
|
// open a new file and write the commit metadata in
|
||||||
|
Path inflightCommitFile = new Path(metaPath, inflight.getFileName());
|
||||||
|
createFileInMetaPath(inflight.getFileName(), data);
|
||||||
|
boolean success = fs.rename(inflightCommitFile, commitFilePath);
|
||||||
|
if (!success) {
|
||||||
|
throw new HoodieIOException(
|
||||||
|
"Could not rename " + inflightCommitFile + " to " + commitFilePath);
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new HoodieIOException("Could not complete " + inflight, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void moveCompleteToInflight(HoodieInstant completed, HoodieInstant inflight) {
|
||||||
|
Path inFlightCommitFilePath = new Path(metaPath, inflight.getFileName());
|
||||||
|
try {
|
||||||
|
if (!fs.exists(inFlightCommitFilePath)) {
|
||||||
|
Path commitFilePath = new Path(metaPath, completed.getFileName());
|
||||||
|
boolean success = fs.rename(commitFilePath, inFlightCommitFilePath);
|
||||||
|
if (!success) {
|
||||||
|
throw new HoodieIOException(
|
||||||
|
"Could not rename " + commitFilePath + " to " + inFlightCommitFilePath);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new HoodieIOException("Could not complete revert " + completed, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void createFileInMetaPath(String filename, Optional<byte[]> content) {
|
||||||
|
Path fullPath = new Path(metaPath, filename);
|
||||||
|
try {
|
||||||
|
if (!content.isPresent()) {
|
||||||
|
if (fs.createNewFile(fullPath)) {
|
||||||
|
log.info("Created a new file in meta path: " + fullPath);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
FSDataOutputStream fsout = fs.create(fullPath, true);
|
||||||
|
fsout.write(content.get());
|
||||||
|
fsout.close();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
throw new HoodieIOException("Failed to create file " + fullPath);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new HoodieIOException("Failed to create file " + fullPath, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Optional<byte[]> readDataFromPath(Path detailPath) {
|
||||||
|
FSDataInputStream is = null;
|
||||||
|
try {
|
||||||
|
is = fs.open(detailPath);
|
||||||
|
return Optional.of(IOUtils.toByteArray(is));
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new HoodieIOException("Could not read commit details from " + detailPath, e);
|
||||||
|
} finally {
|
||||||
|
if (is != null) {
|
||||||
|
Closeables.closeQuietly(is);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public HoodieActiveTimeline reload() {
|
||||||
|
return new HoodieActiveTimeline(fs, metaPath);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -18,30 +18,42 @@ package com.uber.hoodie.common.table.timeline;
|
|||||||
|
|
||||||
import com.google.common.io.Closeables;
|
import com.google.common.io.Closeables;
|
||||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||||
|
import com.uber.hoodie.common.util.FSUtils;
|
||||||
import com.uber.hoodie.exception.HoodieIOException;
|
import com.uber.hoodie.exception.HoodieIOException;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.io.SequenceFile;
|
import org.apache.hadoop.io.SequenceFile;
|
||||||
import org.apache.hadoop.io.Text;
|
import org.apache.hadoop.io.Text;
|
||||||
|
import org.apache.log4j.LogManager;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.io.Serializable;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
import java.util.function.Function;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Archived commit timeline. These commits are usually cleaned up and the meta data is archived for
|
* Represents the Archived Timeline for the HoodieDataset. Instants for the last 12 hours (configurable)
|
||||||
* future triaging
|
* is in the ActiveTimeline and the rest are in ArchivedTimeline.
|
||||||
*
|
* <p></p>
|
||||||
* @since 0.3.0
|
* Instants are read from the archive file during initialization and never refreshed. To refresh, clients
|
||||||
|
* need to call reload()
|
||||||
|
* <p></p>
|
||||||
|
* This class can be serialized and de-serialized and on de-serialization the FileSystem is re-initialized.
|
||||||
*/
|
*/
|
||||||
public class HoodieArchivedCommitTimeline extends HoodieDefaultTimeline {
|
public class HoodieArchivedTimeline extends HoodieDefaultTimeline {
|
||||||
private static final String HOODIE_COMMIT_ARCHIVE_LOG_FILE = "commits.archived";
|
private static final String HOODIE_COMMIT_ARCHIVE_LOG_FILE = "commits.archived";
|
||||||
private transient Map<String, byte[]> readCommits = new HashMap<>();
|
private transient FileSystem fs;
|
||||||
|
private String metaPath;
|
||||||
|
private Map<String, byte[]> readCommits = new HashMap<>();
|
||||||
|
|
||||||
public HoodieArchivedCommitTimeline(FileSystem fs, String metaPath) {
|
private final transient static Logger log = LogManager.getLogger(HoodieArchivedTimeline.class);
|
||||||
|
|
||||||
|
public HoodieArchivedTimeline(FileSystem fs, String metaPath) {
|
||||||
// Read back the commits to make sure
|
// Read back the commits to make sure
|
||||||
Path archiveLogPath = getArchiveLogPath(metaPath);
|
Path archiveLogPath = getArchiveLogPath(metaPath);
|
||||||
try {
|
try {
|
||||||
@@ -55,8 +67,9 @@ public class HoodieArchivedCommitTimeline extends HoodieDefaultTimeline {
|
|||||||
// This is okay because only tooling will load the archived commit timeline today
|
// This is okay because only tooling will load the archived commit timeline today
|
||||||
readCommits.put(key.toString(), Arrays.copyOf(val.getBytes(), val.getLength()));
|
readCommits.put(key.toString(), Arrays.copyOf(val.getBytes(), val.getLength()));
|
||||||
}
|
}
|
||||||
this.instants = new ArrayList<>(readCommits.keySet());
|
this.instants = readCommits.keySet().stream().map(
|
||||||
this.inflights = new ArrayList<>(0);
|
s -> new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, s)).collect(
|
||||||
|
Collectors.toList());
|
||||||
} finally {
|
} finally {
|
||||||
Closeables.closeQuietly(reader);
|
Closeables.closeQuietly(reader);
|
||||||
}
|
}
|
||||||
@@ -64,58 +77,42 @@ public class HoodieArchivedCommitTimeline extends HoodieDefaultTimeline {
|
|||||||
throw new HoodieIOException(
|
throw new HoodieIOException(
|
||||||
"Could not load archived commit timeline from path " + archiveLogPath, e);
|
"Could not load archived commit timeline from path " + archiveLogPath, e);
|
||||||
}
|
}
|
||||||
|
// multiple casts will make this lambda serializable - http://docs.oracle.com/javase/specs/jls/se8/html/jls-15.html#jls-15.16
|
||||||
|
this.details = (Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails;
|
||||||
|
this.fs = fs;
|
||||||
|
this.metaPath = metaPath;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
/**
|
||||||
public void saveInstantAsInflight(String instant) {
|
* For serialization and de-serialization only.
|
||||||
throw new UnsupportedOperationException(
|
* @deprecated
|
||||||
"Could not save inflight instant in ArchivedTimeline " + instant);
|
*/
|
||||||
|
public HoodieArchivedTimeline() {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
/**
|
||||||
public void saveInstantAsComplete(String instant, Optional<byte[]> data) {
|
* This method is only used when this object is deserialized in a spark executor.
|
||||||
throw new UnsupportedOperationException(
|
*
|
||||||
"Could not save instant as complete in ArchivedTimeline " + instant);
|
* @deprecated
|
||||||
|
*/
|
||||||
|
private void readObject(java.io.ObjectInputStream in)
|
||||||
|
throws IOException, ClassNotFoundException {
|
||||||
|
in.defaultReadObject();
|
||||||
|
this.fs = FSUtils.getFs();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public void revertInstantToInflight(String instant) {
|
|
||||||
throw new UnsupportedOperationException(
|
|
||||||
"Could not revert instant in ArchivedTimeline " + instant);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void removeInflightFromTimeline(String instant) {
|
|
||||||
throw new UnsupportedOperationException(
|
|
||||||
"Could not delete inflight instant from ArchivedTimeline " + instant);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public HoodieTimeline reload() throws IOException {
|
|
||||||
return new HoodieArchivedCommitTimeline(fs, metaPath);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Optional<byte[]> readInstantDetails(String instant) {
|
|
||||||
return Optional.ofNullable(readCommits.get(instant));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected String getInflightFileName(String instant) {
|
|
||||||
throw new UnsupportedOperationException("No inflight filename for archived commits");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected String getCompletedFileName(String instant) {
|
|
||||||
throw new UnsupportedOperationException("No inflight filename for archived commits");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected String getTimelineName() {
|
|
||||||
return "archived-commits";
|
|
||||||
}
|
|
||||||
|
|
||||||
public static Path getArchiveLogPath(String metaPath) {
|
public static Path getArchiveLogPath(String metaPath) {
|
||||||
return new Path(metaPath, HOODIE_COMMIT_ARCHIVE_LOG_FILE);
|
return new Path(metaPath, HOODIE_COMMIT_ARCHIVE_LOG_FILE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Optional<byte[]> getInstantDetails(HoodieInstant instant) {
|
||||||
|
return Optional.ofNullable(readCommits.get(instant.getTimestamp()));
|
||||||
|
}
|
||||||
|
|
||||||
|
public HoodieArchivedTimeline reload() {
|
||||||
|
return new HoodieArchivedTimeline(fs, metaPath);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -1,57 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package com.uber.hoodie.common.table.timeline;
|
|
||||||
|
|
||||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
|
||||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Optional;
|
|
||||||
|
|
||||||
public class HoodieCleanerTimeline extends HoodieDefaultTimeline {
|
|
||||||
public HoodieCleanerTimeline(FileSystem fs, String path) {
|
|
||||||
super(fs, path, HoodieTableMetaClient.CLEAN_EXTENSION);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public HoodieTimeline reload() throws IOException {
|
|
||||||
return new HoodieCleanerTimeline(fs, metaPath);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Optional<byte[]> readInstantDetails(String instant) {
|
|
||||||
// TODO - Nothing about the clean written today - this should change
|
|
||||||
return Optional.empty();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected String getInflightFileName(String instant) {
|
|
||||||
return HoodieTableMetaClient.makeInflightCleanerFileName(instant);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected String getCompletedFileName(String instant) {
|
|
||||||
return HoodieTableMetaClient.makeCleanerFileName(instant);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected String getTimelineName() {
|
|
||||||
return "cleaner";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -16,329 +16,131 @@
|
|||||||
|
|
||||||
package com.uber.hoodie.common.table.timeline;
|
package com.uber.hoodie.common.table.timeline;
|
||||||
|
|
||||||
import com.google.common.io.Closeables;
|
|
||||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
|
||||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||||
import com.uber.hoodie.common.util.FSUtils;
|
|
||||||
import com.uber.hoodie.exception.HoodieIOException;
|
|
||||||
import org.apache.commons.io.IOUtils;
|
|
||||||
import org.apache.hadoop.fs.FSDataInputStream;
|
|
||||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
import org.apache.log4j.LogManager;
|
import org.apache.log4j.LogManager;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
import java.util.function.Function;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* HoodieTimeline allows representation of meta-data events as a timeline.
|
* HoodieDefaultTimeline is a default implementation of the HoodieTimeline.
|
||||||
* Instants are specific points in time represented as strings.
|
* It provides methods to inspect a List[HoodieInstant]. Function to get the details of the instant
|
||||||
* in this format YYYYMMDDHHmmSS. e.g. 20170101193218
|
* is passed in as a lamdba.
|
||||||
* Any operation on the timeline starts with the inflight instant and then when complete marks
|
|
||||||
* the completed instant and removes the inflight instant.
|
|
||||||
* Completed instants are plainly referred to as just instants
|
|
||||||
* <p>
|
|
||||||
* Timelines as immutable once created. Any operation to change the timeline (like create/delete instants)
|
|
||||||
* will not be reflected unless explicitly reloaded using the reload()
|
|
||||||
*
|
*
|
||||||
* @see com.uber.hoodie.common.table.HoodieTableMetaClient
|
|
||||||
* @see HoodieTimeline
|
* @see HoodieTimeline
|
||||||
* @since 0.3.0
|
|
||||||
*/
|
*/
|
||||||
public abstract class HoodieDefaultTimeline implements HoodieTimeline {
|
public class HoodieDefaultTimeline implements HoodieTimeline {
|
||||||
private final transient static Logger log = LogManager.getLogger(HoodieDefaultTimeline.class);
|
private final transient static Logger log = LogManager.getLogger(HoodieDefaultTimeline.class);
|
||||||
|
|
||||||
public static final String INFLIGHT_EXTENSION = ".inflight";
|
protected Function<HoodieInstant, Optional<byte[]>> details;
|
||||||
protected String metaPath;
|
protected List<HoodieInstant> instants;
|
||||||
protected transient FileSystem fs;
|
|
||||||
protected List<String> inflights;
|
|
||||||
protected List<String> instants;
|
|
||||||
|
|
||||||
public HoodieDefaultTimeline(FileSystem fs, String metaPath, String fileExtension) {
|
public HoodieDefaultTimeline(Stream<HoodieInstant> instants,
|
||||||
String completedInstantExtension = fileExtension;
|
Function<HoodieInstant, Optional<byte[]>> details) {
|
||||||
String inflightInstantExtension = fileExtension + INFLIGHT_EXTENSION;
|
|
||||||
|
|
||||||
FileStatus[] fileStatuses;
|
|
||||||
try {
|
|
||||||
fileStatuses = HoodieTableMetaClient.scanFiles(fs, new Path(metaPath),
|
|
||||||
path -> path.toString().endsWith(completedInstantExtension) || path.toString()
|
|
||||||
.endsWith(inflightInstantExtension));
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new HoodieIOException("Failed to scan metadata", e);
|
|
||||||
}
|
|
||||||
this.instants = Arrays.stream(fileStatuses)
|
|
||||||
.filter(status -> status.getPath().getName().endsWith(completedInstantExtension))
|
|
||||||
.map(fileStatus -> fileStatus.getPath().getName().replaceAll(completedInstantExtension, ""))
|
|
||||||
.sorted().collect(Collectors.toList());
|
|
||||||
this.inflights = Arrays.stream(fileStatuses).filter(
|
|
||||||
status -> status.getPath().getName().endsWith(inflightInstantExtension)).map(
|
|
||||||
fileStatus -> fileStatus.getPath().getName()
|
|
||||||
.replaceAll(inflightInstantExtension, "")).sorted()
|
|
||||||
.collect(Collectors.toList());
|
|
||||||
this.fs = fs;
|
|
||||||
this.metaPath = metaPath;
|
|
||||||
}
|
|
||||||
|
|
||||||
public HoodieDefaultTimeline(Stream<String> instants, Stream<String> inflights) {
|
|
||||||
this.instants = instants.collect(Collectors.toList());
|
this.instants = instants.collect(Collectors.toList());
|
||||||
this.inflights = inflights.collect(Collectors.toList());
|
this.details = details;
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This constructor only supports backwards compatibility in inflight commits in ActiveCommitTimeline.
|
|
||||||
* This should never be used.
|
|
||||||
*
|
|
||||||
* @param fs
|
|
||||||
* @param metaPath
|
|
||||||
* @deprecated
|
|
||||||
*/
|
|
||||||
public HoodieDefaultTimeline(FileSystem fs, String metaPath) {
|
|
||||||
this.fs = fs;
|
|
||||||
this.metaPath = metaPath;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* For serailizing and de-serializing
|
* For serailizing and de-serializing
|
||||||
|
*
|
||||||
* @deprecated
|
* @deprecated
|
||||||
*/
|
*/
|
||||||
public HoodieDefaultTimeline() {
|
public HoodieDefaultTimeline() {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public HoodieTimeline filterInflights() {
|
||||||
|
return new HoodieDefaultTimeline(instants.stream().filter(HoodieInstant::isInflight),
|
||||||
|
details);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
public HoodieTimeline filterCompletedInstants() {
|
||||||
* This method is only used when this object is deserialized in a spark executor.
|
return new HoodieDefaultTimeline(instants.stream().filter(s -> !s.isInflight()), details);
|
||||||
* @deprecated
|
|
||||||
*/
|
|
||||||
private void readObject(java.io.ObjectInputStream in)
|
|
||||||
throws IOException, ClassNotFoundException {
|
|
||||||
in.defaultReadObject();
|
|
||||||
this.fs = FSUtils.getFs();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Stream<String> findInstantsInRange(String startTs, String endTs) {
|
public HoodieDefaultTimeline findInstantsInRange(String startTs, String endTs) {
|
||||||
return instants.stream().filter(
|
return new HoodieDefaultTimeline(instants.stream().filter(
|
||||||
s -> compareInstants(s, startTs, GREATER) && compareInstants(s, endTs,
|
s -> compareTimestamps(s.getTimestamp(), startTs, GREATER) && compareTimestamps(
|
||||||
LESSER_OR_EQUAL));
|
s.getTimestamp(), endTs, LESSER_OR_EQUAL)), details);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Stream<String> findInstantsAfter(String commitTime, int numCommits) {
|
public HoodieDefaultTimeline findInstantsAfter(String commitTime, int numCommits) {
|
||||||
return instants.stream().filter(s -> compareInstants(s, commitTime, GREATER))
|
return new HoodieDefaultTimeline(
|
||||||
.limit(numCommits);
|
instants.stream().filter(s -> compareTimestamps(s.getTimestamp(), commitTime, GREATER))
|
||||||
|
.limit(numCommits), details);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean hasInstants() {
|
public boolean empty() {
|
||||||
return instants.stream().count() != 0;
|
return !instants.stream().findFirst().isPresent();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean hasInflightInstants() {
|
public int countInstants() {
|
||||||
return inflights.stream().count() != 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int getTotalInstants() {
|
|
||||||
return new Long(instants.stream().count()).intValue();
|
return new Long(instants.stream().count()).intValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Optional<String> firstInstant() {
|
public Optional<HoodieInstant> firstInstant() {
|
||||||
return instants.stream().findFirst();
|
return instants.stream().findFirst();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Optional<String> nthInstant(int n) {
|
public Optional<HoodieInstant> nthInstant(int n) {
|
||||||
if(!hasInstants() || n >= getTotalInstants()) {
|
if (empty() || n >= countInstants()) {
|
||||||
return Optional.empty();
|
return Optional.empty();
|
||||||
}
|
}
|
||||||
return Optional.of(instants.get(n));
|
return Optional.of(instants.get(n));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Optional<String> lastInstant() {
|
public Optional<HoodieInstant> lastInstant() {
|
||||||
return hasInstants() ? nthInstant(getTotalInstants() - 1) : Optional.empty();
|
return empty() ? Optional.empty() : nthInstant(countInstants() - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Optional<String> nthFromLastInstant(int n) {
|
public Optional<HoodieInstant> nthFromLastInstant(int n) {
|
||||||
if(getTotalInstants() < n + 1) {
|
if (countInstants() < n + 1) {
|
||||||
return Optional.empty();
|
return Optional.empty();
|
||||||
}
|
}
|
||||||
return nthInstant(getTotalInstants() - 1 - n);
|
return nthInstant(countInstants() - 1 - n);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean containsInstant(String instant) {
|
public boolean containsInstant(HoodieInstant instant) {
|
||||||
return instants.stream().anyMatch(s -> s.equals(instant));
|
return instants.stream().anyMatch(s -> s.equals(instant));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean containsOrBeforeTimelineStarts(String instant) {
|
public boolean containsOrBeforeTimelineStarts(String instant) {
|
||||||
return containsInstant(instant) || isInstantBeforeTimelineStarts(instant);
|
return instants.stream().anyMatch(s -> s.getTimestamp().equals(instant))
|
||||||
|
|| isBeforeTimelineStarts(instant);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Stream<String> getInstants() {
|
public Stream<HoodieInstant> getInstants() {
|
||||||
return instants.stream();
|
return instants.stream();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Stream<String> getInflightInstants() {
|
public boolean isBeforeTimelineStarts(String instant) {
|
||||||
return inflights.stream();
|
Optional<HoodieInstant> firstCommit = firstInstant();
|
||||||
|
return firstCommit.isPresent() && compareTimestamps(instant,
|
||||||
|
firstCommit.get().getTimestamp(), LESSER);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean isInstantBeforeTimelineStarts(String instant) {
|
public Optional<byte[]> getInstantDetails(HoodieInstant instant) {
|
||||||
Optional<String> firstCommit = firstInstant();
|
return details.apply(instant);
|
||||||
return firstCommit.isPresent() && compareInstants(instant, firstCommit.get(), LESSER);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public void saveInstantAsInflight(String instant) {
|
|
||||||
log.info("Creating a new in-flight " + getTimelineName() + " " + instant);
|
|
||||||
// Create the in-flight file
|
|
||||||
createFileInMetaPath(getInflightFileName(instant), Optional.empty());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void saveInstantAsComplete(String instant, Optional<byte[]> data) {
|
|
||||||
log.info("Marking complete " + getTimelineName() + " " + instant);
|
|
||||||
moveInflightToComplete(instant, data, getCompletedFileName(instant),
|
|
||||||
HoodieTableMetaClient.makeInflightCommitFileName(instant));
|
|
||||||
log.info("Completed " + getTimelineName() + " " + instant);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void revertInstantToInflight(String instant) {
|
|
||||||
log.info("Reverting instant to inflight " + getTimelineName() + " " + instant);
|
|
||||||
moveCompleteToInflight(instant, getCompletedFileName(instant),
|
|
||||||
getInflightFileName(instant));
|
|
||||||
log.info("Reverted " + getTimelineName() + " " + instant + " to inflight");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void removeInflightFromTimeline(String instant) {
|
|
||||||
log.info("Removing in-flight " + getTimelineName() + " " + instant);
|
|
||||||
String inFlightCommitFileName = getInflightFileName(instant);
|
|
||||||
Path inFlightCommitFilePath = new Path(metaPath, inFlightCommitFileName);
|
|
||||||
try {
|
|
||||||
fs.delete(inFlightCommitFilePath, false);
|
|
||||||
log.info("Removed in-flight " + getTimelineName() + " " + instant);
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new HoodieIOException(
|
|
||||||
"Could not remove inflight commit " + inFlightCommitFilePath, e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Optional<byte[]> readInstantDetails(String instant) {
|
|
||||||
Path detailPath = new Path(metaPath, getCompletedFileName(instant));
|
|
||||||
return readDataFromPath(detailPath);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the in-flight instant file name
|
|
||||||
*
|
|
||||||
* @param instant
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
protected abstract String getInflightFileName(String instant);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the completed instant file name
|
|
||||||
*
|
|
||||||
* @param instant
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
protected abstract String getCompletedFileName(String instant);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the timeline name
|
|
||||||
*
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
protected abstract String getTimelineName();
|
|
||||||
|
|
||||||
|
|
||||||
protected void moveInflightToComplete(String instant, Optional<byte[]> data,
|
|
||||||
String commitFileName, String inflightFileName) {
|
|
||||||
Path commitFilePath = new Path(metaPath, commitFileName);
|
|
||||||
try {
|
|
||||||
// open a new file and write the commit metadata in
|
|
||||||
Path inflightCommitFile = new Path(metaPath, inflightFileName);
|
|
||||||
createFileInMetaPath(inflightFileName, data);
|
|
||||||
boolean success = fs.rename(inflightCommitFile, commitFilePath);
|
|
||||||
if (!success) {
|
|
||||||
throw new HoodieIOException(
|
|
||||||
"Could not rename " + inflightCommitFile + " to " + commitFilePath);
|
|
||||||
}
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new HoodieIOException("Could not complete commit " + instant, e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void moveCompleteToInflight(String instant, String commitFileName,
|
|
||||||
String inflightFileName) {
|
|
||||||
Path inFlightCommitFilePath = new Path(metaPath, inflightFileName);
|
|
||||||
try {
|
|
||||||
if (!fs.exists(inFlightCommitFilePath)) {
|
|
||||||
Path commitFilePath = new Path(metaPath, commitFileName);
|
|
||||||
boolean success = fs.rename(commitFilePath, inFlightCommitFilePath);
|
|
||||||
if (!success) {
|
|
||||||
throw new HoodieIOException(
|
|
||||||
"Could not rename " + commitFilePath + " to " + inFlightCommitFilePath);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new HoodieIOException("Could not complete commit revert " + instant, e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void createFileInMetaPath(String filename, Optional<byte[]> content) {
|
|
||||||
Path fullPath = new Path(metaPath, filename);
|
|
||||||
try {
|
|
||||||
if (!content.isPresent()) {
|
|
||||||
if (fs.createNewFile(fullPath)) {
|
|
||||||
log.info("Created a new file in meta path: " + fullPath);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
FSDataOutputStream fsout = fs.create(fullPath, true);
|
|
||||||
fsout.write(content.get());
|
|
||||||
fsout.close();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
throw new HoodieIOException("Failed to create file " + fullPath);
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new HoodieIOException("Failed to create file " + fullPath, e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
protected Optional<byte[]> readDataFromPath(Path detailPath) {
|
|
||||||
FSDataInputStream is = null;
|
|
||||||
try {
|
|
||||||
is = fs.open(detailPath);
|
|
||||||
return Optional.of(IOUtils.toByteArray(is));
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new HoodieIOException("Could not read commit details from " + detailPath, e);
|
|
||||||
} finally {
|
|
||||||
if (is != null) {
|
|
||||||
Closeables.closeQuietly(is);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return this.getClass().getName() + ": " + instants.stream().map(Object::toString)
|
return this.getClass().getName() + ": " + instants.stream().map(Object::toString)
|
||||||
|
|||||||
@@ -0,0 +1,121 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.uber.hoodie.common.table.timeline;
|
||||||
|
|
||||||
|
import com.google.common.io.Files;
|
||||||
|
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||||
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A Hoodie Instant represents a action done on a hoodie dataset.
|
||||||
|
* All actions start with a inflight instant and then create a completed instant after done.
|
||||||
|
*
|
||||||
|
* @see HoodieTimeline
|
||||||
|
*/
|
||||||
|
public class HoodieInstant implements Serializable {
|
||||||
|
private boolean isInflight = false;
|
||||||
|
private String action;
|
||||||
|
private String timestamp;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Load the instant from the meta FileStatus
|
||||||
|
* @param fileStatus
|
||||||
|
*/
|
||||||
|
public HoodieInstant(FileStatus fileStatus) {
|
||||||
|
// First read the instant timestamp. [==>20170101193025<==].commit
|
||||||
|
String fileName = fileStatus.getPath().getName();
|
||||||
|
String fileExtension = Files.getFileExtension(fileName);
|
||||||
|
timestamp = fileName.replace("." + fileExtension, "");
|
||||||
|
|
||||||
|
// Next read the action for this marker
|
||||||
|
action = fileExtension;
|
||||||
|
if(action.equals("inflight")) {
|
||||||
|
// This is to support backwards compatibility on how in-flight commit files were written
|
||||||
|
// General rule is inflight extension is .<action>.inflight, but for commit it is .inflight
|
||||||
|
action = "commit";
|
||||||
|
isInflight = true;
|
||||||
|
} else if (action.contains(HoodieTimeline.INFLIGHT_EXTENSION)) {
|
||||||
|
isInflight = true;
|
||||||
|
action = action.replace(HoodieTimeline.INFLIGHT_EXTENSION, "");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public HoodieInstant(boolean isInflight, String action, String timestamp) {
|
||||||
|
this.isInflight = isInflight;
|
||||||
|
this.action = action;
|
||||||
|
this.timestamp = timestamp;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isInflight() {
|
||||||
|
return isInflight;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getAction() {
|
||||||
|
return action;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getTimestamp() {
|
||||||
|
return timestamp;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the filename for this instant
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public String getFileName() {
|
||||||
|
if (HoodieTimeline.COMMIT_ACTION.equals(action)) {
|
||||||
|
return isInflight ?
|
||||||
|
HoodieTimeline.makeInflightCommitFileName(timestamp) :
|
||||||
|
HoodieTimeline.makeCommitFileName(timestamp);
|
||||||
|
} else if (HoodieTimeline.CLEAN_ACTION.equals(action)) {
|
||||||
|
return isInflight ?
|
||||||
|
HoodieTimeline.makeInflightCleanerFileName(timestamp) :
|
||||||
|
HoodieTimeline.makeCleanerFileName(timestamp);
|
||||||
|
} else if (HoodieTimeline.SAVEPOINT_ACTION.equals(action)) {
|
||||||
|
return isInflight ?
|
||||||
|
HoodieTimeline.makeInflightSavePointFileName(timestamp) :
|
||||||
|
HoodieTimeline.makeSavePointFileName(timestamp);
|
||||||
|
}
|
||||||
|
throw new IllegalArgumentException("Cannot get file name for unknown action " + action);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if (this == o)
|
||||||
|
return true;
|
||||||
|
if (o == null || getClass() != o.getClass())
|
||||||
|
return false;
|
||||||
|
HoodieInstant that = (HoodieInstant) o;
|
||||||
|
return isInflight == that.isInflight &&
|
||||||
|
Objects.equals(action, that.action) &&
|
||||||
|
Objects.equals(timestamp, that.timestamp);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return Objects.hash(isInflight, action, timestamp);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "[" + ((isInflight) ? "==>" : "") + timestamp + "__" + action + "]";
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,50 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package com.uber.hoodie.common.table.timeline;
|
|
||||||
|
|
||||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
|
||||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
public class HoodieSavePointTimeline extends HoodieDefaultTimeline {
|
|
||||||
public HoodieSavePointTimeline(FileSystem fs, String metaPath) {
|
|
||||||
super(fs, metaPath, HoodieTableMetaClient.SAVEPOINT_EXTENSION);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public HoodieTimeline reload() throws IOException {
|
|
||||||
return new HoodieSavePointTimeline(fs, metaPath);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected String getInflightFileName(String instant) {
|
|
||||||
return HoodieTableMetaClient.makeInflightSavePointFileName(instant);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected String getCompletedFileName(String instant) {
|
|
||||||
return HoodieTableMetaClient.makeSavePointFileName(instant);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected String getTimelineName() {
|
|
||||||
return "savepoint";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -20,6 +20,7 @@ import com.uber.hoodie.common.model.HoodieDataFile;
|
|||||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||||
import com.uber.hoodie.common.table.TableFileSystemView;
|
import com.uber.hoodie.common.table.TableFileSystemView;
|
||||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||||
|
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||||
import com.uber.hoodie.exception.HoodieIOException;
|
import com.uber.hoodie.exception.HoodieIOException;
|
||||||
import org.apache.commons.lang3.tuple.Pair;
|
import org.apache.commons.lang3.tuple.Pair;
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
@@ -38,7 +39,7 @@ import java.util.stream.Stream;
|
|||||||
/**
|
/**
|
||||||
* Common abstract implementation for multiple TableFileSystemView Implementations.
|
* Common abstract implementation for multiple TableFileSystemView Implementations.
|
||||||
* 2 possible implementations are ReadOptimizedView and RealtimeView
|
* 2 possible implementations are ReadOptimizedView and RealtimeView
|
||||||
*
|
* <p>
|
||||||
* Concrete implementations extending this abstract class, should only implement
|
* Concrete implementations extending this abstract class, should only implement
|
||||||
* listDataFilesInPartition which includes files to be included in the view
|
* listDataFilesInPartition which includes files to be included in the view
|
||||||
*
|
*
|
||||||
@@ -54,24 +55,26 @@ public abstract class AbstractTableFileSystemView implements TableFileSystemView
|
|||||||
public AbstractTableFileSystemView(FileSystem fs, HoodieTableMetaClient metaClient) {
|
public AbstractTableFileSystemView(FileSystem fs, HoodieTableMetaClient metaClient) {
|
||||||
this.metaClient = metaClient;
|
this.metaClient = metaClient;
|
||||||
this.fs = fs;
|
this.fs = fs;
|
||||||
this.activeCommitTimeline = metaClient.getActiveCommitTimeline();
|
// Get the active timeline and filter only completed commits
|
||||||
|
this.activeCommitTimeline =
|
||||||
|
metaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants();
|
||||||
}
|
}
|
||||||
|
|
||||||
public Stream<HoodieDataFile> getLatestDataFilesForFileId(final String partitionPath,
|
public Stream<HoodieDataFile> getLatestDataFilesForFileId(final String partitionPath,
|
||||||
String fileId) {
|
String fileId) {
|
||||||
Optional<String> lastInstant = activeCommitTimeline.lastInstant();
|
Optional<HoodieInstant> lastInstant = activeCommitTimeline.lastInstant();
|
||||||
if (lastInstant.isPresent()) {
|
if (lastInstant.isPresent()) {
|
||||||
return streamLatestVersionInPartition(partitionPath, lastInstant.get())
|
return getLatestVersionInPartition(partitionPath, lastInstant.get().getTimestamp())
|
||||||
.filter(hoodieDataFile -> hoodieDataFile.getFileId().equals(fileId));
|
.filter(hoodieDataFile -> hoodieDataFile.getFileId().equals(fileId));
|
||||||
}
|
}
|
||||||
return Stream.empty();
|
return Stream.empty();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Stream<HoodieDataFile> streamLatestVersionInPartition(String partitionPathStr,
|
public Stream<HoodieDataFile> getLatestVersionInPartition(String partitionPathStr,
|
||||||
String maxCommitTime) {
|
String maxCommitTime) {
|
||||||
try {
|
try {
|
||||||
return streamLatestVersionsBeforeOrOn(listDataFilesInPartition(partitionPathStr),
|
return getLatestVersionsBeforeOrOn(listDataFilesInPartition(partitionPathStr),
|
||||||
maxCommitTime);
|
maxCommitTime);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new HoodieIOException(
|
throw new HoodieIOException(
|
||||||
@@ -81,11 +84,11 @@ public abstract class AbstractTableFileSystemView implements TableFileSystemView
|
|||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Stream<List<HoodieDataFile>> streamEveryVersionInPartition(String partitionPath) {
|
public Stream<List<HoodieDataFile>> getEveryVersionInPartition(String partitionPath) {
|
||||||
try {
|
try {
|
||||||
if (activeCommitTimeline.lastInstant().isPresent()) {
|
if (activeCommitTimeline.lastInstant().isPresent()) {
|
||||||
return streamFilesByFileId(listDataFilesInPartition(partitionPath),
|
return getFilesByFileId(listDataFilesInPartition(partitionPath),
|
||||||
activeCommitTimeline.lastInstant().get());
|
activeCommitTimeline.lastInstant().get().getTimestamp());
|
||||||
}
|
}
|
||||||
return Stream.empty();
|
return Stream.empty();
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
@@ -98,13 +101,14 @@ public abstract class AbstractTableFileSystemView implements TableFileSystemView
|
|||||||
throws IOException;
|
throws IOException;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Stream<HoodieDataFile> streamLatestVersionInRange(FileStatus[] fileStatuses,
|
public Stream<HoodieDataFile> getLatestVersionInRange(FileStatus[] fileStatuses,
|
||||||
List<String> commitsToReturn) {
|
List<String> commitsToReturn) {
|
||||||
if (!activeCommitTimeline.hasInstants() || commitsToReturn.isEmpty()) {
|
if (activeCommitTimeline.empty() || commitsToReturn.isEmpty()) {
|
||||||
return Stream.empty();
|
return Stream.empty();
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
return streamFilesByFileId(fileStatuses, activeCommitTimeline.lastInstant().get())
|
return getFilesByFileId(fileStatuses,
|
||||||
|
activeCommitTimeline.lastInstant().get().getTimestamp())
|
||||||
.map((Function<List<HoodieDataFile>, Optional<HoodieDataFile>>) fss -> {
|
.map((Function<List<HoodieDataFile>, Optional<HoodieDataFile>>) fss -> {
|
||||||
for (HoodieDataFile fs : fss) {
|
for (HoodieDataFile fs : fss) {
|
||||||
if (commitsToReturn.contains(fs.getCommitTime())) {
|
if (commitsToReturn.contains(fs.getCommitTime())) {
|
||||||
@@ -120,17 +124,18 @@ public abstract class AbstractTableFileSystemView implements TableFileSystemView
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Stream<HoodieDataFile> streamLatestVersionsBeforeOrOn(FileStatus[] fileStatuses,
|
public Stream<HoodieDataFile> getLatestVersionsBeforeOrOn(FileStatus[] fileStatuses,
|
||||||
String maxCommitToReturn) {
|
String maxCommitToReturn) {
|
||||||
try {
|
try {
|
||||||
if (!activeCommitTimeline.hasInstants()) {
|
if (activeCommitTimeline.empty()) {
|
||||||
return Stream.empty();
|
return Stream.empty();
|
||||||
}
|
}
|
||||||
return streamFilesByFileId(fileStatuses, activeCommitTimeline.lastInstant().get())
|
return getFilesByFileId(fileStatuses,
|
||||||
|
activeCommitTimeline.lastInstant().get().getTimestamp())
|
||||||
.map((Function<List<HoodieDataFile>, Optional<HoodieDataFile>>) fss -> {
|
.map((Function<List<HoodieDataFile>, Optional<HoodieDataFile>>) fss -> {
|
||||||
for (HoodieDataFile fs1 : fss) {
|
for (HoodieDataFile fs1 : fss) {
|
||||||
if (activeCommitTimeline
|
if (activeCommitTimeline
|
||||||
.compareInstants(fs1.getCommitTime(), maxCommitToReturn,
|
.compareTimestamps(fs1.getCommitTime(), maxCommitToReturn,
|
||||||
HoodieTimeline.LESSER_OR_EQUAL)) {
|
HoodieTimeline.LESSER_OR_EQUAL)) {
|
||||||
return Optional.of(fs1);
|
return Optional.of(fs1);
|
||||||
}
|
}
|
||||||
@@ -143,19 +148,20 @@ public abstract class AbstractTableFileSystemView implements TableFileSystemView
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Stream<HoodieDataFile> streamLatestVersions(FileStatus[] fileStatuses) {
|
public Stream<HoodieDataFile> getLatestVersions(FileStatus[] fileStatuses) {
|
||||||
try {
|
try {
|
||||||
if (!activeCommitTimeline.hasInstants()) {
|
if (activeCommitTimeline.empty()) {
|
||||||
return Stream.empty();
|
return Stream.empty();
|
||||||
}
|
}
|
||||||
return streamFilesByFileId(fileStatuses, activeCommitTimeline.lastInstant().get())
|
return getFilesByFileId(fileStatuses,
|
||||||
|
activeCommitTimeline.lastInstant().get().getTimestamp())
|
||||||
.map(statuses -> statuses.get(0));
|
.map(statuses -> statuses.get(0));
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new HoodieIOException("Could not filter files for latest version ", e);
|
throw new HoodieIOException("Could not filter files for latest version ", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Stream<List<HoodieDataFile>> streamFilesByFileId(FileStatus[] files,
|
protected Stream<List<HoodieDataFile>> getFilesByFileId(FileStatus[] files,
|
||||||
String maxCommitTime) throws IOException {
|
String maxCommitTime) throws IOException {
|
||||||
return groupFilesByFileId(files, maxCommitTime).values().stream();
|
return groupFilesByFileId(files, maxCommitTime).values().stream();
|
||||||
}
|
}
|
||||||
@@ -173,7 +179,7 @@ public abstract class AbstractTableFileSystemView implements TableFileSystemView
|
|||||||
return Arrays.stream(files).flatMap(fileStatus -> {
|
return Arrays.stream(files).flatMap(fileStatus -> {
|
||||||
HoodieDataFile dataFile = new HoodieDataFile(fileStatus);
|
HoodieDataFile dataFile = new HoodieDataFile(fileStatus);
|
||||||
if (activeCommitTimeline.containsOrBeforeTimelineStarts(dataFile.getCommitTime())
|
if (activeCommitTimeline.containsOrBeforeTimelineStarts(dataFile.getCommitTime())
|
||||||
&& activeCommitTimeline.compareInstants(dataFile.getCommitTime(), maxCommitTime,
|
&& activeCommitTimeline.compareTimestamps(dataFile.getCommitTime(), maxCommitTime,
|
||||||
HoodieTimeline.LESSER_OR_EQUAL)) {
|
HoodieTimeline.LESSER_OR_EQUAL)) {
|
||||||
return Stream.of(Pair.of(dataFile.getFileId(), dataFile));
|
return Stream.of(Pair.of(dataFile.getFileId(), dataFile));
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -36,7 +36,7 @@ public class ReadOptimizedTableView extends AbstractTableFileSystemView {
|
|||||||
Path partitionPath = new Path(metaClient.getBasePath(), partitionPathStr);
|
Path partitionPath = new Path(metaClient.getBasePath(), partitionPathStr);
|
||||||
try {
|
try {
|
||||||
return fs.listStatus(partitionPath, path -> path.getName()
|
return fs.listStatus(partitionPath, path -> path.getName()
|
||||||
.contains(metaClient.getTableConfig().getROStorageFormat().getFileExtension()));
|
.contains(metaClient.getTableConfig().getROFileFormat().getFileExtension()));
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new HoodieIOException(
|
throw new HoodieIOException(
|
||||||
"Failed to list data files in partition " + partitionPathStr, e);
|
"Failed to list data files in partition " + partitionPathStr, e);
|
||||||
|
|||||||
@@ -16,6 +16,10 @@
|
|||||||
|
|
||||||
package com.uber.hoodie.common.util;
|
package com.uber.hoodie.common.util;
|
||||||
|
|
||||||
|
import com.google.common.base.Preconditions;
|
||||||
|
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||||
|
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||||
|
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||||
import com.uber.hoodie.exception.HoodieIOException;
|
import com.uber.hoodie.exception.HoodieIOException;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
@@ -24,6 +28,7 @@ import org.apache.hadoop.fs.Path;
|
|||||||
import org.apache.log4j.LogManager;
|
import org.apache.log4j.LogManager;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
@@ -91,18 +96,27 @@ public class FSUtils {
|
|||||||
/**
|
/**
|
||||||
* Obtain all the partition paths, that are present in this table.
|
* Obtain all the partition paths, that are present in this table.
|
||||||
*/
|
*/
|
||||||
public static List<String> getAllPartitionPaths(FileSystem fs, String basePath) throws IOException {
|
public static List<String> getAllPartitionPaths(FileSystem fs, String basePath)
|
||||||
|
throws IOException {
|
||||||
List<String> partitionsToClean = new ArrayList<>();
|
List<String> partitionsToClean = new ArrayList<>();
|
||||||
// TODO(vc): For now, assume partitions are two levels down from base path.
|
// TODO(vc): For now, assume partitions are two levels down from base path.
|
||||||
FileStatus[] folders = fs.globStatus(new Path(basePath + "/*/*/*"));
|
FileStatus[] folders = fs.globStatus(new Path(basePath + "/*/*/*"));
|
||||||
for (FileStatus status : folders) {
|
for (FileStatus status : folders) {
|
||||||
Path path = status.getPath();
|
Path path = status.getPath();
|
||||||
partitionsToClean.add(String.format("%s/%s/%s",
|
partitionsToClean.add(String.format("%s/%s/%s", path.getParent().getParent().getName(),
|
||||||
path.getParent().getParent().getName(),
|
path.getParent().getName(), path.getName()));
|
||||||
path.getParent().getName(),
|
|
||||||
path.getName()));
|
|
||||||
}
|
}
|
||||||
return partitionsToClean;
|
return partitionsToClean;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static String getFileExtension(String fullName) {
|
||||||
|
Preconditions.checkNotNull(fullName);
|
||||||
|
String fileName = (new File(fullName)).getName();
|
||||||
|
int dotIndex = fileName.indexOf('.');
|
||||||
|
return dotIndex == -1 ? "" : fileName.substring(dotIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String getInstantTime(String name) {
|
||||||
|
return name.replace(getFileExtension(name), "");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -19,8 +19,10 @@ package com.uber.hoodie.common.model;
|
|||||||
import com.esotericsoftware.kryo.Kryo;
|
import com.esotericsoftware.kryo.Kryo;
|
||||||
import com.esotericsoftware.kryo.io.Input;
|
import com.esotericsoftware.kryo.io.Input;
|
||||||
import com.esotericsoftware.kryo.io.Output;
|
import com.esotericsoftware.kryo.io.Output;
|
||||||
|
import com.esotericsoftware.kryo.serializers.JavaSerializer;
|
||||||
import com.uber.hoodie.common.table.HoodieTableConfig;
|
import com.uber.hoodie.common.table.HoodieTableConfig;
|
||||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||||
|
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||||
import com.uber.hoodie.common.util.FSUtils;
|
import com.uber.hoodie.common.util.FSUtils;
|
||||||
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
@@ -30,6 +32,8 @@ import java.io.ByteArrayInputStream;
|
|||||||
import java.io.ByteArrayOutputStream;
|
import java.io.ByteArrayOutputStream;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.ObjectInputStream;
|
||||||
|
import java.io.ObjectOutputStream;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.text.SimpleDateFormat;
|
import java.text.SimpleDateFormat;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
@@ -66,13 +70,13 @@ public class HoodieTestUtils {
|
|||||||
|
|
||||||
public static final void createCommitFiles(String basePath, String... commitTimes) throws IOException {
|
public static final void createCommitFiles(String basePath, String... commitTimes) throws IOException {
|
||||||
for (String commitTime: commitTimes) {
|
for (String commitTime: commitTimes) {
|
||||||
new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME+ "/" + HoodieTableMetaClient.makeCommitFileName(commitTime)).createNewFile();
|
new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME+ "/" + HoodieTimeline.makeCommitFileName(commitTime)).createNewFile();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static final void createInflightCommitFiles(String basePath, String... commitTimes) throws IOException {
|
public static final void createInflightCommitFiles(String basePath, String... commitTimes) throws IOException {
|
||||||
for (String commitTime: commitTimes) {
|
for (String commitTime: commitTimes) {
|
||||||
new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME+ "/" + HoodieTableMetaClient.makeInflightCommitFileName(commitTime)).createNewFile();
|
new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME+ "/" + HoodieTimeline.makeInflightCommitFileName(commitTime)).createNewFile();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -97,15 +101,15 @@ public class HoodieTestUtils {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public static final boolean doesCommitExist(String basePath, String commitTime) {
|
public static final boolean doesCommitExist(String basePath, String commitTime) {
|
||||||
return new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME+ "/" + commitTime + HoodieTableMetaClient.COMMIT_EXTENSION).exists();
|
return new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME+ "/" + commitTime + HoodieTimeline.COMMIT_EXTENSION).exists();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static final boolean doesInflightExist(String basePath, String commitTime) {
|
public static final boolean doesInflightExist(String basePath, String commitTime) {
|
||||||
return new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME+ "/" + commitTime + HoodieTableMetaClient.INFLIGHT_FILE_SUFFIX).exists();
|
return new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME+ "/" + commitTime + HoodieTimeline.INFLIGHT_EXTENSION).exists();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String makeInflightTestFileName(String instant) {
|
public static String makeInflightTestFileName(String instant) {
|
||||||
return instant + TEST_EXTENSION + HoodieTableMetaClient.INFLIGHT_FILE_SUFFIX;
|
return instant + TEST_EXTENSION + HoodieTimeline.INFLIGHT_EXTENSION;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String makeTestFileName(String instant) {
|
public static String makeTestFileName(String instant) {
|
||||||
@@ -123,9 +127,12 @@ public class HoodieTestUtils {
|
|||||||
assert !iter1.hasNext() && !iter2.hasNext();
|
assert !iter1.hasNext() && !iter2.hasNext();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static <T extends Serializable> T serializeDeserialize(T object, Class<T> clazz) {
|
public static <T extends Serializable> T serializeDeserialize(T object, Class<T> clazz)
|
||||||
|
throws IOException, ClassNotFoundException {
|
||||||
// Using Kyro as the default serializer in Spark Jobs
|
// Using Kyro as the default serializer in Spark Jobs
|
||||||
Kryo kryo = new Kryo();
|
Kryo kryo = new Kryo();
|
||||||
|
kryo.register(HoodieTableMetaClient.class, new JavaSerializer());
|
||||||
|
|
||||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||||
Output output = new Output(baos);
|
Output output = new Output(baos);
|
||||||
kryo.writeObject(output, object);
|
kryo.writeObject(output, object);
|
||||||
|
|||||||
@@ -16,14 +16,12 @@
|
|||||||
|
|
||||||
package com.uber.hoodie.common.table;
|
package com.uber.hoodie.common.table;
|
||||||
|
|
||||||
import com.esotericsoftware.kryo.Kryo;
|
|
||||||
import com.esotericsoftware.kryo.io.Input;
|
|
||||||
import com.esotericsoftware.kryo.io.Output;
|
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
import com.uber.hoodie.common.model.HoodieTestUtils;
|
import com.uber.hoodie.common.model.HoodieTestUtils;
|
||||||
import com.uber.hoodie.common.table.timeline.HoodieArchivedCommitTimeline;
|
import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline;
|
||||||
|
import com.uber.hoodie.common.table.timeline.HoodieArchivedTimeline;
|
||||||
|
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.io.ArrayFile;
|
|
||||||
import org.apache.hadoop.io.IOUtils;
|
import org.apache.hadoop.io.IOUtils;
|
||||||
import org.apache.hadoop.io.SequenceFile;
|
import org.apache.hadoop.io.SequenceFile;
|
||||||
import org.apache.hadoop.io.Text;
|
import org.apache.hadoop.io.Text;
|
||||||
@@ -31,13 +29,15 @@ import org.junit.Before;
|
|||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.junit.rules.TemporaryFolder;
|
import org.junit.rules.TemporaryFolder;
|
||||||
|
|
||||||
import java.io.ByteArrayInputStream;
|
|
||||||
import java.io.ByteArrayOutputStream;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import static org.junit.Assert.*;
|
import static org.junit.Assert.assertArrayEquals;
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.junit.Assert.assertFalse;
|
||||||
|
import static org.junit.Assert.assertNotNull;
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
public class HoodieTableMetaClientTest {
|
public class HoodieTableMetaClientTest {
|
||||||
private HoodieTableMetaClient metaClient;
|
private HoodieTableMetaClient metaClient;
|
||||||
@@ -61,48 +61,53 @@ public class HoodieTableMetaClientTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void checkSerDe() throws IOException {
|
public void checkSerDe() throws IOException, ClassNotFoundException {
|
||||||
// check if this object is serialized and se-serialized, we are able to read from the file system
|
// check if this object is serialized and de-serialized, we are able to read from the file system
|
||||||
HoodieTableMetaClient deseralizedMetaClient =
|
HoodieTableMetaClient deseralizedMetaClient =
|
||||||
HoodieTestUtils.serializeDeserialize(metaClient, HoodieTableMetaClient.class);
|
HoodieTestUtils.serializeDeserialize(metaClient, HoodieTableMetaClient.class);
|
||||||
HoodieTimeline commitTimeline = metaClient.getActiveCommitTimeline();
|
assertNotNull(deseralizedMetaClient);
|
||||||
commitTimeline.saveInstantAsInflight("1");
|
HoodieActiveTimeline commitTimeline = deseralizedMetaClient.getActiveTimeline();
|
||||||
commitTimeline.saveInstantAsComplete("1", Optional.of("test-detail".getBytes()));
|
HoodieInstant instant =
|
||||||
|
new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, "1");
|
||||||
|
commitTimeline.createInflight(instant);
|
||||||
|
commitTimeline.saveAsComplete(instant, Optional.of("test-detail".getBytes()));
|
||||||
commitTimeline = commitTimeline.reload();
|
commitTimeline = commitTimeline.reload();
|
||||||
assertEquals("Commit should be 1", "1", commitTimeline.getInstants().findFirst().get());
|
HoodieInstant completedInstant = HoodieTimeline.getCompletedInstant(instant);
|
||||||
|
assertEquals("Commit should be 1 and completed", completedInstant,
|
||||||
|
commitTimeline.getInstants().findFirst().get());
|
||||||
assertArrayEquals("Commit value should be \"test-detail\"", "test-detail".getBytes(),
|
assertArrayEquals("Commit value should be \"test-detail\"", "test-detail".getBytes(),
|
||||||
commitTimeline.readInstantDetails("1").get());
|
commitTimeline.getInstantDetails(completedInstant).get());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void checkCommitTimeline() throws IOException {
|
public void checkCommitTimeline() throws IOException {
|
||||||
HoodieTimeline commitTimeline = metaClient.getActiveCommitTimeline();
|
HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline();
|
||||||
assertFalse("Should be empty commit timeline",
|
HoodieTimeline activeCommitTimeline = activeTimeline.getCommitTimeline();
|
||||||
commitTimeline.getInstants().findFirst().isPresent());
|
assertTrue("Should be empty commit timeline", activeCommitTimeline.empty());
|
||||||
assertFalse("Should be empty commit timeline",
|
|
||||||
commitTimeline.getInflightInstants().findFirst().isPresent());
|
HoodieInstant instant =
|
||||||
commitTimeline.saveInstantAsInflight("1");
|
new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, "1");
|
||||||
commitTimeline.saveInstantAsComplete("1", Optional.of("test-detail".getBytes()));
|
activeTimeline.createInflight(instant);
|
||||||
|
activeTimeline.saveAsComplete(instant, Optional.of("test-detail".getBytes()));
|
||||||
|
|
||||||
// Commit timeline should not auto-reload every time getActiveCommitTimeline(), it should be cached
|
// Commit timeline should not auto-reload every time getActiveCommitTimeline(), it should be cached
|
||||||
commitTimeline = metaClient.getActiveCommitTimeline();
|
activeTimeline = metaClient.getActiveTimeline();
|
||||||
assertFalse("Should be empty commit timeline",
|
activeCommitTimeline = activeTimeline.getCommitTimeline();
|
||||||
commitTimeline.getInstants().findFirst().isPresent());
|
assertTrue("Should be empty commit timeline", activeCommitTimeline.empty());
|
||||||
assertFalse("Should be empty commit timeline",
|
|
||||||
commitTimeline.getInflightInstants().findFirst().isPresent());
|
|
||||||
|
|
||||||
commitTimeline = commitTimeline.reload();
|
HoodieInstant completedInstant = HoodieTimeline.getCompletedInstant(instant);
|
||||||
assertTrue("Should be the 1 commit we made",
|
activeTimeline = activeTimeline.reload();
|
||||||
commitTimeline.getInstants().findFirst().isPresent());
|
activeCommitTimeline = activeTimeline.getCommitTimeline();
|
||||||
assertEquals("Commit should be 1", "1", commitTimeline.getInstants().findFirst().get());
|
assertFalse("Should be the 1 commit we made", activeCommitTimeline.empty());
|
||||||
|
assertEquals("Commit should be 1", completedInstant,
|
||||||
|
activeCommitTimeline.getInstants().findFirst().get());
|
||||||
assertArrayEquals("Commit value should be \"test-detail\"", "test-detail".getBytes(),
|
assertArrayEquals("Commit value should be \"test-detail\"", "test-detail".getBytes(),
|
||||||
commitTimeline.readInstantDetails("1").get());
|
activeCommitTimeline.getInstantDetails(completedInstant).get());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void checkArchiveCommitTimeline() throws IOException {
|
public void checkArchiveCommitTimeline() throws IOException {
|
||||||
Path archiveLogPath =
|
Path archiveLogPath = HoodieArchivedTimeline.getArchiveLogPath(metaClient.getMetaPath());
|
||||||
HoodieArchivedCommitTimeline.getArchiveLogPath(metaClient.getMetaPath());
|
|
||||||
SequenceFile.Writer writer = SequenceFile
|
SequenceFile.Writer writer = SequenceFile
|
||||||
.createWriter(HoodieTestUtils.fs.getConf(), SequenceFile.Writer.file(archiveLogPath),
|
.createWriter(HoodieTestUtils.fs.getConf(), SequenceFile.Writer.file(archiveLogPath),
|
||||||
SequenceFile.Writer.keyClass(Text.class),
|
SequenceFile.Writer.keyClass(Text.class),
|
||||||
@@ -114,13 +119,24 @@ public class HoodieTableMetaClientTest {
|
|||||||
|
|
||||||
IOUtils.closeStream(writer);
|
IOUtils.closeStream(writer);
|
||||||
|
|
||||||
HoodieTimeline archivedTimeline = metaClient.getArchivedCommitTimeline();
|
HoodieArchivedTimeline archivedTimeline = metaClient.getArchivedTimeline();
|
||||||
assertEquals(Lists.newArrayList("1", "2", "3"),
|
|
||||||
|
HoodieInstant instant1 =
|
||||||
|
new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "1");
|
||||||
|
HoodieInstant instant2 =
|
||||||
|
new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "2");
|
||||||
|
HoodieInstant instant3 =
|
||||||
|
new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "3");
|
||||||
|
|
||||||
|
assertEquals(Lists.newArrayList(instant1, instant2, instant3),
|
||||||
archivedTimeline.getInstants().collect(Collectors.toList()));
|
archivedTimeline.getInstants().collect(Collectors.toList()));
|
||||||
System.out.println(new String( archivedTimeline.readInstantDetails("1").get()));
|
|
||||||
assertArrayEquals(new Text("data1").getBytes(), archivedTimeline.readInstantDetails("1").get());
|
assertArrayEquals(new Text("data1").getBytes(),
|
||||||
assertArrayEquals(new Text("data2").getBytes(), archivedTimeline.readInstantDetails("2").get());
|
archivedTimeline.getInstantDetails(instant1).get());
|
||||||
assertArrayEquals(new Text("data3").getBytes(), archivedTimeline.readInstantDetails("3").get());
|
assertArrayEquals(new Text("data2").getBytes(),
|
||||||
|
archivedTimeline.getInstantDetails(instant2).get());
|
||||||
|
assertArrayEquals(new Text("data3").getBytes(),
|
||||||
|
archivedTimeline.getInstantDetails(instant3).get());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,126 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.uber.hoodie.common.table.string;
|
||||||
|
|
||||||
|
import com.uber.hoodie.common.model.HoodieTestUtils;
|
||||||
|
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||||
|
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||||
|
import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline;
|
||||||
|
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.junit.After;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.Rule;
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.junit.rules.ExpectedException;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
|
import static org.junit.Assert.*;
|
||||||
|
|
||||||
|
public class HoodieActiveTimelineTest {
|
||||||
|
private HoodieActiveTimeline timeline;
|
||||||
|
private HoodieTableMetaClient metaClient;
|
||||||
|
@Rule
|
||||||
|
public final ExpectedException exception = ExpectedException.none();
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setUp() throws Exception {
|
||||||
|
this.metaClient = HoodieTestUtils.initOnTemp();
|
||||||
|
}
|
||||||
|
|
||||||
|
@After
|
||||||
|
public void tearDown() throws Exception {
|
||||||
|
HoodieTestUtils.fs.delete(new Path(this.metaClient.getBasePath()), true);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testLoadingInstantsFromFiles() throws IOException {
|
||||||
|
HoodieInstant instant1 =
|
||||||
|
new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "1");
|
||||||
|
HoodieInstant instant2 =
|
||||||
|
new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "3");
|
||||||
|
HoodieInstant instant3 =
|
||||||
|
new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "5");
|
||||||
|
HoodieInstant instant4 =
|
||||||
|
new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "8");
|
||||||
|
HoodieInstant instant5 =
|
||||||
|
new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, "9");
|
||||||
|
|
||||||
|
timeline = new HoodieActiveTimeline(HoodieTestUtils.fs, metaClient.getMetaPath());
|
||||||
|
timeline.saveAsComplete(instant1, Optional.empty());
|
||||||
|
timeline.saveAsComplete(instant2, Optional.empty());
|
||||||
|
timeline.saveAsComplete(instant3, Optional.empty());
|
||||||
|
timeline.saveAsComplete(instant4, Optional.empty());
|
||||||
|
timeline.createInflight(instant5);
|
||||||
|
timeline = timeline.reload();
|
||||||
|
|
||||||
|
assertEquals("Total instants should be 5", 5, timeline.countInstants());
|
||||||
|
HoodieTestUtils.assertStreamEquals("Check the instants stream",
|
||||||
|
Stream.of(instant1, instant2, instant3, instant4, instant5), timeline.getInstants());
|
||||||
|
HoodieTestUtils.assertStreamEquals("Check the instants stream",
|
||||||
|
Stream.of(instant1, instant2, instant3, instant4, instant5),
|
||||||
|
timeline.getCommitTimeline().getInstants());
|
||||||
|
HoodieTestUtils.assertStreamEquals("Check the instants stream",
|
||||||
|
Stream.of(instant1, instant2, instant3, instant4),
|
||||||
|
timeline.getCommitTimeline().filterCompletedInstants().getInstants());
|
||||||
|
HoodieTestUtils.assertStreamEquals("Check the instants stream", Stream.of(instant5),
|
||||||
|
timeline.getCommitTimeline().filterInflights().getInstants());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTimelineOperationsBasic() throws Exception {
|
||||||
|
timeline = new HoodieActiveTimeline(HoodieTestUtils.fs, metaClient.getMetaPath());
|
||||||
|
assertTrue(timeline.empty());
|
||||||
|
assertEquals("", 0, timeline.countInstants());
|
||||||
|
assertEquals("", Optional.empty(), timeline.firstInstant());
|
||||||
|
assertEquals("", Optional.empty(), timeline.nthInstant(5));
|
||||||
|
assertEquals("", Optional.empty(), timeline.nthInstant(-1));
|
||||||
|
assertEquals("", Optional.empty(), timeline.lastInstant());
|
||||||
|
assertFalse("", timeline.containsInstant(
|
||||||
|
new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "01")));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTimelineOperations() throws Exception {
|
||||||
|
timeline = new MockHoodieTimeline(
|
||||||
|
Stream.of("01", "03", "05", "07", "09", "11", "13", "15", "17", "19"),
|
||||||
|
Stream.of("21", "23"));
|
||||||
|
HoodieTestUtils.assertStreamEquals("", Stream.of("05", "07", "09", "11"),
|
||||||
|
timeline.getCommitTimeline().filterCompletedInstants().findInstantsInRange("04", "11")
|
||||||
|
.getInstants().map(HoodieInstant::getTimestamp));
|
||||||
|
HoodieTestUtils.assertStreamEquals("", Stream.of("09", "11"),
|
||||||
|
timeline.getCommitTimeline().filterCompletedInstants().findInstantsAfter("07", 2)
|
||||||
|
.getInstants().map(HoodieInstant::getTimestamp));
|
||||||
|
assertFalse(timeline.empty());
|
||||||
|
assertFalse(timeline.getCommitTimeline().filterInflights().empty());
|
||||||
|
assertEquals("", 12, timeline.countInstants());
|
||||||
|
HoodieTimeline activeCommitTimeline = timeline.getCommitTimeline().filterCompletedInstants();
|
||||||
|
assertEquals("", 10, activeCommitTimeline.countInstants());
|
||||||
|
|
||||||
|
assertEquals("", "01", activeCommitTimeline.firstInstant().get().getTimestamp());
|
||||||
|
assertEquals("", "11", activeCommitTimeline.nthInstant(5).get().getTimestamp());
|
||||||
|
assertEquals("", "19", activeCommitTimeline.lastInstant().get().getTimestamp());
|
||||||
|
assertEquals("", "09", activeCommitTimeline.nthFromLastInstant(5).get().getTimestamp());
|
||||||
|
assertTrue("", activeCommitTimeline.containsInstant(
|
||||||
|
new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "09")));
|
||||||
|
assertFalse("", activeCommitTimeline.isBeforeTimelineStarts("02"));
|
||||||
|
assertTrue("", activeCommitTimeline.isBeforeTimelineStarts("00"));
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,104 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package com.uber.hoodie.common.table.string;
|
|
||||||
|
|
||||||
import com.uber.hoodie.common.model.HoodieTestUtils;
|
|
||||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
|
||||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
import org.junit.After;
|
|
||||||
import org.junit.Before;
|
|
||||||
import org.junit.Rule;
|
|
||||||
import org.junit.Test;
|
|
||||||
import org.junit.rules.ExpectedException;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Optional;
|
|
||||||
import java.util.stream.Stream;
|
|
||||||
|
|
||||||
import static org.junit.Assert.*;
|
|
||||||
|
|
||||||
public class HoodieDefaultTimelineTest {
|
|
||||||
private HoodieTimeline timeline;
|
|
||||||
private HoodieTableMetaClient metaClient;
|
|
||||||
@Rule
|
|
||||||
public final ExpectedException exception = ExpectedException.none();
|
|
||||||
|
|
||||||
@Before
|
|
||||||
public void setUp() throws Exception {
|
|
||||||
this.metaClient = HoodieTestUtils.initOnTemp();
|
|
||||||
}
|
|
||||||
|
|
||||||
@After
|
|
||||||
public void tearDown() throws Exception {
|
|
||||||
HoodieTestUtils.fs.delete(new Path(this.metaClient.getBasePath()), true);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testLoadingInstantsFromFiles() throws IOException {
|
|
||||||
timeline =
|
|
||||||
new MockHoodieTimeline(HoodieTestUtils.fs, metaClient.getMetaPath(), ".test");
|
|
||||||
timeline.saveInstantAsComplete("1", Optional.empty());
|
|
||||||
timeline.saveInstantAsComplete("3", Optional.empty());
|
|
||||||
timeline.saveInstantAsComplete("5", Optional.empty());
|
|
||||||
timeline.saveInstantAsComplete("8", Optional.empty());
|
|
||||||
timeline.saveInstantAsInflight("9");
|
|
||||||
timeline = timeline.reload();
|
|
||||||
|
|
||||||
assertEquals("Total instants should be 4", 4, timeline.getTotalInstants());
|
|
||||||
HoodieTestUtils
|
|
||||||
.assertStreamEquals("Check the instants stream", Stream.of("1", "3", "5", "8"),
|
|
||||||
timeline.getInstants());
|
|
||||||
assertTrue("Inflights should be present in the timeline", timeline.hasInflightInstants());
|
|
||||||
HoodieTestUtils.assertStreamEquals("Check the inflights stream", Stream.of("9"),
|
|
||||||
timeline.getInflightInstants());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testTimelineOperationsBasic() throws Exception {
|
|
||||||
timeline = new MockHoodieTimeline(Stream.empty(), Stream.empty());
|
|
||||||
assertFalse(timeline.hasInstants());
|
|
||||||
assertFalse(timeline.hasInflightInstants());
|
|
||||||
assertEquals("", 0, timeline.getTotalInstants());
|
|
||||||
assertEquals("", Optional.empty(), timeline.firstInstant());
|
|
||||||
assertEquals("", Optional.empty(), timeline.nthInstant(5));
|
|
||||||
assertEquals("", Optional.empty(), timeline.nthInstant(-1));
|
|
||||||
assertEquals("", Optional.empty(), timeline.lastInstant());
|
|
||||||
assertFalse("", timeline.containsInstant("01"));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testTimelineOperations() throws Exception {
|
|
||||||
timeline = new MockHoodieTimeline(
|
|
||||||
Stream.of("01", "03", "05", "07", "09", "11", "13", "15", "17", "19"),
|
|
||||||
Stream.of("21", "23"));
|
|
||||||
HoodieTestUtils.assertStreamEquals("", Stream.of("05", "07", "09", "11"),
|
|
||||||
timeline.findInstantsInRange("04", "11"));
|
|
||||||
HoodieTestUtils
|
|
||||||
.assertStreamEquals("", Stream.of("09", "11"), timeline.findInstantsAfter("07", 2));
|
|
||||||
assertTrue(timeline.hasInstants());
|
|
||||||
assertTrue(timeline.hasInflightInstants());
|
|
||||||
assertEquals("", 10, timeline.getTotalInstants());
|
|
||||||
assertEquals("", "01", timeline.firstInstant().get());
|
|
||||||
assertEquals("", "11", timeline.nthInstant(5).get());
|
|
||||||
assertEquals("", "19", timeline.lastInstant().get());
|
|
||||||
assertEquals("", "09", timeline.nthFromLastInstant(5).get());
|
|
||||||
assertTrue("", timeline.containsInstant("09"));
|
|
||||||
assertFalse("", timeline.isInstantBeforeTimelineStarts("02"));
|
|
||||||
assertTrue("", timeline.isInstantBeforeTimelineStarts("00"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -16,51 +16,29 @@
|
|||||||
|
|
||||||
package com.uber.hoodie.common.table.string;
|
package com.uber.hoodie.common.table.string;
|
||||||
|
|
||||||
import com.uber.hoodie.common.model.HoodieTestUtils;
|
|
||||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||||
import com.uber.hoodie.common.table.timeline.HoodieDefaultTimeline;
|
import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Optional;
|
import java.util.Comparator;
|
||||||
|
import java.util.function.Function;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
public class MockHoodieTimeline extends HoodieDefaultTimeline {
|
public class MockHoodieTimeline extends HoodieActiveTimeline {
|
||||||
private String fileExt;
|
public MockHoodieTimeline(Stream<String> completed, Stream<String> inflights)
|
||||||
|
|
||||||
public MockHoodieTimeline(FileSystem fs, String metaPath, String fileExtension)
|
|
||||||
throws IOException {
|
throws IOException {
|
||||||
super(fs, metaPath, fileExtension);
|
super();
|
||||||
this.fileExt = fileExtension;
|
this.instants = Stream.concat(completed
|
||||||
}
|
.map(s -> new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, s)),
|
||||||
|
inflights.map(
|
||||||
public MockHoodieTimeline(Stream<String> instants, Stream<String> inflights)
|
s -> new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, s)))
|
||||||
throws IOException {
|
.sorted(Comparator.comparing(new Function<HoodieInstant, String>() {
|
||||||
super(instants, inflights);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public HoodieTimeline reload() throws IOException {
|
public String apply(HoodieInstant hoodieInstant) {
|
||||||
return new MockHoodieTimeline(fs, metaPath, fileExt);
|
return hoodieInstant.getFileName();
|
||||||
}
|
}
|
||||||
|
})).collect(Collectors.toList());
|
||||||
@Override
|
|
||||||
public Optional<byte[]> readInstantDetails(String instant) {
|
|
||||||
return Optional.empty();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected String getInflightFileName(String instant) {
|
|
||||||
return HoodieTestUtils.makeInflightTestFileName(instant);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected String getCompletedFileName(String instant) {
|
|
||||||
return HoodieTestUtils.makeTestFileName(instant);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected String getTimelineName() {
|
|
||||||
return "mock-test";
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -23,6 +23,8 @@ import com.uber.hoodie.common.model.HoodieTestUtils;
|
|||||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||||
import com.uber.hoodie.common.table.TableFileSystemView;
|
import com.uber.hoodie.common.table.TableFileSystemView;
|
||||||
|
import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline;
|
||||||
|
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||||
import com.uber.hoodie.common.util.FSUtils;
|
import com.uber.hoodie.common.util.FSUtils;
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
@@ -40,6 +42,7 @@ import java.util.stream.Collectors;
|
|||||||
|
|
||||||
import static org.junit.Assert.*;
|
import static org.junit.Assert.*;
|
||||||
|
|
||||||
|
@SuppressWarnings("ResultOfMethodCallIgnored")
|
||||||
public class ReadOptimizedTableViewTest {
|
public class ReadOptimizedTableViewTest {
|
||||||
private HoodieTableMetaClient metaClient;
|
private HoodieTableMetaClient metaClient;
|
||||||
private String basePath;
|
private String basePath;
|
||||||
@@ -77,8 +80,10 @@ public class ReadOptimizedTableViewTest {
|
|||||||
fsView.getLatestDataFilesForFileId(partitionPath, fileId).findFirst().isPresent());
|
fsView.getLatestDataFilesForFileId(partitionPath, fileId).findFirst().isPresent());
|
||||||
|
|
||||||
// Make this commit safe
|
// Make this commit safe
|
||||||
HoodieTimeline commitTimeline = metaClient.getActiveCommitTimeline();
|
HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline();
|
||||||
commitTimeline.saveInstantAsComplete(commitTime1, Optional.empty());
|
HoodieInstant instant1 =
|
||||||
|
new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime1);
|
||||||
|
commitTimeline.saveAsComplete(instant1, Optional.empty());
|
||||||
refreshFsView();
|
refreshFsView();
|
||||||
assertEquals("", fileName1,
|
assertEquals("", fileName1,
|
||||||
fsView.getLatestDataFilesForFileId(partitionPath, fileId).findFirst().get()
|
fsView.getLatestDataFilesForFileId(partitionPath, fileId).findFirst().get()
|
||||||
@@ -94,7 +99,9 @@ public class ReadOptimizedTableViewTest {
|
|||||||
.getFileName());
|
.getFileName());
|
||||||
|
|
||||||
// Make it safe
|
// Make it safe
|
||||||
commitTimeline.saveInstantAsComplete(commitTime2, Optional.empty());
|
HoodieInstant instant2 =
|
||||||
|
new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime2);
|
||||||
|
commitTimeline.saveAsComplete(instant2, Optional.empty());
|
||||||
refreshFsView();
|
refreshFsView();
|
||||||
assertEquals("", fileName2,
|
assertEquals("", fileName2,
|
||||||
fsView.getLatestDataFilesForFileId(partitionPath, fileId).findFirst().get()
|
fsView.getLatestDataFilesForFileId(partitionPath, fileId).findFirst().get()
|
||||||
@@ -140,7 +147,7 @@ public class ReadOptimizedTableViewTest {
|
|||||||
|
|
||||||
refreshFsView();
|
refreshFsView();
|
||||||
List<HoodieDataFile> statuses1 =
|
List<HoodieDataFile> statuses1 =
|
||||||
fsView.streamLatestVersionInPartition("2016/05/01", commitTime4)
|
fsView.getLatestVersionInPartition("2016/05/01", commitTime4)
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
assertEquals(statuses1.size(), 3);
|
assertEquals(statuses1.size(), 3);
|
||||||
Set<String> filenames = Sets.newHashSet();
|
Set<String> filenames = Sets.newHashSet();
|
||||||
@@ -153,7 +160,7 @@ public class ReadOptimizedTableViewTest {
|
|||||||
|
|
||||||
// Reset the max commit time
|
// Reset the max commit time
|
||||||
List<HoodieDataFile> statuses2 =
|
List<HoodieDataFile> statuses2 =
|
||||||
fsView.streamLatestVersionInPartition("2016/05/01", commitTime3)
|
fsView.getLatestVersionInPartition("2016/05/01", commitTime3)
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
assertEquals(statuses2.size(), 3);
|
assertEquals(statuses2.size(), 3);
|
||||||
filenames = Sets.newHashSet();
|
filenames = Sets.newHashSet();
|
||||||
@@ -204,7 +211,7 @@ public class ReadOptimizedTableViewTest {
|
|||||||
|
|
||||||
refreshFsView();
|
refreshFsView();
|
||||||
List<List<HoodieDataFile>> statuses1 =
|
List<List<HoodieDataFile>> statuses1 =
|
||||||
fsView.streamEveryVersionInPartition("2016/05/01").collect(Collectors.toList());
|
fsView.getEveryVersionInPartition("2016/05/01").collect(Collectors.toList());
|
||||||
assertEquals(statuses1.size(), 3);
|
assertEquals(statuses1.size(), 3);
|
||||||
|
|
||||||
for (List<HoodieDataFile> status : statuses1) {
|
for (List<HoodieDataFile> status : statuses1) {
|
||||||
@@ -269,8 +276,8 @@ public class ReadOptimizedTableViewTest {
|
|||||||
assertEquals(statuses.length, 7);
|
assertEquals(statuses.length, 7);
|
||||||
|
|
||||||
refreshFsView();
|
refreshFsView();
|
||||||
List<HoodieDataFile> statuses1 =
|
List<HoodieDataFile> statuses1 = fsView
|
||||||
fsView.streamLatestVersionInRange(statuses, Lists.newArrayList(commitTime2, commitTime3))
|
.getLatestVersionInRange(statuses, Lists.newArrayList(commitTime2, commitTime3))
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
assertEquals(statuses1.size(), 2);
|
assertEquals(statuses1.size(), 2);
|
||||||
Set<String> filenames = Sets.newHashSet();
|
Set<String> filenames = Sets.newHashSet();
|
||||||
@@ -320,7 +327,7 @@ public class ReadOptimizedTableViewTest {
|
|||||||
|
|
||||||
refreshFsView();
|
refreshFsView();
|
||||||
List<HoodieDataFile> statuses1 =
|
List<HoodieDataFile> statuses1 =
|
||||||
fsView.streamLatestVersionsBeforeOrOn(statuses, commitTime2)
|
fsView.getLatestVersionsBeforeOrOn(statuses, commitTime2)
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
assertEquals(statuses1.size(), 2);
|
assertEquals(statuses1.size(), 2);
|
||||||
Set<String> filenames = Sets.newHashSet();
|
Set<String> filenames = Sets.newHashSet();
|
||||||
@@ -371,8 +378,7 @@ public class ReadOptimizedTableViewTest {
|
|||||||
|
|
||||||
refreshFsView();
|
refreshFsView();
|
||||||
List<HoodieDataFile> statuses1 =
|
List<HoodieDataFile> statuses1 =
|
||||||
fsView.streamLatestVersions(statuses)
|
fsView.getLatestVersions(statuses).collect(Collectors.toList());
|
||||||
.collect(Collectors.toList());
|
|
||||||
assertEquals(statuses1.size(), 3);
|
assertEquals(statuses1.size(), 3);
|
||||||
Set<String> filenames = Sets.newHashSet();
|
Set<String> filenames = Sets.newHashSet();
|
||||||
for (HoodieDataFile status : statuses1) {
|
for (HoodieDataFile status : statuses1) {
|
||||||
|
|||||||
@@ -79,6 +79,11 @@
|
|||||||
<groupId>org.apache.avro</groupId>
|
<groupId>org.apache.avro</groupId>
|
||||||
<artifactId>avro</artifactId>
|
<artifactId>avro</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.esotericsoftware</groupId>
|
||||||
|
<artifactId>kryo</artifactId>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
<build>
|
<build>
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ import com.uber.hoodie.common.model.HoodieRecord;
|
|||||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||||
import com.uber.hoodie.common.table.TableFileSystemView;
|
import com.uber.hoodie.common.table.TableFileSystemView;
|
||||||
|
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||||
import com.uber.hoodie.common.table.view.ReadOptimizedTableView;
|
import com.uber.hoodie.common.table.view.ReadOptimizedTableView;
|
||||||
import com.uber.hoodie.common.util.FSUtils;
|
import com.uber.hoodie.common.util.FSUtils;
|
||||||
import com.uber.hoodie.exception.InvalidDatasetException;
|
import com.uber.hoodie.exception.InvalidDatasetException;
|
||||||
@@ -95,7 +96,7 @@ public class HoodieInputFormat extends MapredParquetInputFormat
|
|||||||
String tableName = metadata.getTableConfig().getTableName();
|
String tableName = metadata.getTableConfig().getTableName();
|
||||||
String mode = HoodieHiveUtil.readMode(Job.getInstance(job), tableName);
|
String mode = HoodieHiveUtil.readMode(Job.getInstance(job), tableName);
|
||||||
TableFileSystemView fsView = new ReadOptimizedTableView(FSUtils.getFs(), metadata);
|
TableFileSystemView fsView = new ReadOptimizedTableView(FSUtils.getFs(), metadata);
|
||||||
HoodieTimeline timeline = metadata.getActiveCommitTimeline();
|
HoodieTimeline timeline = metadata.getActiveTimeline().getCommitTimeline().filterCompletedInstants();
|
||||||
if (HoodieHiveUtil.INCREMENTAL_SCAN_MODE.equals(mode)) {
|
if (HoodieHiveUtil.INCREMENTAL_SCAN_MODE.equals(mode)) {
|
||||||
// this is of the form commitTs_partition_sequenceNumber
|
// this is of the form commitTs_partition_sequenceNumber
|
||||||
String lastIncrementalTs = HoodieHiveUtil.readStartCommitTime(Job.getInstance(job), tableName);
|
String lastIncrementalTs = HoodieHiveUtil.readStartCommitTime(Job.getInstance(job), tableName);
|
||||||
@@ -103,10 +104,10 @@ public class HoodieInputFormat extends MapredParquetInputFormat
|
|||||||
Integer maxCommits = HoodieHiveUtil.readMaxCommits(Job.getInstance(job), tableName);
|
Integer maxCommits = HoodieHiveUtil.readMaxCommits(Job.getInstance(job), tableName);
|
||||||
LOG.info("Last Incremental timestamp was set as " + lastIncrementalTs);
|
LOG.info("Last Incremental timestamp was set as " + lastIncrementalTs);
|
||||||
List<String> commitsToReturn =
|
List<String> commitsToReturn =
|
||||||
timeline.findInstantsAfter(lastIncrementalTs, maxCommits)
|
timeline.findInstantsAfter(lastIncrementalTs, maxCommits).getInstants()
|
||||||
.collect(Collectors.toList());
|
.map(HoodieInstant::getTimestamp).collect(Collectors.toList());
|
||||||
List<HoodieDataFile> filteredFiles =
|
List<HoodieDataFile> filteredFiles =
|
||||||
fsView.streamLatestVersionInRange(value, commitsToReturn)
|
fsView.getLatestVersionInRange(value, commitsToReturn)
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
for (HoodieDataFile filteredFile : filteredFiles) {
|
for (HoodieDataFile filteredFile : filteredFiles) {
|
||||||
LOG.info("Processing incremental hoodie file - " + filteredFile.getPath());
|
LOG.info("Processing incremental hoodie file - " + filteredFile.getPath());
|
||||||
@@ -116,7 +117,7 @@ public class HoodieInputFormat extends MapredParquetInputFormat
|
|||||||
"Total paths to process after hoodie incremental filter " + filteredFiles.size());
|
"Total paths to process after hoodie incremental filter " + filteredFiles.size());
|
||||||
} else {
|
} else {
|
||||||
// filter files on the latest commit found
|
// filter files on the latest commit found
|
||||||
List<HoodieDataFile> filteredFiles = fsView.streamLatestVersions(value).collect(Collectors.toList());
|
List<HoodieDataFile> filteredFiles = fsView.getLatestVersions(value).collect(Collectors.toList());
|
||||||
LOG.info("Total paths to process after hoodie filter " + filteredFiles.size());
|
LOG.info("Total paths to process after hoodie filter " + filteredFiles.size());
|
||||||
for (HoodieDataFile filteredFile : filteredFiles) {
|
for (HoodieDataFile filteredFile : filteredFiles) {
|
||||||
LOG.info("Processing latest hoodie file - " + filteredFile.getPath());
|
LOG.info("Processing latest hoodie file - " + filteredFile.getPath());
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ package com.uber.hoodie.utilities;
|
|||||||
import com.beust.jcommander.JCommander;
|
import com.beust.jcommander.JCommander;
|
||||||
import com.beust.jcommander.Parameter;
|
import com.beust.jcommander.Parameter;
|
||||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||||
|
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||||
import com.uber.hoodie.exception.HoodieException;
|
import com.uber.hoodie.exception.HoodieException;
|
||||||
import com.uber.hoodie.utilities.exception.HoodieIncrementalPullException;
|
import com.uber.hoodie.utilities.exception.HoodieIncrementalPullException;
|
||||||
import com.uber.hoodie.utilities.exception.HoodieIncrementalPullSQLException;
|
import com.uber.hoodie.utilities.exception.HoodieIncrementalPullSQLException;
|
||||||
@@ -267,8 +268,12 @@ public class HiveIncrementalPuller {
|
|||||||
}
|
}
|
||||||
HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, targetDataPath);
|
HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, targetDataPath);
|
||||||
|
|
||||||
Optional<String> lastCommit = metadata.getActiveCommitTimeline().lastInstant();
|
Optional<HoodieInstant>
|
||||||
return lastCommit.orElse("0");
|
lastCommit = metadata.getActiveTimeline().getCommitTimeline().filterCompletedInstants().lastInstant();
|
||||||
|
if(lastCommit.isPresent()) {
|
||||||
|
return lastCommit.get().getTimestamp();
|
||||||
|
}
|
||||||
|
return "0";
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean ensureTempPathExists(FileSystem fs, String lastCommitTime)
|
private boolean ensureTempPathExists(FileSystem fs, String lastCommitTime)
|
||||||
@@ -299,13 +304,14 @@ public class HiveIncrementalPuller {
|
|||||||
|
|
||||||
private String getLastCommitTimePulled(FileSystem fs, String sourceTableLocation) throws IOException {
|
private String getLastCommitTimePulled(FileSystem fs, String sourceTableLocation) throws IOException {
|
||||||
HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, sourceTableLocation);
|
HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, sourceTableLocation);
|
||||||
List<String> commitsToSync = metadata.getActiveCommitTimeline()
|
List<String> commitsToSync = metadata.getActiveTimeline().getCommitTimeline().filterCompletedInstants()
|
||||||
.findInstantsAfter(config.fromCommitTime, config.maxCommits)
|
.findInstantsAfter(config.fromCommitTime, config.maxCommits).getInstants().map(HoodieInstant::getTimestamp)
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
if (commitsToSync.isEmpty()) {
|
if (commitsToSync.isEmpty()) {
|
||||||
log.warn("Nothing to sync. All commits in " + config.sourceTable + " are " + metadata
|
log.warn("Nothing to sync. All commits in " + config.sourceTable + " are " + metadata
|
||||||
.getActiveCommitTimeline().getInstants().collect(Collectors.toList())
|
.getActiveTimeline().getCommitTimeline().filterCompletedInstants().getInstants()
|
||||||
+ " and from commit time is " + config.fromCommitTime);
|
.collect(Collectors.toList()) + " and from commit time is "
|
||||||
|
+ config.fromCommitTime);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
log.info("Syncing commits " + commitsToSync);
|
log.info("Syncing commits " + commitsToSync);
|
||||||
|
|||||||
@@ -66,11 +66,11 @@ public class HoodieDeltaStreamer implements Serializable {
|
|||||||
JavaSparkContext sc = getSparkContext(cfg);
|
JavaSparkContext sc = getSparkContext(cfg);
|
||||||
FileSystem fs = FSUtils.getFs();
|
FileSystem fs = FSUtils.getFs();
|
||||||
HoodieTableMetaClient targetHoodieMetadata = new HoodieTableMetaClient(fs, cfg.targetPath);
|
HoodieTableMetaClient targetHoodieMetadata = new HoodieTableMetaClient(fs, cfg.targetPath);
|
||||||
HoodieTimeline timeline = targetHoodieMetadata.getActiveCommitTimeline();
|
HoodieTimeline timeline = targetHoodieMetadata.getActiveTimeline().getCommitTimeline().filterCompletedInstants();
|
||||||
String lastCommitPulled = findLastCommitPulled(fs, cfg.dataPath);
|
String lastCommitPulled = findLastCommitPulled(fs, cfg.dataPath);
|
||||||
log.info("Last commit pulled on the source dataset is " + lastCommitPulled);
|
log.info("Last commit pulled on the source dataset is " + lastCommitPulled);
|
||||||
if (!timeline.getInstants().iterator().hasNext() && timeline
|
if (!timeline.getInstants().iterator().hasNext() && timeline
|
||||||
.compareInstants(timeline.lastInstant().get(), lastCommitPulled,
|
.compareTimestamps(timeline.lastInstant().get().getTimestamp(), lastCommitPulled,
|
||||||
HoodieTimeline.GREATER)) {
|
HoodieTimeline.GREATER)) {
|
||||||
// this should never be the case
|
// this should never be the case
|
||||||
throw new IllegalStateException(
|
throw new IllegalStateException(
|
||||||
|
|||||||
@@ -24,6 +24,7 @@ import com.uber.hoodie.common.table.HoodieTableConfig;
|
|||||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||||
import com.uber.hoodie.common.table.TableFileSystemView;
|
import com.uber.hoodie.common.table.TableFileSystemView;
|
||||||
|
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||||
import com.uber.hoodie.common.table.view.ReadOptimizedTableView;
|
import com.uber.hoodie.common.table.view.ReadOptimizedTableView;
|
||||||
import com.uber.hoodie.common.util.FSUtils;
|
import com.uber.hoodie.common.util.FSUtils;
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
@@ -66,7 +67,8 @@ public class HoodieSnapshotCopier implements Serializable {
|
|||||||
final HoodieTableMetaClient tableMetadata = new HoodieTableMetaClient(fs, baseDir);
|
final HoodieTableMetaClient tableMetadata = new HoodieTableMetaClient(fs, baseDir);
|
||||||
final TableFileSystemView fsView = new ReadOptimizedTableView(fs, tableMetadata);
|
final TableFileSystemView fsView = new ReadOptimizedTableView(fs, tableMetadata);
|
||||||
// Get the latest commit
|
// Get the latest commit
|
||||||
final Optional<String> latestCommit = tableMetadata.getActiveCommitTimeline().lastInstant();
|
final Optional<HoodieInstant>
|
||||||
|
latestCommit = tableMetadata.getActiveTimeline().getCommitTimeline().filterCompletedInstants().lastInstant();
|
||||||
if(!latestCommit.isPresent()) {
|
if(!latestCommit.isPresent()) {
|
||||||
logger.warn("No commits present. Nothing to snapshot");
|
logger.warn("No commits present. Nothing to snapshot");
|
||||||
} else {
|
} else {
|
||||||
@@ -91,7 +93,7 @@ public class HoodieSnapshotCopier implements Serializable {
|
|||||||
FileSystem fs = FSUtils.getFs();
|
FileSystem fs = FSUtils.getFs();
|
||||||
List<Tuple2<String, String>> filePaths = new ArrayList<>();
|
List<Tuple2<String, String>> filePaths = new ArrayList<>();
|
||||||
for (HoodieDataFile hoodieDataFile : fsView
|
for (HoodieDataFile hoodieDataFile : fsView
|
||||||
.streamLatestVersionInPartition(partition, latestCommit.get())
|
.getLatestVersionInPartition(partition, latestCommit.get().getTimestamp())
|
||||||
.collect(Collectors.toList())) {
|
.collect(Collectors.toList())) {
|
||||||
filePaths.add(new Tuple2<>(partition, hoodieDataFile.getPath()));
|
filePaths.add(new Tuple2<>(partition, hoodieDataFile.getPath()));
|
||||||
}
|
}
|
||||||
@@ -124,8 +126,8 @@ public class HoodieSnapshotCopier implements Serializable {
|
|||||||
} else {
|
} else {
|
||||||
String commitTime =
|
String commitTime =
|
||||||
FSUtils.getCommitFromCommitFile(commitFilePath.getName());
|
FSUtils.getCommitFromCommitFile(commitFilePath.getName());
|
||||||
return tableMetadata.getActiveCommitTimeline()
|
return tableMetadata.getActiveTimeline().getCommitTimeline()
|
||||||
.compareInstants(commitTime, latestCommit.get(), HoodieTimeline.GREATER);
|
.compareTimestamps(commitTime, latestCommit.get().getTimestamp(), HoodieTimeline.GREATER);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|||||||
Reference in New Issue
Block a user