[HUDI-65] commitTime rename to instantTime (#1431)
This commit is contained in:
@@ -86,7 +86,7 @@ public class CleansCommand implements CommandMarker {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@CliCommand(value = "clean showpartitions", help = "Show partition level details of a clean")
|
@CliCommand(value = "clean showpartitions", help = "Show partition level details of a clean")
|
||||||
public String showCleanPartitions(@CliOption(key = {"clean"}, help = "clean to show") final String commitTime,
|
public String showCleanPartitions(@CliOption(key = {"clean"}, help = "clean to show") final String instantTime,
|
||||||
@CliOption(key = {"limit"}, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
|
@CliOption(key = {"limit"}, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
|
||||||
@CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
|
@CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
|
||||||
@CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
|
@CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
|
||||||
@@ -96,10 +96,10 @@ public class CleansCommand implements CommandMarker {
|
|||||||
|
|
||||||
HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
|
HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
|
||||||
HoodieTimeline timeline = activeTimeline.getCleanerTimeline().filterCompletedInstants();
|
HoodieTimeline timeline = activeTimeline.getCleanerTimeline().filterCompletedInstants();
|
||||||
HoodieInstant cleanInstant = new HoodieInstant(false, HoodieTimeline.CLEAN_ACTION, commitTime);
|
HoodieInstant cleanInstant = new HoodieInstant(false, HoodieTimeline.CLEAN_ACTION, instantTime);
|
||||||
|
|
||||||
if (!timeline.containsInstant(cleanInstant)) {
|
if (!timeline.containsInstant(cleanInstant)) {
|
||||||
return "Clean " + commitTime + " not found in metadata " + timeline;
|
return "Clean " + instantTime + " not found in metadata " + timeline;
|
||||||
}
|
}
|
||||||
|
|
||||||
HoodieCleanMetadata cleanMetadata =
|
HoodieCleanMetadata cleanMetadata =
|
||||||
|
|||||||
@@ -220,18 +220,18 @@ public class CommitsCommand implements CommandMarker {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@CliCommand(value = "commit rollback", help = "Rollback a commit")
|
@CliCommand(value = "commit rollback", help = "Rollback a commit")
|
||||||
public String rollbackCommit(@CliOption(key = {"commit"}, help = "Commit to rollback") final String commitTime,
|
public String rollbackCommit(@CliOption(key = {"commit"}, help = "Commit to rollback") final String instantTime,
|
||||||
@CliOption(key = {"sparkProperties"}, help = "Spark Properties File Path") final String sparkPropertiesPath)
|
@CliOption(key = {"sparkProperties"}, help = "Spark Properties File Path") final String sparkPropertiesPath)
|
||||||
throws Exception {
|
throws Exception {
|
||||||
HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
|
HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
|
||||||
HoodieTimeline completedTimeline = activeTimeline.getCommitsTimeline().filterCompletedInstants();
|
HoodieTimeline completedTimeline = activeTimeline.getCommitsTimeline().filterCompletedInstants();
|
||||||
HoodieTimeline filteredTimeline = completedTimeline.filter(instant -> instant.getTimestamp().equals(commitTime));
|
HoodieTimeline filteredTimeline = completedTimeline.filter(instant -> instant.getTimestamp().equals(instantTime));
|
||||||
if (filteredTimeline.empty()) {
|
if (filteredTimeline.empty()) {
|
||||||
return "Commit " + commitTime + " not found in Commits " + completedTimeline;
|
return "Commit " + instantTime + " not found in Commits " + completedTimeline;
|
||||||
}
|
}
|
||||||
|
|
||||||
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
|
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
|
||||||
sparkLauncher.addAppArgs(SparkMain.SparkCommand.ROLLBACK.toString(), commitTime,
|
sparkLauncher.addAppArgs(SparkMain.SparkCommand.ROLLBACK.toString(), instantTime,
|
||||||
HoodieCLI.getTableMetaClient().getBasePath());
|
HoodieCLI.getTableMetaClient().getBasePath());
|
||||||
Process process = sparkLauncher.launch();
|
Process process = sparkLauncher.launch();
|
||||||
InputStreamConsumer.captureOutput(process);
|
InputStreamConsumer.captureOutput(process);
|
||||||
@@ -239,16 +239,16 @@ public class CommitsCommand implements CommandMarker {
|
|||||||
// Refresh the current
|
// Refresh the current
|
||||||
refreshCommits();
|
refreshCommits();
|
||||||
if (exitCode != 0) {
|
if (exitCode != 0) {
|
||||||
return "Commit " + commitTime + " failed to roll back";
|
return "Commit " + instantTime + " failed to roll back";
|
||||||
}
|
}
|
||||||
return "Commit " + commitTime + " rolled back";
|
return "Commit " + instantTime + " rolled back";
|
||||||
}
|
}
|
||||||
|
|
||||||
@CliCommand(value = "commit showpartitions", help = "Show partition level details of a commit")
|
@CliCommand(value = "commit showpartitions", help = "Show partition level details of a commit")
|
||||||
public String showCommitPartitions(
|
public String showCommitPartitions(
|
||||||
@CliOption(key = {"createView"}, mandatory = false, help = "view name to store output table",
|
@CliOption(key = {"createView"}, mandatory = false, help = "view name to store output table",
|
||||||
unspecifiedDefaultValue = "") final String exportTableName,
|
unspecifiedDefaultValue = "") final String exportTableName,
|
||||||
@CliOption(key = {"commit"}, help = "Commit to show") final String commitTime,
|
@CliOption(key = {"commit"}, help = "Commit to show") final String instantTime,
|
||||||
@CliOption(key = {"limit"}, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
|
@CliOption(key = {"limit"}, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
|
||||||
@CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
|
@CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
|
||||||
@CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
|
@CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
|
||||||
@@ -258,10 +258,10 @@ public class CommitsCommand implements CommandMarker {
|
|||||||
|
|
||||||
HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
|
HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
|
||||||
HoodieTimeline timeline = activeTimeline.getCommitsTimeline().filterCompletedInstants();
|
HoodieTimeline timeline = activeTimeline.getCommitsTimeline().filterCompletedInstants();
|
||||||
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime);
|
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, instantTime);
|
||||||
|
|
||||||
if (!timeline.containsInstant(commitInstant)) {
|
if (!timeline.containsInstant(commitInstant)) {
|
||||||
return "Commit " + commitTime + " not found in Commits " + timeline;
|
return "Commit " + instantTime + " not found in Commits " + timeline;
|
||||||
}
|
}
|
||||||
HoodieCommitMetadata meta = HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(commitInstant).get(),
|
HoodieCommitMetadata meta = HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(commitInstant).get(),
|
||||||
HoodieCommitMetadata.class);
|
HoodieCommitMetadata.class);
|
||||||
@@ -306,7 +306,7 @@ public class CommitsCommand implements CommandMarker {
|
|||||||
public String showCommitFiles(
|
public String showCommitFiles(
|
||||||
@CliOption(key = {"createView"}, mandatory = false, help = "view name to store output table",
|
@CliOption(key = {"createView"}, mandatory = false, help = "view name to store output table",
|
||||||
unspecifiedDefaultValue = "") final String exportTableName,
|
unspecifiedDefaultValue = "") final String exportTableName,
|
||||||
@CliOption(key = {"commit"}, help = "Commit to show") final String commitTime,
|
@CliOption(key = {"commit"}, help = "Commit to show") final String instantTime,
|
||||||
@CliOption(key = {"limit"}, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
|
@CliOption(key = {"limit"}, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
|
||||||
@CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
|
@CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
|
||||||
@CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
|
@CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
|
||||||
@@ -316,10 +316,10 @@ public class CommitsCommand implements CommandMarker {
|
|||||||
|
|
||||||
HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
|
HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
|
||||||
HoodieTimeline timeline = activeTimeline.getCommitsTimeline().filterCompletedInstants();
|
HoodieTimeline timeline = activeTimeline.getCommitsTimeline().filterCompletedInstants();
|
||||||
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime);
|
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, instantTime);
|
||||||
|
|
||||||
if (!timeline.containsInstant(commitInstant)) {
|
if (!timeline.containsInstant(commitInstant)) {
|
||||||
return "Commit " + commitTime + " not found in Commits " + timeline;
|
return "Commit " + instantTime + " not found in Commits " + timeline;
|
||||||
}
|
}
|
||||||
HoodieCommitMetadata meta = HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(commitInstant).get(),
|
HoodieCommitMetadata meta = HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(commitInstant).get(),
|
||||||
HoodieCommitMetadata.class);
|
HoodieCommitMetadata.class);
|
||||||
|
|||||||
@@ -92,7 +92,7 @@ public class SavepointsCommand implements CommandMarker {
|
|||||||
|
|
||||||
@CliCommand(value = "savepoint rollback", help = "Savepoint a commit")
|
@CliCommand(value = "savepoint rollback", help = "Savepoint a commit")
|
||||||
public String rollbackToSavepoint(
|
public String rollbackToSavepoint(
|
||||||
@CliOption(key = {"savepoint"}, help = "Savepoint to rollback") final String commitTime,
|
@CliOption(key = {"savepoint"}, help = "Savepoint to rollback") final String instantTime,
|
||||||
@CliOption(key = {"sparkProperties"}, help = "Spark Properties File Path") final String sparkPropertiesPath)
|
@CliOption(key = {"sparkProperties"}, help = "Spark Properties File Path") final String sparkPropertiesPath)
|
||||||
throws Exception {
|
throws Exception {
|
||||||
HoodieTableMetaClient metaClient = HoodieCLI.getTableMetaClient();
|
HoodieTableMetaClient metaClient = HoodieCLI.getTableMetaClient();
|
||||||
@@ -101,14 +101,14 @@ public class SavepointsCommand implements CommandMarker {
|
|||||||
}
|
}
|
||||||
HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline();
|
HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline();
|
||||||
HoodieTimeline timeline = activeTimeline.getCommitTimeline().filterCompletedInstants();
|
HoodieTimeline timeline = activeTimeline.getCommitTimeline().filterCompletedInstants();
|
||||||
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime);
|
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, instantTime);
|
||||||
|
|
||||||
if (!timeline.containsInstant(commitInstant)) {
|
if (!timeline.containsInstant(commitInstant)) {
|
||||||
return "Commit " + commitTime + " not found in Commits " + timeline;
|
return "Commit " + instantTime + " not found in Commits " + timeline;
|
||||||
}
|
}
|
||||||
|
|
||||||
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
|
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
|
||||||
sparkLauncher.addAppArgs(SparkMain.SparkCommand.ROLLBACK_TO_SAVEPOINT.toString(), commitTime,
|
sparkLauncher.addAppArgs(SparkMain.SparkCommand.ROLLBACK_TO_SAVEPOINT.toString(), instantTime,
|
||||||
metaClient.getBasePath());
|
metaClient.getBasePath());
|
||||||
Process process = sparkLauncher.launch();
|
Process process = sparkLauncher.launch();
|
||||||
InputStreamConsumer.captureOutput(process);
|
InputStreamConsumer.captureOutput(process);
|
||||||
@@ -116,9 +116,9 @@ public class SavepointsCommand implements CommandMarker {
|
|||||||
// Refresh the current
|
// Refresh the current
|
||||||
refreshMetaClient();
|
refreshMetaClient();
|
||||||
if (exitCode != 0) {
|
if (exitCode != 0) {
|
||||||
return "Savepoint " + commitTime + " failed to roll back";
|
return "Savepoint " + instantTime + " failed to roll back";
|
||||||
}
|
}
|
||||||
return "Savepoint " + commitTime + " rolled back";
|
return "Savepoint " + instantTime + " rolled back";
|
||||||
}
|
}
|
||||||
|
|
||||||
@CliCommand(value = "savepoints refresh", help = "Refresh the savepoints")
|
@CliCommand(value = "savepoints refresh", help = "Refresh the savepoints")
|
||||||
@@ -128,24 +128,24 @@ public class SavepointsCommand implements CommandMarker {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@CliCommand(value = "savepoint delete", help = "Delete the savepoint")
|
@CliCommand(value = "savepoint delete", help = "Delete the savepoint")
|
||||||
public String deleteSavepoint(@CliOption(key = {"commit"}, help = "Delete a savepoint") final String commitTime) throws Exception {
|
public String deleteSavepoint(@CliOption(key = {"commit"}, help = "Delete a savepoint") final String instantTime) throws Exception {
|
||||||
HoodieTableMetaClient metaClient = HoodieCLI.getTableMetaClient();
|
HoodieTableMetaClient metaClient = HoodieCLI.getTableMetaClient();
|
||||||
HoodieTimeline completedInstants = metaClient.getActiveTimeline().getSavePointTimeline().filterCompletedInstants();
|
HoodieTimeline completedInstants = metaClient.getActiveTimeline().getSavePointTimeline().filterCompletedInstants();
|
||||||
if (completedInstants.empty()) {
|
if (completedInstants.empty()) {
|
||||||
throw new HoodieException("There are no completed savepoint to run delete");
|
throw new HoodieException("There are no completed savepoint to run delete");
|
||||||
}
|
}
|
||||||
HoodieInstant savePoint = new HoodieInstant(false, HoodieTimeline.SAVEPOINT_ACTION, commitTime);
|
HoodieInstant savePoint = new HoodieInstant(false, HoodieTimeline.SAVEPOINT_ACTION, instantTime);
|
||||||
|
|
||||||
if (!completedInstants.containsInstant(savePoint)) {
|
if (!completedInstants.containsInstant(savePoint)) {
|
||||||
return "Commit " + commitTime + " not found in Commits " + completedInstants;
|
return "Commit " + instantTime + " not found in Commits " + completedInstants;
|
||||||
}
|
}
|
||||||
|
|
||||||
try (JavaSparkContext jsc = SparkUtil.initJavaSparkConf("Delete Savepoint")) {
|
try (JavaSparkContext jsc = SparkUtil.initJavaSparkConf("Delete Savepoint")) {
|
||||||
HoodieWriteClient client = createHoodieClient(jsc, metaClient.getBasePath());
|
HoodieWriteClient client = createHoodieClient(jsc, metaClient.getBasePath());
|
||||||
client.deleteSavepoint(commitTime);
|
client.deleteSavepoint(instantTime);
|
||||||
refreshMetaClient();
|
refreshMetaClient();
|
||||||
}
|
}
|
||||||
return "Savepoint " + commitTime + " deleted";
|
return "Savepoint " + instantTime + " deleted";
|
||||||
}
|
}
|
||||||
|
|
||||||
private static HoodieWriteClient createHoodieClient(JavaSparkContext jsc, String basePath) throws Exception {
|
private static HoodieWriteClient createHoodieClient(JavaSparkContext jsc, String basePath) throws Exception {
|
||||||
|
|||||||
@@ -282,13 +282,13 @@ public class SparkMain {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static int rollback(JavaSparkContext jsc, String commitTime, String basePath) throws Exception {
|
private static int rollback(JavaSparkContext jsc, String instantTime, String basePath) throws Exception {
|
||||||
HoodieWriteClient client = createHoodieClient(jsc, basePath);
|
HoodieWriteClient client = createHoodieClient(jsc, basePath);
|
||||||
if (client.rollback(commitTime)) {
|
if (client.rollback(instantTime)) {
|
||||||
LOG.info(String.format("The commit \"%s\" rolled back.", commitTime));
|
LOG.info(String.format("The commit \"%s\" rolled back.", instantTime));
|
||||||
return 0;
|
return 0;
|
||||||
} else {
|
} else {
|
||||||
LOG.info(String.format("The commit \"%s\" failed to roll back.", commitTime));
|
LOG.info(String.format("The commit \"%s\" failed to roll back.", instantTime));
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -74,14 +74,14 @@ public class StatsCommand implements CommandMarker {
|
|||||||
|
|
||||||
List<Comparable[]> rows = new ArrayList<>();
|
List<Comparable[]> rows = new ArrayList<>();
|
||||||
DecimalFormat df = new DecimalFormat("#.00");
|
DecimalFormat df = new DecimalFormat("#.00");
|
||||||
for (HoodieInstant commitTime : timeline.getInstants().collect(Collectors.toList())) {
|
for (HoodieInstant instantTime : timeline.getInstants().collect(Collectors.toList())) {
|
||||||
String waf = "0";
|
String waf = "0";
|
||||||
HoodieCommitMetadata commit = HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(commitTime).get(),
|
HoodieCommitMetadata commit = HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(instantTime).get(),
|
||||||
HoodieCommitMetadata.class);
|
HoodieCommitMetadata.class);
|
||||||
if (commit.fetchTotalUpdateRecordsWritten() > 0) {
|
if (commit.fetchTotalUpdateRecordsWritten() > 0) {
|
||||||
waf = df.format((float) commit.fetchTotalRecordsWritten() / commit.fetchTotalUpdateRecordsWritten());
|
waf = df.format((float) commit.fetchTotalRecordsWritten() / commit.fetchTotalUpdateRecordsWritten());
|
||||||
}
|
}
|
||||||
rows.add(new Comparable[] {commitTime.getTimestamp(), commit.fetchTotalUpdateRecordsWritten(),
|
rows.add(new Comparable[] {instantTime.getTimestamp(), commit.fetchTotalUpdateRecordsWritten(),
|
||||||
commit.fetchTotalRecordsWritten(), waf});
|
commit.fetchTotalRecordsWritten(), waf});
|
||||||
totalRecordsUpserted += commit.fetchTotalUpdateRecordsWritten();
|
totalRecordsUpserted += commit.fetchTotalUpdateRecordsWritten();
|
||||||
totalRecordsWritten += commit.fetchTotalRecordsWritten();
|
totalRecordsWritten += commit.fetchTotalRecordsWritten();
|
||||||
@@ -97,8 +97,8 @@ public class StatsCommand implements CommandMarker {
|
|||||||
return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows);
|
return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows);
|
||||||
}
|
}
|
||||||
|
|
||||||
private Comparable[] printFileSizeHistogram(String commitTime, Snapshot s) {
|
private Comparable[] printFileSizeHistogram(String instantTime, Snapshot s) {
|
||||||
return new Comparable[] {commitTime, s.getMin(), s.getValue(0.1), s.getMedian(), s.getMean(), s.get95thPercentile(),
|
return new Comparable[] {instantTime, s.getMin(), s.getValue(0.1), s.getMedian(), s.getMean(), s.get95thPercentile(),
|
||||||
s.getMax(), s.size(), s.getStdDev()};
|
s.getMax(), s.size(), s.getStdDev()};
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -121,19 +121,19 @@ public class StatsCommand implements CommandMarker {
|
|||||||
Histogram globalHistogram = new Histogram(new UniformReservoir(MAX_FILES));
|
Histogram globalHistogram = new Histogram(new UniformReservoir(MAX_FILES));
|
||||||
HashMap<String, Histogram> commitHistoMap = new HashMap<>();
|
HashMap<String, Histogram> commitHistoMap = new HashMap<>();
|
||||||
for (FileStatus fileStatus : statuses) {
|
for (FileStatus fileStatus : statuses) {
|
||||||
String commitTime = FSUtils.getCommitTime(fileStatus.getPath().getName());
|
String instantTime = FSUtils.getCommitTime(fileStatus.getPath().getName());
|
||||||
long sz = fileStatus.getLen();
|
long sz = fileStatus.getLen();
|
||||||
if (!commitHistoMap.containsKey(commitTime)) {
|
if (!commitHistoMap.containsKey(instantTime)) {
|
||||||
commitHistoMap.put(commitTime, new Histogram(new UniformReservoir(MAX_FILES)));
|
commitHistoMap.put(instantTime, new Histogram(new UniformReservoir(MAX_FILES)));
|
||||||
}
|
}
|
||||||
commitHistoMap.get(commitTime).update(sz);
|
commitHistoMap.get(instantTime).update(sz);
|
||||||
globalHistogram.update(sz);
|
globalHistogram.update(sz);
|
||||||
}
|
}
|
||||||
|
|
||||||
List<Comparable[]> rows = new ArrayList<>();
|
List<Comparable[]> rows = new ArrayList<>();
|
||||||
for (String commitTime : commitHistoMap.keySet()) {
|
for (String instantTime : commitHistoMap.keySet()) {
|
||||||
Snapshot s = commitHistoMap.get(commitTime).getSnapshot();
|
Snapshot s = commitHistoMap.get(instantTime).getSnapshot();
|
||||||
rows.add(printFileSizeHistogram(commitTime, s));
|
rows.add(printFileSizeHistogram(instantTime, s));
|
||||||
}
|
}
|
||||||
Snapshot s = globalHistogram.getSnapshot();
|
Snapshot s = globalHistogram.getSnapshot();
|
||||||
rows.add(printFileSizeHistogram("ALL", s));
|
rows.add(printFileSizeHistogram("ALL", s));
|
||||||
|
|||||||
@@ -147,11 +147,11 @@ class DedupeSparkJob(basePath: String,
|
|||||||
|
|
||||||
// 2. Remove duplicates from the bad files
|
// 2. Remove duplicates from the bad files
|
||||||
dupeFixPlan.foreach { case (fileName, keysToSkip) =>
|
dupeFixPlan.foreach { case (fileName, keysToSkip) =>
|
||||||
val commitTime = FSUtils.getCommitTime(fileNameToPathMap(fileName).getName)
|
val instantTime = FSUtils.getCommitTime(fileNameToPathMap(fileName).getName)
|
||||||
val badFilePath = new Path(s"$repairOutputPath/${fileNameToPathMap(fileName).getName}.bad")
|
val badFilePath = new Path(s"$repairOutputPath/${fileNameToPathMap(fileName).getName}.bad")
|
||||||
val newFilePath = new Path(s"$repairOutputPath/${fileNameToPathMap(fileName).getName}")
|
val newFilePath = new Path(s"$repairOutputPath/${fileNameToPathMap(fileName).getName}")
|
||||||
LOG.info(" Skipping and writing new file for : " + fileName)
|
LOG.info(" Skipping and writing new file for : " + fileName)
|
||||||
SparkHelpers.skipKeysAndWriteNewFile(commitTime, fs, badFilePath, newFilePath, dupeFixPlan(fileName))
|
SparkHelpers.skipKeysAndWriteNewFile(instantTime, fs, badFilePath, newFilePath, dupeFixPlan(fileName))
|
||||||
fs.delete(badFilePath, false)
|
fs.delete(badFilePath, false)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -38,14 +38,14 @@ import scala.collection.mutable._
|
|||||||
|
|
||||||
object SparkHelpers {
|
object SparkHelpers {
|
||||||
@throws[Exception]
|
@throws[Exception]
|
||||||
def skipKeysAndWriteNewFile(commitTime: String, fs: FileSystem, sourceFile: Path, destinationFile: Path, keysToSkip: Set[String]) {
|
def skipKeysAndWriteNewFile(instantTime: String, fs: FileSystem, sourceFile: Path, destinationFile: Path, keysToSkip: Set[String]) {
|
||||||
val sourceRecords = ParquetUtils.readAvroRecords(fs.getConf, sourceFile)
|
val sourceRecords = ParquetUtils.readAvroRecords(fs.getConf, sourceFile)
|
||||||
val schema: Schema = sourceRecords.get(0).getSchema
|
val schema: Schema = sourceRecords.get(0).getSchema
|
||||||
val filter: BloomFilter = BloomFilterFactory.createBloomFilter(HoodieIndexConfig.DEFAULT_BLOOM_FILTER_NUM_ENTRIES.toInt, HoodieIndexConfig.DEFAULT_BLOOM_FILTER_FPP.toDouble,
|
val filter: BloomFilter = BloomFilterFactory.createBloomFilter(HoodieIndexConfig.DEFAULT_BLOOM_FILTER_NUM_ENTRIES.toInt, HoodieIndexConfig.DEFAULT_BLOOM_FILTER_FPP.toDouble,
|
||||||
HoodieIndexConfig.DEFAULT_HOODIE_BLOOM_INDEX_FILTER_DYNAMIC_MAX_ENTRIES.toInt, HoodieIndexConfig.DEFAULT_BLOOM_INDEX_FILTER_TYPE);
|
HoodieIndexConfig.DEFAULT_HOODIE_BLOOM_INDEX_FILTER_DYNAMIC_MAX_ENTRIES.toInt, HoodieIndexConfig.DEFAULT_BLOOM_INDEX_FILTER_TYPE);
|
||||||
val writeSupport: HoodieAvroWriteSupport = new HoodieAvroWriteSupport(new AvroSchemaConverter().convert(schema), schema, filter)
|
val writeSupport: HoodieAvroWriteSupport = new HoodieAvroWriteSupport(new AvroSchemaConverter().convert(schema), schema, filter)
|
||||||
val parquetConfig: HoodieParquetConfig = new HoodieParquetConfig(writeSupport, CompressionCodecName.GZIP, HoodieStorageConfig.DEFAULT_PARQUET_BLOCK_SIZE_BYTES.toInt, HoodieStorageConfig.DEFAULT_PARQUET_PAGE_SIZE_BYTES.toInt, HoodieStorageConfig.DEFAULT_PARQUET_FILE_MAX_BYTES.toInt, fs.getConf, HoodieStorageConfig.DEFAULT_STREAM_COMPRESSION_RATIO.toDouble)
|
val parquetConfig: HoodieParquetConfig = new HoodieParquetConfig(writeSupport, CompressionCodecName.GZIP, HoodieStorageConfig.DEFAULT_PARQUET_BLOCK_SIZE_BYTES.toInt, HoodieStorageConfig.DEFAULT_PARQUET_PAGE_SIZE_BYTES.toInt, HoodieStorageConfig.DEFAULT_PARQUET_FILE_MAX_BYTES.toInt, fs.getConf, HoodieStorageConfig.DEFAULT_STREAM_COMPRESSION_RATIO.toDouble)
|
||||||
val writer = new HoodieParquetWriter[HoodieJsonPayload, IndexedRecord](commitTime, destinationFile, parquetConfig, schema)
|
val writer = new HoodieParquetWriter[HoodieJsonPayload, IndexedRecord](instantTime, destinationFile, parquetConfig, schema)
|
||||||
for (rec <- sourceRecords) {
|
for (rec <- sourceRecords) {
|
||||||
val key: String = rec.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString
|
val key: String = rec.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString
|
||||||
if (!keysToSkip.contains(key)) {
|
if (!keysToSkip.contains(key)) {
|
||||||
|
|||||||
@@ -96,23 +96,23 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload> e
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Commit changes performed at the given commitTime marker.
|
* Commit changes performed at the given instantTime marker.
|
||||||
*/
|
*/
|
||||||
public boolean commit(String commitTime, JavaRDD<WriteStatus> writeStatuses) {
|
public boolean commit(String instantTime, JavaRDD<WriteStatus> writeStatuses) {
|
||||||
return commit(commitTime, writeStatuses, Option.empty());
|
return commit(instantTime, writeStatuses, Option.empty());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Commit changes performed at the given commitTime marker.
|
* Commit changes performed at the given instantTime marker.
|
||||||
*/
|
*/
|
||||||
public boolean commit(String commitTime, JavaRDD<WriteStatus> writeStatuses,
|
public boolean commit(String instantTime, JavaRDD<WriteStatus> writeStatuses,
|
||||||
Option<Map<String, String>> extraMetadata) {
|
Option<Map<String, String>> extraMetadata) {
|
||||||
HoodieTableMetaClient metaClient = createMetaClient(false);
|
HoodieTableMetaClient metaClient = createMetaClient(false);
|
||||||
return commit(commitTime, writeStatuses, extraMetadata, metaClient.getCommitActionType());
|
return commit(instantTime, writeStatuses, extraMetadata, metaClient.getCommitActionType());
|
||||||
}
|
}
|
||||||
|
|
||||||
protected JavaRDD<WriteStatus> updateIndexAndCommitIfNeeded(JavaRDD<WriteStatus> writeStatusRDD, HoodieTable<T> table,
|
protected JavaRDD<WriteStatus> updateIndexAndCommitIfNeeded(JavaRDD<WriteStatus> writeStatusRDD, HoodieTable<T> table,
|
||||||
String commitTime) {
|
String instantTime) {
|
||||||
// cache writeStatusRDD before updating index, so that all actions before this are not triggered again for future
|
// cache writeStatusRDD before updating index, so that all actions before this are not triggered again for future
|
||||||
// RDD actions that are performed after updating the index.
|
// RDD actions that are performed after updating the index.
|
||||||
writeStatusRDD = writeStatusRDD.persist(config.getWriteStatusStorageLevel());
|
writeStatusRDD = writeStatusRDD.persist(config.getWriteStatusStorageLevel());
|
||||||
@@ -121,26 +121,26 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload> e
|
|||||||
JavaRDD<WriteStatus> statuses = index.updateLocation(writeStatusRDD, jsc, table);
|
JavaRDD<WriteStatus> statuses = index.updateLocation(writeStatusRDD, jsc, table);
|
||||||
metrics.updateIndexMetrics(UPDATE_STR, metrics.getDurationInMs(indexTimer == null ? 0L : indexTimer.stop()));
|
metrics.updateIndexMetrics(UPDATE_STR, metrics.getDurationInMs(indexTimer == null ? 0L : indexTimer.stop()));
|
||||||
// Trigger the insert and collect statuses
|
// Trigger the insert and collect statuses
|
||||||
commitOnAutoCommit(commitTime, statuses, table.getMetaClient().getCommitActionType());
|
commitOnAutoCommit(instantTime, statuses, table.getMetaClient().getCommitActionType());
|
||||||
return statuses;
|
return statuses;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void commitOnAutoCommit(String commitTime, JavaRDD<WriteStatus> resultRDD, String actionType) {
|
protected void commitOnAutoCommit(String instantTime, JavaRDD<WriteStatus> resultRDD, String actionType) {
|
||||||
if (config.shouldAutoCommit()) {
|
if (config.shouldAutoCommit()) {
|
||||||
LOG.info("Auto commit enabled: Committing " + commitTime);
|
LOG.info("Auto commit enabled: Committing " + instantTime);
|
||||||
boolean commitResult = commit(commitTime, resultRDD, Option.empty(), actionType);
|
boolean commitResult = commit(instantTime, resultRDD, Option.empty(), actionType);
|
||||||
if (!commitResult) {
|
if (!commitResult) {
|
||||||
throw new HoodieCommitException("Failed to commit " + commitTime);
|
throw new HoodieCommitException("Failed to commit " + instantTime);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
LOG.info("Auto commit disabled for " + commitTime);
|
LOG.info("Auto commit disabled for " + instantTime);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean commit(String commitTime, JavaRDD<WriteStatus> writeStatuses,
|
private boolean commit(String instantTime, JavaRDD<WriteStatus> writeStatuses,
|
||||||
Option<Map<String, String>> extraMetadata, String actionType) {
|
Option<Map<String, String>> extraMetadata, String actionType) {
|
||||||
|
|
||||||
LOG.info("Commiting " + commitTime);
|
LOG.info("Commiting " + instantTime);
|
||||||
// Create a Hoodie table which encapsulated the commits and files visible
|
// Create a Hoodie table which encapsulated the commits and files visible
|
||||||
HoodieTable<T> table = HoodieTable.getHoodieTable(createMetaClient(true), config, jsc);
|
HoodieTable<T> table = HoodieTable.getHoodieTable(createMetaClient(true), config, jsc);
|
||||||
|
|
||||||
@@ -152,7 +152,7 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload> e
|
|||||||
updateMetadataAndRollingStats(actionType, metadata, stats);
|
updateMetadataAndRollingStats(actionType, metadata, stats);
|
||||||
|
|
||||||
// Finalize write
|
// Finalize write
|
||||||
finalizeWrite(table, commitTime, stats);
|
finalizeWrite(table, instantTime, stats);
|
||||||
|
|
||||||
// add in extra metadata
|
// add in extra metadata
|
||||||
if (extraMetadata.isPresent()) {
|
if (extraMetadata.isPresent()) {
|
||||||
@@ -162,23 +162,23 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload> e
|
|||||||
metadata.setOperationType(operationType);
|
metadata.setOperationType(operationType);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
activeTimeline.saveAsComplete(new HoodieInstant(true, actionType, commitTime),
|
activeTimeline.saveAsComplete(new HoodieInstant(true, actionType, instantTime),
|
||||||
Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
|
Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
|
||||||
|
|
||||||
postCommit(metadata, commitTime, extraMetadata);
|
postCommit(metadata, instantTime, extraMetadata);
|
||||||
|
|
||||||
if (writeContext != null) {
|
if (writeContext != null) {
|
||||||
long durationInMs = metrics.getDurationInMs(writeContext.stop());
|
long durationInMs = metrics.getDurationInMs(writeContext.stop());
|
||||||
metrics.updateCommitMetrics(HoodieActiveTimeline.COMMIT_FORMATTER.parse(commitTime).getTime(), durationInMs,
|
metrics.updateCommitMetrics(HoodieActiveTimeline.COMMIT_FORMATTER.parse(instantTime).getTime(), durationInMs,
|
||||||
metadata, actionType);
|
metadata, actionType);
|
||||||
writeContext = null;
|
writeContext = null;
|
||||||
}
|
}
|
||||||
LOG.info("Committed " + commitTime);
|
LOG.info("Committed " + instantTime);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new HoodieCommitException("Failed to complete commit " + config.getBasePath() + " at time " + commitTime,
|
throw new HoodieCommitException("Failed to complete commit " + config.getBasePath() + " at time " + instantTime,
|
||||||
e);
|
e);
|
||||||
} catch (ParseException e) {
|
} catch (ParseException e) {
|
||||||
throw new HoodieCommitException("Failed to complete commit " + config.getBasePath() + " at time " + commitTime
|
throw new HoodieCommitException("Failed to complete commit " + config.getBasePath() + " at time " + instantTime
|
||||||
+ "Instant time is not of valid format", e);
|
+ "Instant time is not of valid format", e);
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
|
|||||||
@@ -167,13 +167,13 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Upsert a batch of new records into Hoodie table at the supplied commitTime.
|
* Upsert a batch of new records into Hoodie table at the supplied instantTime.
|
||||||
*
|
*
|
||||||
* @param records JavaRDD of hoodieRecords to upsert
|
* @param records JavaRDD of hoodieRecords to upsert
|
||||||
* @param commitTime Instant time of the commit
|
* @param instantTime Instant time of the commit
|
||||||
* @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and counts
|
* @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and counts
|
||||||
*/
|
*/
|
||||||
public JavaRDD<WriteStatus> upsert(JavaRDD<HoodieRecord<T>> records, final String commitTime) {
|
public JavaRDD<WriteStatus> upsert(JavaRDD<HoodieRecord<T>> records, final String instantTime) {
|
||||||
HoodieTable<T> table = getTableAndInitCtx(WriteOperationType.UPSERT);
|
HoodieTable<T> table = getTableAndInitCtx(WriteOperationType.UPSERT);
|
||||||
setOperationType(WriteOperationType.UPSERT);
|
setOperationType(WriteOperationType.UPSERT);
|
||||||
try {
|
try {
|
||||||
@@ -185,34 +185,34 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
// perform index loop up to get existing location of records
|
// perform index loop up to get existing location of records
|
||||||
JavaRDD<HoodieRecord<T>> taggedRecords = getIndex().tagLocation(dedupedRecords, jsc, table);
|
JavaRDD<HoodieRecord<T>> taggedRecords = getIndex().tagLocation(dedupedRecords, jsc, table);
|
||||||
metrics.updateIndexMetrics(LOOKUP_STR, metrics.getDurationInMs(indexTimer == null ? 0L : indexTimer.stop()));
|
metrics.updateIndexMetrics(LOOKUP_STR, metrics.getDurationInMs(indexTimer == null ? 0L : indexTimer.stop()));
|
||||||
return upsertRecordsInternal(taggedRecords, commitTime, table, true);
|
return upsertRecordsInternal(taggedRecords, instantTime, table, true);
|
||||||
} catch (Throwable e) {
|
} catch (Throwable e) {
|
||||||
if (e instanceof HoodieUpsertException) {
|
if (e instanceof HoodieUpsertException) {
|
||||||
throw (HoodieUpsertException) e;
|
throw (HoodieUpsertException) e;
|
||||||
}
|
}
|
||||||
throw new HoodieUpsertException("Failed to upsert for commit time " + commitTime, e);
|
throw new HoodieUpsertException("Failed to upsert for commit time " + instantTime, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Upserts the given prepared records into the Hoodie table, at the supplied commitTime.
|
* Upserts the given prepared records into the Hoodie table, at the supplied instantTime.
|
||||||
* <p>
|
* <p>
|
||||||
* This implementation requires that the input records are already tagged, and de-duped if needed.
|
* This implementation requires that the input records are already tagged, and de-duped if needed.
|
||||||
*
|
*
|
||||||
* @param preppedRecords Prepared HoodieRecords to upsert
|
* @param preppedRecords Prepared HoodieRecords to upsert
|
||||||
* @param commitTime Instant time of the commit
|
* @param instantTime Instant time of the commit
|
||||||
* @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and counts
|
* @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and counts
|
||||||
*/
|
*/
|
||||||
public JavaRDD<WriteStatus> upsertPreppedRecords(JavaRDD<HoodieRecord<T>> preppedRecords, final String commitTime) {
|
public JavaRDD<WriteStatus> upsertPreppedRecords(JavaRDD<HoodieRecord<T>> preppedRecords, final String instantTime) {
|
||||||
HoodieTable<T> table = getTableAndInitCtx(WriteOperationType.UPSERT_PREPPED);
|
HoodieTable<T> table = getTableAndInitCtx(WriteOperationType.UPSERT_PREPPED);
|
||||||
setOperationType(WriteOperationType.UPSERT_PREPPED);
|
setOperationType(WriteOperationType.UPSERT_PREPPED);
|
||||||
try {
|
try {
|
||||||
return upsertRecordsInternal(preppedRecords, commitTime, table, true);
|
return upsertRecordsInternal(preppedRecords, instantTime, table, true);
|
||||||
} catch (Throwable e) {
|
} catch (Throwable e) {
|
||||||
if (e instanceof HoodieUpsertException) {
|
if (e instanceof HoodieUpsertException) {
|
||||||
throw (HoodieUpsertException) e;
|
throw (HoodieUpsertException) e;
|
||||||
}
|
}
|
||||||
throw new HoodieUpsertException("Failed to upsert prepared records for commit time " + commitTime, e);
|
throw new HoodieUpsertException("Failed to upsert prepared records for commit time " + instantTime, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -223,10 +223,10 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
* alignment, as with upsert(), by profiling the workload
|
* alignment, as with upsert(), by profiling the workload
|
||||||
*
|
*
|
||||||
* @param records HoodieRecords to insert
|
* @param records HoodieRecords to insert
|
||||||
* @param commitTime Instant time of the commit
|
* @param instantTime Instant time of the commit
|
||||||
* @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and counts
|
* @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and counts
|
||||||
*/
|
*/
|
||||||
public JavaRDD<WriteStatus> insert(JavaRDD<HoodieRecord<T>> records, final String commitTime) {
|
public JavaRDD<WriteStatus> insert(JavaRDD<HoodieRecord<T>> records, final String instantTime) {
|
||||||
HoodieTable<T> table = getTableAndInitCtx(WriteOperationType.INSERT);
|
HoodieTable<T> table = getTableAndInitCtx(WriteOperationType.INSERT);
|
||||||
setOperationType(WriteOperationType.INSERT);
|
setOperationType(WriteOperationType.INSERT);
|
||||||
try {
|
try {
|
||||||
@@ -234,36 +234,36 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
JavaRDD<HoodieRecord<T>> dedupedRecords =
|
JavaRDD<HoodieRecord<T>> dedupedRecords =
|
||||||
combineOnCondition(config.shouldCombineBeforeInsert(), records, config.getInsertShuffleParallelism());
|
combineOnCondition(config.shouldCombineBeforeInsert(), records, config.getInsertShuffleParallelism());
|
||||||
|
|
||||||
return upsertRecordsInternal(dedupedRecords, commitTime, table, false);
|
return upsertRecordsInternal(dedupedRecords, instantTime, table, false);
|
||||||
} catch (Throwable e) {
|
} catch (Throwable e) {
|
||||||
if (e instanceof HoodieInsertException) {
|
if (e instanceof HoodieInsertException) {
|
||||||
throw e;
|
throw e;
|
||||||
}
|
}
|
||||||
throw new HoodieInsertException("Failed to insert for commit time " + commitTime, e);
|
throw new HoodieInsertException("Failed to insert for commit time " + instantTime, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Inserts the given prepared records into the Hoodie table, at the supplied commitTime.
|
* Inserts the given prepared records into the Hoodie table, at the supplied instantTime.
|
||||||
* <p>
|
* <p>
|
||||||
* This implementation skips the index check, skips de-duping and is able to leverage benefits such as small file
|
* This implementation skips the index check, skips de-duping and is able to leverage benefits such as small file
|
||||||
* handling/blocking alignment, as with insert(), by profiling the workload. The prepared HoodieRecords should be
|
* handling/blocking alignment, as with insert(), by profiling the workload. The prepared HoodieRecords should be
|
||||||
* de-duped if needed.
|
* de-duped if needed.
|
||||||
*
|
*
|
||||||
* @param preppedRecords HoodieRecords to insert
|
* @param preppedRecords HoodieRecords to insert
|
||||||
* @param commitTime Instant time of the commit
|
* @param instantTime Instant time of the commit
|
||||||
* @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and counts
|
* @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and counts
|
||||||
*/
|
*/
|
||||||
public JavaRDD<WriteStatus> insertPreppedRecords(JavaRDD<HoodieRecord<T>> preppedRecords, final String commitTime) {
|
public JavaRDD<WriteStatus> insertPreppedRecords(JavaRDD<HoodieRecord<T>> preppedRecords, final String instantTime) {
|
||||||
HoodieTable<T> table = getTableAndInitCtx(WriteOperationType.INSERT_PREPPED);
|
HoodieTable<T> table = getTableAndInitCtx(WriteOperationType.INSERT_PREPPED);
|
||||||
setOperationType(WriteOperationType.INSERT_PREPPED);
|
setOperationType(WriteOperationType.INSERT_PREPPED);
|
||||||
try {
|
try {
|
||||||
return upsertRecordsInternal(preppedRecords, commitTime, table, false);
|
return upsertRecordsInternal(preppedRecords, instantTime, table, false);
|
||||||
} catch (Throwable e) {
|
} catch (Throwable e) {
|
||||||
if (e instanceof HoodieInsertException) {
|
if (e instanceof HoodieInsertException) {
|
||||||
throw e;
|
throw e;
|
||||||
}
|
}
|
||||||
throw new HoodieInsertException("Failed to insert prepared records for commit time " + commitTime, e);
|
throw new HoodieInsertException("Failed to insert prepared records for commit time " + instantTime, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -275,11 +275,11 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
* the numbers of files with less memory compared to the {@link HoodieWriteClient#insert(JavaRDD, String)}
|
* the numbers of files with less memory compared to the {@link HoodieWriteClient#insert(JavaRDD, String)}
|
||||||
*
|
*
|
||||||
* @param records HoodieRecords to insert
|
* @param records HoodieRecords to insert
|
||||||
* @param commitTime Instant time of the commit
|
* @param instantTime Instant time of the commit
|
||||||
* @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and counts
|
* @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and counts
|
||||||
*/
|
*/
|
||||||
public JavaRDD<WriteStatus> bulkInsert(JavaRDD<HoodieRecord<T>> records, final String commitTime) {
|
public JavaRDD<WriteStatus> bulkInsert(JavaRDD<HoodieRecord<T>> records, final String instantTime) {
|
||||||
return bulkInsert(records, commitTime, Option.empty());
|
return bulkInsert(records, instantTime, Option.empty());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -292,12 +292,12 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
* {@link UserDefinedBulkInsertPartitioner}.
|
* {@link UserDefinedBulkInsertPartitioner}.
|
||||||
*
|
*
|
||||||
* @param records HoodieRecords to insert
|
* @param records HoodieRecords to insert
|
||||||
* @param commitTime Instant time of the commit
|
* @param instantTime Instant time of the commit
|
||||||
* @param bulkInsertPartitioner If specified then it will be used to partition input records before they are inserted
|
* @param bulkInsertPartitioner If specified then it will be used to partition input records before they are inserted
|
||||||
* into hoodie.
|
* into hoodie.
|
||||||
* @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and counts
|
* @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and counts
|
||||||
*/
|
*/
|
||||||
public JavaRDD<WriteStatus> bulkInsert(JavaRDD<HoodieRecord<T>> records, final String commitTime,
|
public JavaRDD<WriteStatus> bulkInsert(JavaRDD<HoodieRecord<T>> records, final String instantTime,
|
||||||
Option<UserDefinedBulkInsertPartitioner> bulkInsertPartitioner) {
|
Option<UserDefinedBulkInsertPartitioner> bulkInsertPartitioner) {
|
||||||
HoodieTable<T> table = getTableAndInitCtx(WriteOperationType.BULK_INSERT);
|
HoodieTable<T> table = getTableAndInitCtx(WriteOperationType.BULK_INSERT);
|
||||||
setOperationType(WriteOperationType.BULK_INSERT);
|
setOperationType(WriteOperationType.BULK_INSERT);
|
||||||
@@ -306,12 +306,12 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
JavaRDD<HoodieRecord<T>> dedupedRecords =
|
JavaRDD<HoodieRecord<T>> dedupedRecords =
|
||||||
combineOnCondition(config.shouldCombineBeforeInsert(), records, config.getInsertShuffleParallelism());
|
combineOnCondition(config.shouldCombineBeforeInsert(), records, config.getInsertShuffleParallelism());
|
||||||
|
|
||||||
return bulkInsertInternal(dedupedRecords, commitTime, table, bulkInsertPartitioner);
|
return bulkInsertInternal(dedupedRecords, instantTime, table, bulkInsertPartitioner);
|
||||||
} catch (Throwable e) {
|
} catch (Throwable e) {
|
||||||
if (e instanceof HoodieInsertException) {
|
if (e instanceof HoodieInsertException) {
|
||||||
throw e;
|
throw e;
|
||||||
}
|
}
|
||||||
throw new HoodieInsertException("Failed to bulk insert for commit time " + commitTime, e);
|
throw new HoodieInsertException("Failed to bulk insert for commit time " + instantTime, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -326,34 +326,34 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
* {@link UserDefinedBulkInsertPartitioner}.
|
* {@link UserDefinedBulkInsertPartitioner}.
|
||||||
*
|
*
|
||||||
* @param preppedRecords HoodieRecords to insert
|
* @param preppedRecords HoodieRecords to insert
|
||||||
* @param commitTime Instant time of the commit
|
* @param instantTime Instant time of the commit
|
||||||
* @param bulkInsertPartitioner If specified then it will be used to partition input records before they are inserted
|
* @param bulkInsertPartitioner If specified then it will be used to partition input records before they are inserted
|
||||||
* into hoodie.
|
* into hoodie.
|
||||||
* @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and counts
|
* @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and counts
|
||||||
*/
|
*/
|
||||||
public JavaRDD<WriteStatus> bulkInsertPreppedRecords(JavaRDD<HoodieRecord<T>> preppedRecords, final String commitTime,
|
public JavaRDD<WriteStatus> bulkInsertPreppedRecords(JavaRDD<HoodieRecord<T>> preppedRecords, final String instantTime,
|
||||||
Option<UserDefinedBulkInsertPartitioner> bulkInsertPartitioner) {
|
Option<UserDefinedBulkInsertPartitioner> bulkInsertPartitioner) {
|
||||||
HoodieTable<T> table = getTableAndInitCtx(WriteOperationType.BULK_INSERT_PREPPED);
|
HoodieTable<T> table = getTableAndInitCtx(WriteOperationType.BULK_INSERT_PREPPED);
|
||||||
setOperationType(WriteOperationType.BULK_INSERT_PREPPED);
|
setOperationType(WriteOperationType.BULK_INSERT_PREPPED);
|
||||||
try {
|
try {
|
||||||
return bulkInsertInternal(preppedRecords, commitTime, table, bulkInsertPartitioner);
|
return bulkInsertInternal(preppedRecords, instantTime, table, bulkInsertPartitioner);
|
||||||
} catch (Throwable e) {
|
} catch (Throwable e) {
|
||||||
if (e instanceof HoodieInsertException) {
|
if (e instanceof HoodieInsertException) {
|
||||||
throw e;
|
throw e;
|
||||||
}
|
}
|
||||||
throw new HoodieInsertException("Failed to bulk insert prepared records for commit time " + commitTime, e);
|
throw new HoodieInsertException("Failed to bulk insert prepared records for commit time " + instantTime, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Deletes a list of {@link HoodieKey}s from the Hoodie table, at the supplied commitTime {@link HoodieKey}s will be
|
* Deletes a list of {@link HoodieKey}s from the Hoodie table, at the supplied instantTime {@link HoodieKey}s will be
|
||||||
* de-duped and non existent keys will be removed before deleting.
|
* de-duped and non existent keys will be removed before deleting.
|
||||||
*
|
*
|
||||||
* @param keys {@link List} of {@link HoodieKey}s to be deleted
|
* @param keys {@link List} of {@link HoodieKey}s to be deleted
|
||||||
* @param commitTime Commit time handle
|
* @param instantTime Commit time handle
|
||||||
* @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and counts
|
* @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and counts
|
||||||
*/
|
*/
|
||||||
public JavaRDD<WriteStatus> delete(JavaRDD<HoodieKey> keys, final String commitTime) {
|
public JavaRDD<WriteStatus> delete(JavaRDD<HoodieKey> keys, final String instantTime) {
|
||||||
HoodieTable<T> table = getTableAndInitCtx(WriteOperationType.DELETE);
|
HoodieTable<T> table = getTableAndInitCtx(WriteOperationType.DELETE);
|
||||||
setOperationType(WriteOperationType.DELETE);
|
setOperationType(WriteOperationType.DELETE);
|
||||||
try {
|
try {
|
||||||
@@ -370,23 +370,23 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
JavaRDD<HoodieRecord<T>> taggedValidRecords = taggedRecords.filter(HoodieRecord::isCurrentLocationKnown);
|
JavaRDD<HoodieRecord<T>> taggedValidRecords = taggedRecords.filter(HoodieRecord::isCurrentLocationKnown);
|
||||||
if (!taggedValidRecords.isEmpty()) {
|
if (!taggedValidRecords.isEmpty()) {
|
||||||
metrics.updateIndexMetrics(LOOKUP_STR, metrics.getDurationInMs(indexTimer == null ? 0L : indexTimer.stop()));
|
metrics.updateIndexMetrics(LOOKUP_STR, metrics.getDurationInMs(indexTimer == null ? 0L : indexTimer.stop()));
|
||||||
return upsertRecordsInternal(taggedValidRecords, commitTime, table, true);
|
return upsertRecordsInternal(taggedValidRecords, instantTime, table, true);
|
||||||
} else {
|
} else {
|
||||||
// if entire set of keys are non existent
|
// if entire set of keys are non existent
|
||||||
saveWorkloadProfileMetadataToInflight(new WorkloadProfile(jsc.emptyRDD()), table, commitTime);
|
saveWorkloadProfileMetadataToInflight(new WorkloadProfile(jsc.emptyRDD()), table, instantTime);
|
||||||
JavaRDD<WriteStatus> writeStatusRDD = jsc.emptyRDD();
|
JavaRDD<WriteStatus> writeStatusRDD = jsc.emptyRDD();
|
||||||
commitOnAutoCommit(commitTime, writeStatusRDD, table.getMetaClient().getCommitActionType());
|
commitOnAutoCommit(instantTime, writeStatusRDD, table.getMetaClient().getCommitActionType());
|
||||||
return writeStatusRDD;
|
return writeStatusRDD;
|
||||||
}
|
}
|
||||||
} catch (Throwable e) {
|
} catch (Throwable e) {
|
||||||
if (e instanceof HoodieUpsertException) {
|
if (e instanceof HoodieUpsertException) {
|
||||||
throw (HoodieUpsertException) e;
|
throw (HoodieUpsertException) e;
|
||||||
}
|
}
|
||||||
throw new HoodieUpsertException("Failed to delete for commit time " + commitTime, e);
|
throw new HoodieUpsertException("Failed to delete for commit time " + instantTime, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private JavaRDD<WriteStatus> bulkInsertInternal(JavaRDD<HoodieRecord<T>> dedupedRecords, String commitTime,
|
private JavaRDD<WriteStatus> bulkInsertInternal(JavaRDD<HoodieRecord<T>> dedupedRecords, String instantTime,
|
||||||
HoodieTable<T> table, Option<UserDefinedBulkInsertPartitioner> bulkInsertPartitioner) {
|
HoodieTable<T> table, Option<UserDefinedBulkInsertPartitioner> bulkInsertPartitioner) {
|
||||||
final JavaRDD<HoodieRecord<T>> repartitionedRecords;
|
final JavaRDD<HoodieRecord<T>> repartitionedRecords;
|
||||||
final int parallelism = config.getBulkInsertShuffleParallelism();
|
final int parallelism = config.getBulkInsertShuffleParallelism();
|
||||||
@@ -407,13 +407,13 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
IntStream.range(0, parallelism).mapToObj(i -> FSUtils.createNewFileIdPfx()).collect(Collectors.toList());
|
IntStream.range(0, parallelism).mapToObj(i -> FSUtils.createNewFileIdPfx()).collect(Collectors.toList());
|
||||||
|
|
||||||
table.getActiveTimeline().transitionRequestedToInflight(new HoodieInstant(State.REQUESTED,
|
table.getActiveTimeline().transitionRequestedToInflight(new HoodieInstant(State.REQUESTED,
|
||||||
table.getMetaClient().getCommitActionType(), commitTime), Option.empty());
|
table.getMetaClient().getCommitActionType(), instantTime), Option.empty());
|
||||||
|
|
||||||
JavaRDD<WriteStatus> writeStatusRDD = repartitionedRecords
|
JavaRDD<WriteStatus> writeStatusRDD = repartitionedRecords
|
||||||
.mapPartitionsWithIndex(new BulkInsertMapFunction<T>(commitTime, config, table, fileIDPrefixes), true)
|
.mapPartitionsWithIndex(new BulkInsertMapFunction<T>(instantTime, config, table, fileIDPrefixes), true)
|
||||||
.flatMap(List::iterator);
|
.flatMap(List::iterator);
|
||||||
|
|
||||||
return updateIndexAndCommitIfNeeded(writeStatusRDD, table, commitTime);
|
return updateIndexAndCommitIfNeeded(writeStatusRDD, table, instantTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
private JavaRDD<HoodieRecord<T>> combineOnCondition(boolean condition, JavaRDD<HoodieRecord<T>> records,
|
private JavaRDD<HoodieRecord<T>> combineOnCondition(boolean condition, JavaRDD<HoodieRecord<T>> records,
|
||||||
@@ -427,7 +427,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
* are unknown across batches Inserts (which are new parquet files) are rolled back based on commit time. // TODO :
|
* are unknown across batches Inserts (which are new parquet files) are rolled back based on commit time. // TODO :
|
||||||
* Create a new WorkloadProfile metadata file instead of using HoodieCommitMetadata
|
* Create a new WorkloadProfile metadata file instead of using HoodieCommitMetadata
|
||||||
*/
|
*/
|
||||||
private void saveWorkloadProfileMetadataToInflight(WorkloadProfile profile, HoodieTable<T> table, String commitTime)
|
private void saveWorkloadProfileMetadataToInflight(WorkloadProfile profile, HoodieTable<T> table, String instantTime)
|
||||||
throws HoodieCommitException {
|
throws HoodieCommitException {
|
||||||
try {
|
try {
|
||||||
HoodieCommitMetadata metadata = new HoodieCommitMetadata();
|
HoodieCommitMetadata metadata = new HoodieCommitMetadata();
|
||||||
@@ -446,15 +446,15 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
|
|
||||||
HoodieActiveTimeline activeTimeline = table.getActiveTimeline();
|
HoodieActiveTimeline activeTimeline = table.getActiveTimeline();
|
||||||
String commitActionType = table.getMetaClient().getCommitActionType();
|
String commitActionType = table.getMetaClient().getCommitActionType();
|
||||||
HoodieInstant requested = new HoodieInstant(State.REQUESTED, commitActionType, commitTime);
|
HoodieInstant requested = new HoodieInstant(State.REQUESTED, commitActionType, instantTime);
|
||||||
activeTimeline.transitionRequestedToInflight(requested,
|
activeTimeline.transitionRequestedToInflight(requested,
|
||||||
Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
|
Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
|
||||||
} catch (IOException io) {
|
} catch (IOException io) {
|
||||||
throw new HoodieCommitException("Failed to commit " + commitTime + " unable to save inflight metadata ", io);
|
throw new HoodieCommitException("Failed to commit " + instantTime + " unable to save inflight metadata ", io);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private JavaRDD<WriteStatus> upsertRecordsInternal(JavaRDD<HoodieRecord<T>> preppedRecords, String commitTime,
|
private JavaRDD<WriteStatus> upsertRecordsInternal(JavaRDD<HoodieRecord<T>> preppedRecords, String instantTime,
|
||||||
HoodieTable<T> hoodieTable, final boolean isUpsert) {
|
HoodieTable<T> hoodieTable, final boolean isUpsert) {
|
||||||
|
|
||||||
// Cache the tagged records, so we don't end up computing both
|
// Cache the tagged records, so we don't end up computing both
|
||||||
@@ -469,7 +469,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
if (hoodieTable.isWorkloadProfileNeeded()) {
|
if (hoodieTable.isWorkloadProfileNeeded()) {
|
||||||
profile = new WorkloadProfile(preppedRecords);
|
profile = new WorkloadProfile(preppedRecords);
|
||||||
LOG.info("Workload profile :" + profile);
|
LOG.info("Workload profile :" + profile);
|
||||||
saveWorkloadProfileMetadataToInflight(profile, hoodieTable, commitTime);
|
saveWorkloadProfileMetadataToInflight(profile, hoodieTable, instantTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
// partition using the insert partitioner
|
// partition using the insert partitioner
|
||||||
@@ -477,13 +477,13 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
JavaRDD<HoodieRecord<T>> partitionedRecords = partition(preppedRecords, partitioner);
|
JavaRDD<HoodieRecord<T>> partitionedRecords = partition(preppedRecords, partitioner);
|
||||||
JavaRDD<WriteStatus> writeStatusRDD = partitionedRecords.mapPartitionsWithIndex((partition, recordItr) -> {
|
JavaRDD<WriteStatus> writeStatusRDD = partitionedRecords.mapPartitionsWithIndex((partition, recordItr) -> {
|
||||||
if (isUpsert) {
|
if (isUpsert) {
|
||||||
return hoodieTable.handleUpsertPartition(commitTime, partition, recordItr, partitioner);
|
return hoodieTable.handleUpsertPartition(instantTime, partition, recordItr, partitioner);
|
||||||
} else {
|
} else {
|
||||||
return hoodieTable.handleInsertPartition(commitTime, partition, recordItr, partitioner);
|
return hoodieTable.handleInsertPartition(instantTime, partition, recordItr, partitioner);
|
||||||
}
|
}
|
||||||
}, true).flatMap(List::iterator);
|
}, true).flatMap(List::iterator);
|
||||||
|
|
||||||
return updateIndexAndCommitIfNeeded(writeStatusRDD, hoodieTable, commitTime);
|
return updateIndexAndCommitIfNeeded(writeStatusRDD, hoodieTable, instantTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
private Partitioner getPartitioner(HoodieTable table, boolean isUpsert, WorkloadProfile profile) {
|
private Partitioner getPartitioner(HoodieTable table, boolean isUpsert, WorkloadProfile profile) {
|
||||||
@@ -551,7 +551,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Savepoint a specific commit. Latest version of data files as of the passed in commitTime will be referenced in the
|
* Savepoint a specific commit. Latest version of data files as of the passed in instantTime will be referenced in the
|
||||||
* savepoint and will never be cleaned. The savepointed commit will never be rolledback or archived.
|
* savepoint and will never be cleaned. The savepointed commit will never be rolledback or archived.
|
||||||
* <p>
|
* <p>
|
||||||
* This gives an option to rollback the state to the savepoint anytime. Savepoint needs to be manually created and
|
* This gives an option to rollback the state to the savepoint anytime. Savepoint needs to be manually created and
|
||||||
@@ -559,19 +559,19 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
* <p>
|
* <p>
|
||||||
* Savepoint should be on a commit that could not have been cleaned.
|
* Savepoint should be on a commit that could not have been cleaned.
|
||||||
*
|
*
|
||||||
* @param commitTime - commit that should be savepointed
|
* @param instantTime - commit that should be savepointed
|
||||||
* @param user - User creating the savepoint
|
* @param user - User creating the savepoint
|
||||||
* @param comment - Comment for the savepoint
|
* @param comment - Comment for the savepoint
|
||||||
* @return true if the savepoint was created successfully
|
* @return true if the savepoint was created successfully
|
||||||
*/
|
*/
|
||||||
public boolean savepoint(String commitTime, String user, String comment) {
|
public boolean savepoint(String instantTime, String user, String comment) {
|
||||||
HoodieTable<T> table = HoodieTable.getHoodieTable(createMetaClient(true), config, jsc);
|
HoodieTable<T> table = HoodieTable.getHoodieTable(createMetaClient(true), config, jsc);
|
||||||
if (table.getMetaClient().getTableType() == HoodieTableType.MERGE_ON_READ) {
|
if (table.getMetaClient().getTableType() == HoodieTableType.MERGE_ON_READ) {
|
||||||
throw new UnsupportedOperationException("Savepointing is not supported or MergeOnRead table types");
|
throw new UnsupportedOperationException("Savepointing is not supported or MergeOnRead table types");
|
||||||
}
|
}
|
||||||
Option<HoodieInstant> cleanInstant = table.getCompletedCleanTimeline().lastInstant();
|
Option<HoodieInstant> cleanInstant = table.getCompletedCleanTimeline().lastInstant();
|
||||||
|
|
||||||
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime);
|
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, instantTime);
|
||||||
if (!table.getCompletedCommitsTimeline().containsInstant(commitInstant)) {
|
if (!table.getCompletedCommitsTimeline().containsInstant(commitInstant)) {
|
||||||
throw new HoodieSavepointException("Could not savepoint non-existing commit " + commitInstant);
|
throw new HoodieSavepointException("Could not savepoint non-existing commit " + commitInstant);
|
||||||
}
|
}
|
||||||
@@ -589,8 +589,8 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
|
|
||||||
// Cannot allow savepoint time on a commit that could have been cleaned
|
// Cannot allow savepoint time on a commit that could have been cleaned
|
||||||
ValidationUtils.checkArgument(
|
ValidationUtils.checkArgument(
|
||||||
HoodieTimeline.compareTimestamps(commitTime, lastCommitRetained, HoodieTimeline.GREATER_OR_EQUAL),
|
HoodieTimeline.compareTimestamps(instantTime, lastCommitRetained, HoodieTimeline.GREATER_OR_EQUAL),
|
||||||
"Could not savepoint commit " + commitTime + " as this is beyond the lookup window " + lastCommitRetained);
|
"Could not savepoint commit " + instantTime + " as this is beyond the lookup window " + lastCommitRetained);
|
||||||
|
|
||||||
Map<String, List<String>> latestFilesMap = jsc
|
Map<String, List<String>> latestFilesMap = jsc
|
||||||
.parallelize(FSUtils.getAllPartitionPaths(fs, table.getMetaClient().getBasePath(),
|
.parallelize(FSUtils.getAllPartitionPaths(fs, table.getMetaClient().getBasePath(),
|
||||||
@@ -599,7 +599,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
// Scan all partitions files with this commit time
|
// Scan all partitions files with this commit time
|
||||||
LOG.info("Collecting latest files in partition path " + partitionPath);
|
LOG.info("Collecting latest files in partition path " + partitionPath);
|
||||||
BaseFileOnlyView view = table.getBaseFileOnlyView();
|
BaseFileOnlyView view = table.getBaseFileOnlyView();
|
||||||
List<String> latestFiles = view.getLatestBaseFilesBeforeOrOn(partitionPath, commitTime)
|
List<String> latestFiles = view.getLatestBaseFilesBeforeOrOn(partitionPath, instantTime)
|
||||||
.map(HoodieBaseFile::getFileName).collect(Collectors.toList());
|
.map(HoodieBaseFile::getFileName).collect(Collectors.toList());
|
||||||
return new Tuple2<>(partitionPath, latestFiles);
|
return new Tuple2<>(partitionPath, latestFiles);
|
||||||
}).collectAsMap();
|
}).collectAsMap();
|
||||||
@@ -607,14 +607,14 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
HoodieSavepointMetadata metadata = AvroUtils.convertSavepointMetadata(user, comment, latestFilesMap);
|
HoodieSavepointMetadata metadata = AvroUtils.convertSavepointMetadata(user, comment, latestFilesMap);
|
||||||
// Nothing to save in the savepoint
|
// Nothing to save in the savepoint
|
||||||
table.getActiveTimeline().createNewInstant(
|
table.getActiveTimeline().createNewInstant(
|
||||||
new HoodieInstant(true, HoodieTimeline.SAVEPOINT_ACTION, commitTime));
|
new HoodieInstant(true, HoodieTimeline.SAVEPOINT_ACTION, instantTime));
|
||||||
table.getActiveTimeline()
|
table.getActiveTimeline()
|
||||||
.saveAsComplete(new HoodieInstant(true, HoodieTimeline.SAVEPOINT_ACTION, commitTime),
|
.saveAsComplete(new HoodieInstant(true, HoodieTimeline.SAVEPOINT_ACTION, instantTime),
|
||||||
AvroUtils.serializeSavepointMetadata(metadata));
|
AvroUtils.serializeSavepointMetadata(metadata));
|
||||||
LOG.info("Savepoint " + commitTime + " created");
|
LOG.info("Savepoint " + instantTime + " created");
|
||||||
return true;
|
return true;
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new HoodieSavepointException("Failed to savepoint " + commitTime, e);
|
throw new HoodieSavepointException("Failed to savepoint " + instantTime, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -687,15 +687,15 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
// to be running. Rollback to savepoint also removes any pending compaction actions that are generated after
|
// to be running. Rollback to savepoint also removes any pending compaction actions that are generated after
|
||||||
// savepoint time. Allowing pending compaction to be retained is not safe as those workload could be referencing
|
// savepoint time. Allowing pending compaction to be retained is not safe as those workload could be referencing
|
||||||
// file-slices that will be rolled-back as part of this operation
|
// file-slices that will be rolled-back as part of this operation
|
||||||
HoodieTimeline commitTimeline = table.getMetaClient().getCommitsAndCompactionTimeline();
|
HoodieTimeline instantTimeline = table.getMetaClient().getCommitsAndCompactionTimeline();
|
||||||
|
|
||||||
HoodieInstant savePoint = new HoodieInstant(false, HoodieTimeline.SAVEPOINT_ACTION, savepointTime);
|
HoodieInstant savePoint = new HoodieInstant(false, HoodieTimeline.SAVEPOINT_ACTION, savepointTime);
|
||||||
boolean isSavepointPresent = table.getCompletedSavepointTimeline().containsInstant(savePoint);
|
boolean isSavepointPresent = table.getCompletedSavepointTimeline().containsInstant(savePoint);
|
||||||
if (!isSavepointPresent) {
|
if (!isSavepointPresent) {
|
||||||
throw new HoodieRollbackException("No savepoint for commitTime " + savepointTime);
|
throw new HoodieRollbackException("No savepoint for instantTime " + savepointTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
List<String> commitsToRollback = commitTimeline.findInstantsAfter(savepointTime, Integer.MAX_VALUE).getInstants()
|
List<String> commitsToRollback = instantTimeline.findInstantsAfter(savepointTime, Integer.MAX_VALUE).getInstants()
|
||||||
.map(HoodieInstant::getTimestamp).collect(Collectors.toList());
|
.map(HoodieInstant::getTimestamp).collect(Collectors.toList());
|
||||||
LOG.info("Rolling back commits " + commitsToRollback);
|
LOG.info("Rolling back commits " + commitsToRollback);
|
||||||
|
|
||||||
@@ -716,11 +716,11 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
* this commit (2) clean indexing data, (3) clean new generated parquet files. (4) Finally delete .commit or .inflight
|
* this commit (2) clean indexing data, (3) clean new generated parquet files. (4) Finally delete .commit or .inflight
|
||||||
* file.
|
* file.
|
||||||
*
|
*
|
||||||
* @param commitTime Instant time of the commit
|
* @param instantTime Instant time of the commit
|
||||||
* @return {@code true} If rollback the record changes successfully. {@code false} otherwise
|
* @return {@code true} If rollback the record changes successfully. {@code false} otherwise
|
||||||
*/
|
*/
|
||||||
public boolean rollback(final String commitTime) throws HoodieRollbackException {
|
public boolean rollback(final String instantTime) throws HoodieRollbackException {
|
||||||
rollbackInternal(commitTime);
|
rollbackInternal(instantTime);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -852,9 +852,9 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
// Only rollback pending commit/delta-commits. Do not touch compaction commits
|
// Only rollback pending commit/delta-commits. Do not touch compaction commits
|
||||||
rollbackPendingCommits();
|
rollbackPendingCommits();
|
||||||
}
|
}
|
||||||
String commitTime = HoodieActiveTimeline.createNewInstantTime();
|
String instantTime = HoodieActiveTimeline.createNewInstantTime();
|
||||||
startCommit(commitTime);
|
startCommit(instantTime);
|
||||||
return commitTime;
|
return instantTime;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -35,14 +35,14 @@ import java.util.List;
|
|||||||
public class BulkInsertMapFunction<T extends HoodieRecordPayload>
|
public class BulkInsertMapFunction<T extends HoodieRecordPayload>
|
||||||
implements Function2<Integer, Iterator<HoodieRecord<T>>, Iterator<List<WriteStatus>>> {
|
implements Function2<Integer, Iterator<HoodieRecord<T>>, Iterator<List<WriteStatus>>> {
|
||||||
|
|
||||||
private String commitTime;
|
private String instantTime;
|
||||||
private HoodieWriteConfig config;
|
private HoodieWriteConfig config;
|
||||||
private HoodieTable<T> hoodieTable;
|
private HoodieTable<T> hoodieTable;
|
||||||
private List<String> fileIDPrefixes;
|
private List<String> fileIDPrefixes;
|
||||||
|
|
||||||
public BulkInsertMapFunction(String commitTime, HoodieWriteConfig config, HoodieTable<T> hoodieTable,
|
public BulkInsertMapFunction(String instantTime, HoodieWriteConfig config, HoodieTable<T> hoodieTable,
|
||||||
List<String> fileIDPrefixes) {
|
List<String> fileIDPrefixes) {
|
||||||
this.commitTime = commitTime;
|
this.instantTime = instantTime;
|
||||||
this.config = config;
|
this.config = config;
|
||||||
this.hoodieTable = hoodieTable;
|
this.hoodieTable = hoodieTable;
|
||||||
this.fileIDPrefixes = fileIDPrefixes;
|
this.fileIDPrefixes = fileIDPrefixes;
|
||||||
@@ -50,7 +50,7 @@ public class BulkInsertMapFunction<T extends HoodieRecordPayload>
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Iterator<List<WriteStatus>> call(Integer partition, Iterator<HoodieRecord<T>> sortedRecordItr) {
|
public Iterator<List<WriteStatus>> call(Integer partition, Iterator<HoodieRecord<T>> sortedRecordItr) {
|
||||||
return new CopyOnWriteLazyInsertIterable<>(sortedRecordItr, config, commitTime, hoodieTable,
|
return new CopyOnWriteLazyInsertIterable<>(sortedRecordItr, config, instantTime, hoodieTable,
|
||||||
fileIDPrefixes.get(partition));
|
fileIDPrefixes.get(partition));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -46,16 +46,16 @@ public class CopyOnWriteLazyInsertIterable<T extends HoodieRecordPayload>
|
|||||||
extends LazyIterableIterator<HoodieRecord<T>, List<WriteStatus>> {
|
extends LazyIterableIterator<HoodieRecord<T>, List<WriteStatus>> {
|
||||||
|
|
||||||
protected final HoodieWriteConfig hoodieConfig;
|
protected final HoodieWriteConfig hoodieConfig;
|
||||||
protected final String commitTime;
|
protected final String instantTime;
|
||||||
protected final HoodieTable<T> hoodieTable;
|
protected final HoodieTable<T> hoodieTable;
|
||||||
protected final String idPrefix;
|
protected final String idPrefix;
|
||||||
protected int numFilesWritten;
|
protected int numFilesWritten;
|
||||||
|
|
||||||
public CopyOnWriteLazyInsertIterable(Iterator<HoodieRecord<T>> sortedRecordItr, HoodieWriteConfig config,
|
public CopyOnWriteLazyInsertIterable(Iterator<HoodieRecord<T>> sortedRecordItr, HoodieWriteConfig config,
|
||||||
String commitTime, HoodieTable<T> hoodieTable, String idPrefix) {
|
String instantTime, HoodieTable<T> hoodieTable, String idPrefix) {
|
||||||
super(sortedRecordItr);
|
super(sortedRecordItr);
|
||||||
this.hoodieConfig = config;
|
this.hoodieConfig = config;
|
||||||
this.commitTime = commitTime;
|
this.instantTime = instantTime;
|
||||||
this.hoodieTable = hoodieTable;
|
this.hoodieTable = hoodieTable;
|
||||||
this.idPrefix = idPrefix;
|
this.idPrefix = idPrefix;
|
||||||
this.numFilesWritten = 0;
|
this.numFilesWritten = 0;
|
||||||
@@ -136,7 +136,7 @@ public class CopyOnWriteLazyInsertIterable<T extends HoodieRecordPayload>
|
|||||||
final HoodieRecord insertPayload = payload.record;
|
final HoodieRecord insertPayload = payload.record;
|
||||||
// lazily initialize the handle, for the first time
|
// lazily initialize the handle, for the first time
|
||||||
if (handle == null) {
|
if (handle == null) {
|
||||||
handle = new HoodieCreateHandle(hoodieConfig, commitTime, hoodieTable, insertPayload.getPartitionPath(),
|
handle = new HoodieCreateHandle(hoodieConfig, instantTime, hoodieTable, insertPayload.getPartitionPath(),
|
||||||
getNextFileId(idPrefix));
|
getNextFileId(idPrefix));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -147,7 +147,7 @@ public class CopyOnWriteLazyInsertIterable<T extends HoodieRecordPayload>
|
|||||||
// handle is full.
|
// handle is full.
|
||||||
statuses.add(handle.close());
|
statuses.add(handle.close());
|
||||||
// Need to handle the rejected payload & open new handle
|
// Need to handle the rejected payload & open new handle
|
||||||
handle = new HoodieCreateHandle(hoodieConfig, commitTime, hoodieTable, insertPayload.getPartitionPath(),
|
handle = new HoodieCreateHandle(hoodieConfig, instantTime, hoodieTable, insertPayload.getPartitionPath(),
|
||||||
getNextFileId(idPrefix));
|
getNextFileId(idPrefix));
|
||||||
handle.write(insertPayload, payload.insertValue, payload.exception); // we should be able to write 1 payload.
|
handle.write(insertPayload, payload.insertValue, payload.exception); // we should be able to write 1 payload.
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -35,8 +35,8 @@ import java.util.List;
|
|||||||
public class MergeOnReadLazyInsertIterable<T extends HoodieRecordPayload> extends CopyOnWriteLazyInsertIterable<T> {
|
public class MergeOnReadLazyInsertIterable<T extends HoodieRecordPayload> extends CopyOnWriteLazyInsertIterable<T> {
|
||||||
|
|
||||||
public MergeOnReadLazyInsertIterable(Iterator<HoodieRecord<T>> sortedRecordItr, HoodieWriteConfig config,
|
public MergeOnReadLazyInsertIterable(Iterator<HoodieRecord<T>> sortedRecordItr, HoodieWriteConfig config,
|
||||||
String commitTime, HoodieTable<T> hoodieTable, String idPfx) {
|
String instantTime, HoodieTable<T> hoodieTable, String idPfx) {
|
||||||
super(sortedRecordItr, config, commitTime, hoodieTable, idPfx);
|
super(sortedRecordItr, config, instantTime, hoodieTable, idPfx);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -52,7 +52,7 @@ public class MergeOnReadLazyInsertIterable<T extends HoodieRecordPayload> extend
|
|||||||
List<WriteStatus> statuses = new ArrayList<>();
|
List<WriteStatus> statuses = new ArrayList<>();
|
||||||
// lazily initialize the handle, for the first time
|
// lazily initialize the handle, for the first time
|
||||||
if (handle == null) {
|
if (handle == null) {
|
||||||
handle = new HoodieAppendHandle(hoodieConfig, commitTime, hoodieTable,
|
handle = new HoodieAppendHandle(hoodieConfig, instantTime, hoodieTable,
|
||||||
insertPayload.getPartitionPath(), getNextFileId(idPrefix));
|
insertPayload.getPartitionPath(), getNextFileId(idPrefix));
|
||||||
}
|
}
|
||||||
if (handle.canWrite(insertPayload)) {
|
if (handle.canWrite(insertPayload)) {
|
||||||
@@ -63,7 +63,7 @@ public class MergeOnReadLazyInsertIterable<T extends HoodieRecordPayload> extend
|
|||||||
handle.close();
|
handle.close();
|
||||||
statuses.add(handle.getWriteStatus());
|
statuses.add(handle.getWriteStatus());
|
||||||
// Need to handle the rejected payload & open new handle
|
// Need to handle the rejected payload & open new handle
|
||||||
handle = new HoodieAppendHandle(hoodieConfig, commitTime, hoodieTable,
|
handle = new HoodieAppendHandle(hoodieConfig, instantTime, hoodieTable,
|
||||||
insertPayload.getPartitionPath(), getNextFileId(idPrefix));
|
insertPayload.getPartitionPath(), getNextFileId(idPrefix));
|
||||||
handle.write(insertPayload, payload.insertValue, payload.exception); // we should be able to write 1 payload.
|
handle.write(insertPayload, payload.insertValue, payload.exception); // we should be able to write 1 payload.
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -98,9 +98,9 @@ public abstract class HoodieIndex<T extends HoodieRecordPayload> implements Seri
|
|||||||
HoodieTable<T> hoodieTable) throws HoodieIndexException;
|
HoodieTable<T> hoodieTable) throws HoodieIndexException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Rollback the efffects of the commit made at commitTime.
|
* Rollback the efffects of the commit made at instantTime.
|
||||||
*/
|
*/
|
||||||
public abstract boolean rollbackCommit(String commitTime);
|
public abstract boolean rollbackCommit(String instantTime);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An index is `global` if {@link HoodieKey} to fileID mapping, does not depend on the `partitionPath`. Such an
|
* An index is `global` if {@link HoodieKey} to fileID mapping, does not depend on the `partitionPath`. Such an
|
||||||
|
|||||||
@@ -94,7 +94,7 @@ public class InMemoryHashIndex<T extends HoodieRecordPayload> extends HoodieInde
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean rollbackCommit(String commitTime) {
|
public boolean rollbackCommit(String instantTime) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -223,7 +223,7 @@ public class HoodieBloomIndex<T extends HoodieRecordPayload> extends HoodieIndex
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean rollbackCommit(String commitTime) {
|
public boolean rollbackCommit(String instantTime) {
|
||||||
// Nope, don't need to do anything.
|
// Nope, don't need to do anything.
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -465,7 +465,7 @@ public class HBaseIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean rollbackCommit(String commitTime) {
|
public boolean rollbackCommit(String instantTime) {
|
||||||
// Rollback in HbaseIndex is managed via method {@link #checkIfValidCommit()}
|
// Rollback in HbaseIndex is managed via method {@link #checkIfValidCommit()}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -100,17 +100,17 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieWri
|
|||||||
// Total number of new records inserted into the delta file
|
// Total number of new records inserted into the delta file
|
||||||
private long insertRecordsWritten = 0;
|
private long insertRecordsWritten = 0;
|
||||||
|
|
||||||
public HoodieAppendHandle(HoodieWriteConfig config, String commitTime, HoodieTable<T> hoodieTable,
|
public HoodieAppendHandle(HoodieWriteConfig config, String instantTime, HoodieTable<T> hoodieTable,
|
||||||
String partitionPath, String fileId, Iterator<HoodieRecord<T>> recordItr) {
|
String partitionPath, String fileId, Iterator<HoodieRecord<T>> recordItr) {
|
||||||
super(config, commitTime, partitionPath, fileId, hoodieTable);
|
super(config, instantTime, partitionPath, fileId, hoodieTable);
|
||||||
writeStatus.setStat(new HoodieDeltaWriteStat());
|
writeStatus.setStat(new HoodieDeltaWriteStat());
|
||||||
this.fileId = fileId;
|
this.fileId = fileId;
|
||||||
this.recordItr = recordItr;
|
this.recordItr = recordItr;
|
||||||
}
|
}
|
||||||
|
|
||||||
public HoodieAppendHandle(HoodieWriteConfig config, String commitTime, HoodieTable<T> hoodieTable,
|
public HoodieAppendHandle(HoodieWriteConfig config, String instantTime, HoodieTable<T> hoodieTable,
|
||||||
String partitionPath, String fileId) {
|
String partitionPath, String fileId) {
|
||||||
this(config, commitTime, hoodieTable, partitionPath, fileId, null);
|
this(config, instantTime, hoodieTable, partitionPath, fileId, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void init(HoodieRecord record) {
|
private void init(HoodieRecord record) {
|
||||||
@@ -118,7 +118,7 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieWri
|
|||||||
// extract some information from the first record
|
// extract some information from the first record
|
||||||
SliceView rtView = hoodieTable.getSliceView();
|
SliceView rtView = hoodieTable.getSliceView();
|
||||||
Option<FileSlice> fileSlice = rtView.getLatestFileSlice(partitionPath, fileId);
|
Option<FileSlice> fileSlice = rtView.getLatestFileSlice(partitionPath, fileId);
|
||||||
// Set the base commit time as the current commitTime for new inserts into log files
|
// Set the base commit time as the current instantTime for new inserts into log files
|
||||||
String baseInstantTime = instantTime;
|
String baseInstantTime = instantTime;
|
||||||
if (fileSlice.isPresent()) {
|
if (fileSlice.isPresent()) {
|
||||||
baseInstantTime = fileSlice.get().getBaseInstantTime();
|
baseInstantTime = fileSlice.get().getBaseInstantTime();
|
||||||
|
|||||||
@@ -55,21 +55,21 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieWri
|
|||||||
private Iterator<HoodieRecord<T>> recordIterator;
|
private Iterator<HoodieRecord<T>> recordIterator;
|
||||||
private boolean useWriterSchema = false;
|
private boolean useWriterSchema = false;
|
||||||
|
|
||||||
public HoodieCreateHandle(HoodieWriteConfig config, String commitTime, HoodieTable<T> hoodieTable,
|
public HoodieCreateHandle(HoodieWriteConfig config, String instantTime, HoodieTable<T> hoodieTable,
|
||||||
String partitionPath, String fileId) {
|
String partitionPath, String fileId) {
|
||||||
super(config, commitTime, partitionPath, fileId, hoodieTable);
|
super(config, instantTime, partitionPath, fileId, hoodieTable);
|
||||||
writeStatus.setFileId(fileId);
|
writeStatus.setFileId(fileId);
|
||||||
writeStatus.setPartitionPath(partitionPath);
|
writeStatus.setPartitionPath(partitionPath);
|
||||||
|
|
||||||
this.path = makeNewPath(partitionPath);
|
this.path = makeNewPath(partitionPath);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(fs, commitTime,
|
HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(fs, instantTime,
|
||||||
new Path(config.getBasePath()), FSUtils.getPartitionPath(config.getBasePath(), partitionPath));
|
new Path(config.getBasePath()), FSUtils.getPartitionPath(config.getBasePath(), partitionPath));
|
||||||
partitionMetadata.trySave(TaskContext.getPartitionId());
|
partitionMetadata.trySave(TaskContext.getPartitionId());
|
||||||
createMarkerFile(partitionPath);
|
createMarkerFile(partitionPath);
|
||||||
this.storageWriter =
|
this.storageWriter =
|
||||||
HoodieStorageWriterFactory.getStorageWriter(commitTime, path, hoodieTable, config, writerSchema);
|
HoodieStorageWriterFactory.getStorageWriter(instantTime, path, hoodieTable, config, writerSchema);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new HoodieInsertException("Failed to initialize HoodieStorageWriter for path " + path, e);
|
throw new HoodieInsertException("Failed to initialize HoodieStorageWriter for path " + path, e);
|
||||||
}
|
}
|
||||||
@@ -79,9 +79,9 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieWri
|
|||||||
/**
|
/**
|
||||||
* Called by the compactor code path.
|
* Called by the compactor code path.
|
||||||
*/
|
*/
|
||||||
public HoodieCreateHandle(HoodieWriteConfig config, String commitTime, HoodieTable<T> hoodieTable,
|
public HoodieCreateHandle(HoodieWriteConfig config, String instantTime, HoodieTable<T> hoodieTable,
|
||||||
String partitionPath, String fileId, Iterator<HoodieRecord<T>> recordIterator) {
|
String partitionPath, String fileId, Iterator<HoodieRecord<T>> recordIterator) {
|
||||||
this(config, commitTime, hoodieTable, partitionPath, fileId);
|
this(config, instantTime, hoodieTable, partitionPath, fileId);
|
||||||
this.recordIterator = recordIterator;
|
this.recordIterator = recordIterator;
|
||||||
this.useWriterSchema = true;
|
this.useWriterSchema = true;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -69,9 +69,9 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieWrit
|
|||||||
private long insertRecordsWritten = 0;
|
private long insertRecordsWritten = 0;
|
||||||
private boolean useWriterSchema;
|
private boolean useWriterSchema;
|
||||||
|
|
||||||
public HoodieMergeHandle(HoodieWriteConfig config, String commitTime, HoodieTable<T> hoodieTable,
|
public HoodieMergeHandle(HoodieWriteConfig config, String instantTime, HoodieTable<T> hoodieTable,
|
||||||
Iterator<HoodieRecord<T>> recordItr, String partitionPath, String fileId) {
|
Iterator<HoodieRecord<T>> recordItr, String partitionPath, String fileId) {
|
||||||
super(config, commitTime, partitionPath, fileId, hoodieTable);
|
super(config, instantTime, partitionPath, fileId, hoodieTable);
|
||||||
init(fileId, recordItr);
|
init(fileId, recordItr);
|
||||||
init(fileId, partitionPath, hoodieTable.getBaseFileOnlyView().getLatestBaseFile(partitionPath, fileId).get());
|
init(fileId, partitionPath, hoodieTable.getBaseFileOnlyView().getLatestBaseFile(partitionPath, fileId).get());
|
||||||
}
|
}
|
||||||
@@ -79,10 +79,10 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieWrit
|
|||||||
/**
|
/**
|
||||||
* Called by compactor code path.
|
* Called by compactor code path.
|
||||||
*/
|
*/
|
||||||
public HoodieMergeHandle(HoodieWriteConfig config, String commitTime, HoodieTable<T> hoodieTable,
|
public HoodieMergeHandle(HoodieWriteConfig config, String instantTime, HoodieTable<T> hoodieTable,
|
||||||
Map<String, HoodieRecord<T>> keyToNewRecords, String partitionPath, String fileId,
|
Map<String, HoodieRecord<T>> keyToNewRecords, String partitionPath, String fileId,
|
||||||
HoodieBaseFile dataFileToBeMerged) {
|
HoodieBaseFile dataFileToBeMerged) {
|
||||||
super(config, commitTime, partitionPath, fileId, hoodieTable);
|
super(config, instantTime, partitionPath, fileId, hoodieTable);
|
||||||
this.keyToNewRecords = keyToNewRecords;
|
this.keyToNewRecords = keyToNewRecords;
|
||||||
this.useWriterSchema = true;
|
this.useWriterSchema = true;
|
||||||
init(fileId, this.partitionPath, dataFileToBeMerged);
|
init(fileId, this.partitionPath, dataFileToBeMerged);
|
||||||
|
|||||||
@@ -50,10 +50,10 @@ public class HoodieParquetWriter<T extends HoodieRecordPayload, R extends Indexe
|
|||||||
private final HoodieWrapperFileSystem fs;
|
private final HoodieWrapperFileSystem fs;
|
||||||
private final long maxFileSize;
|
private final long maxFileSize;
|
||||||
private final HoodieAvroWriteSupport writeSupport;
|
private final HoodieAvroWriteSupport writeSupport;
|
||||||
private final String commitTime;
|
private final String instantTime;
|
||||||
private final Schema schema;
|
private final Schema schema;
|
||||||
|
|
||||||
public HoodieParquetWriter(String commitTime, Path file, HoodieParquetConfig parquetConfig, Schema schema)
|
public HoodieParquetWriter(String instantTime, Path file, HoodieParquetConfig parquetConfig, Schema schema)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
super(HoodieWrapperFileSystem.convertToHoodiePath(file, parquetConfig.getHadoopConf()),
|
super(HoodieWrapperFileSystem.convertToHoodiePath(file, parquetConfig.getHadoopConf()),
|
||||||
ParquetFileWriter.Mode.CREATE, parquetConfig.getWriteSupport(), parquetConfig.getCompressionCodecName(),
|
ParquetFileWriter.Mode.CREATE, parquetConfig.getWriteSupport(), parquetConfig.getCompressionCodecName(),
|
||||||
@@ -70,7 +70,7 @@ public class HoodieParquetWriter<T extends HoodieRecordPayload, R extends Indexe
|
|||||||
this.maxFileSize = parquetConfig.getMaxFileSize()
|
this.maxFileSize = parquetConfig.getMaxFileSize()
|
||||||
+ Math.round(parquetConfig.getMaxFileSize() * parquetConfig.getCompressionRatio());
|
+ Math.round(parquetConfig.getMaxFileSize() * parquetConfig.getCompressionRatio());
|
||||||
this.writeSupport = parquetConfig.getWriteSupport();
|
this.writeSupport = parquetConfig.getWriteSupport();
|
||||||
this.commitTime = commitTime;
|
this.instantTime = instantTime;
|
||||||
this.schema = schema;
|
this.schema = schema;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -85,10 +85,10 @@ public class HoodieParquetWriter<T extends HoodieRecordPayload, R extends Indexe
|
|||||||
@Override
|
@Override
|
||||||
public void writeAvroWithMetadata(R avroRecord, HoodieRecord record) throws IOException {
|
public void writeAvroWithMetadata(R avroRecord, HoodieRecord record) throws IOException {
|
||||||
String seqId =
|
String seqId =
|
||||||
HoodieRecord.generateSequenceId(commitTime, TaskContext.getPartitionId(), recordIndex.getAndIncrement());
|
HoodieRecord.generateSequenceId(instantTime, TaskContext.getPartitionId(), recordIndex.getAndIncrement());
|
||||||
HoodieAvroUtils.addHoodieKeyToRecord((GenericRecord) avroRecord, record.getRecordKey(), record.getPartitionPath(),
|
HoodieAvroUtils.addHoodieKeyToRecord((GenericRecord) avroRecord, record.getRecordKey(), record.getPartitionPath(),
|
||||||
file.getName());
|
file.getName());
|
||||||
HoodieAvroUtils.addCommitMetadataToRecord((GenericRecord) avroRecord, commitTime, seqId);
|
HoodieAvroUtils.addCommitMetadataToRecord((GenericRecord) avroRecord, instantTime, seqId);
|
||||||
super.write(avroRecord);
|
super.write(avroRecord);
|
||||||
writeSupport.add(record.getRecordKey());
|
writeSupport.add(record.getRecordKey());
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -39,18 +39,18 @@ import static org.apache.hudi.common.model.HoodieFileFormat.PARQUET;
|
|||||||
public class HoodieStorageWriterFactory {
|
public class HoodieStorageWriterFactory {
|
||||||
|
|
||||||
public static <T extends HoodieRecordPayload, R extends IndexedRecord> HoodieStorageWriter<R> getStorageWriter(
|
public static <T extends HoodieRecordPayload, R extends IndexedRecord> HoodieStorageWriter<R> getStorageWriter(
|
||||||
String commitTime, Path path, HoodieTable<T> hoodieTable, HoodieWriteConfig config, Schema schema)
|
String instantTime, Path path, HoodieTable<T> hoodieTable, HoodieWriteConfig config, Schema schema)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
final String name = path.getName();
|
final String name = path.getName();
|
||||||
final String extension = FSUtils.isLogFile(path) ? HOODIE_LOG.getFileExtension() : FSUtils.getFileExtension(name);
|
final String extension = FSUtils.isLogFile(path) ? HOODIE_LOG.getFileExtension() : FSUtils.getFileExtension(name);
|
||||||
if (PARQUET.getFileExtension().equals(extension)) {
|
if (PARQUET.getFileExtension().equals(extension)) {
|
||||||
return newParquetStorageWriter(commitTime, path, config, schema, hoodieTable);
|
return newParquetStorageWriter(instantTime, path, config, schema, hoodieTable);
|
||||||
}
|
}
|
||||||
throw new UnsupportedOperationException(extension + " format not supported yet.");
|
throw new UnsupportedOperationException(extension + " format not supported yet.");
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <T extends HoodieRecordPayload, R extends IndexedRecord> HoodieStorageWriter<R> newParquetStorageWriter(
|
private static <T extends HoodieRecordPayload, R extends IndexedRecord> HoodieStorageWriter<R> newParquetStorageWriter(
|
||||||
String commitTime, Path path, HoodieWriteConfig config, Schema schema, HoodieTable hoodieTable)
|
String instantTime, Path path, HoodieWriteConfig config, Schema schema, HoodieTable hoodieTable)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
BloomFilter filter = BloomFilterFactory
|
BloomFilter filter = BloomFilterFactory
|
||||||
.createBloomFilter(config.getBloomFilterNumEntries(), config.getBloomFilterFPP(),
|
.createBloomFilter(config.getBloomFilterNumEntries(), config.getBloomFilterFPP(),
|
||||||
@@ -63,6 +63,6 @@ public class HoodieStorageWriterFactory {
|
|||||||
config.getParquetBlockSize(), config.getParquetPageSize(), config.getParquetMaxFileSize(),
|
config.getParquetBlockSize(), config.getParquetPageSize(), config.getParquetMaxFileSize(),
|
||||||
hoodieTable.getHadoopConf(), config.getParquetCompressionRatio());
|
hoodieTable.getHadoopConf(), config.getParquetCompressionRatio());
|
||||||
|
|
||||||
return new HoodieParquetWriter<>(commitTime, path, parquetConfig, schema);
|
return new HoodieParquetWriter<>(instantTime, path, parquetConfig, schema);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -245,18 +245,18 @@ public class CleanHelper<T extends HoodieRecordPayload<T>> implements Serializab
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the latest version < commitTime. This version file could still be used by queries.
|
* Gets the latest version < instantTime. This version file could still be used by queries.
|
||||||
*/
|
*/
|
||||||
private String getLatestVersionBeforeCommit(List<FileSlice> fileSliceList, HoodieInstant commitTime) {
|
private String getLatestVersionBeforeCommit(List<FileSlice> fileSliceList, HoodieInstant instantTime) {
|
||||||
for (FileSlice file : fileSliceList) {
|
for (FileSlice file : fileSliceList) {
|
||||||
String fileCommitTime = file.getBaseInstantTime();
|
String fileCommitTime = file.getBaseInstantTime();
|
||||||
if (HoodieTimeline.compareTimestamps(commitTime.getTimestamp(), fileCommitTime, HoodieTimeline.GREATER)) {
|
if (HoodieTimeline.compareTimestamps(instantTime.getTimestamp(), fileCommitTime, HoodieTimeline.GREATER)) {
|
||||||
// fileList is sorted on the reverse, so the first commit we find <= commitTime is the
|
// fileList is sorted on the reverse, so the first commit we find <= instantTime is the
|
||||||
// one we want
|
// one we want
|
||||||
return fileCommitTime;
|
return fileCommitTime;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// There is no version of this file which is <= commitTime
|
// There is no version of this file which is <= instantTime
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -160,7 +160,7 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public HoodieCompactionPlan scheduleCompaction(JavaSparkContext jsc, String commitTime) {
|
public HoodieCompactionPlan scheduleCompaction(JavaSparkContext jsc, String instantTime) {
|
||||||
throw new HoodieNotSupportedException("Compaction is not supported from a CopyOnWrite table");
|
throw new HoodieNotSupportedException("Compaction is not supported from a CopyOnWrite table");
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -170,7 +170,7 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
|
|||||||
throw new HoodieNotSupportedException("Compaction is not supported from a CopyOnWrite table");
|
throw new HoodieNotSupportedException("Compaction is not supported from a CopyOnWrite table");
|
||||||
}
|
}
|
||||||
|
|
||||||
public Iterator<List<WriteStatus>> handleUpdate(String commitTime, String partitionPath, String fileId,
|
public Iterator<List<WriteStatus>> handleUpdate(String instantTime, String partitionPath, String fileId,
|
||||||
Iterator<HoodieRecord<T>> recordItr)
|
Iterator<HoodieRecord<T>> recordItr)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
// This is needed since sometimes some buckets are never picked in getPartition() and end up with 0 records
|
// This is needed since sometimes some buckets are never picked in getPartition() and end up with 0 records
|
||||||
@@ -179,22 +179,22 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
|
|||||||
return Collections.singletonList((List<WriteStatus>) Collections.EMPTY_LIST).iterator();
|
return Collections.singletonList((List<WriteStatus>) Collections.EMPTY_LIST).iterator();
|
||||||
}
|
}
|
||||||
// these are updates
|
// these are updates
|
||||||
HoodieMergeHandle upsertHandle = getUpdateHandle(commitTime, partitionPath, fileId, recordItr);
|
HoodieMergeHandle upsertHandle = getUpdateHandle(instantTime, partitionPath, fileId, recordItr);
|
||||||
return handleUpdateInternal(upsertHandle, commitTime, fileId);
|
return handleUpdateInternal(upsertHandle, instantTime, fileId);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Iterator<List<WriteStatus>> handleUpdate(String commitTime, String partitionPath, String fileId,
|
public Iterator<List<WriteStatus>> handleUpdate(String instantTime, String partitionPath, String fileId,
|
||||||
Map<String, HoodieRecord<T>> keyToNewRecords, HoodieBaseFile oldDataFile) throws IOException {
|
Map<String, HoodieRecord<T>> keyToNewRecords, HoodieBaseFile oldDataFile) throws IOException {
|
||||||
// these are updates
|
// these are updates
|
||||||
HoodieMergeHandle upsertHandle = getUpdateHandle(commitTime, partitionPath, fileId, keyToNewRecords, oldDataFile);
|
HoodieMergeHandle upsertHandle = getUpdateHandle(instantTime, partitionPath, fileId, keyToNewRecords, oldDataFile);
|
||||||
return handleUpdateInternal(upsertHandle, commitTime, fileId);
|
return handleUpdateInternal(upsertHandle, instantTime, fileId);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Iterator<List<WriteStatus>> handleUpdateInternal(HoodieMergeHandle upsertHandle, String commitTime,
|
protected Iterator<List<WriteStatus>> handleUpdateInternal(HoodieMergeHandle upsertHandle, String instantTime,
|
||||||
String fileId) throws IOException {
|
String fileId) throws IOException {
|
||||||
if (upsertHandle.getOldFilePath() == null) {
|
if (upsertHandle.getOldFilePath() == null) {
|
||||||
throw new HoodieUpsertException(
|
throw new HoodieUpsertException(
|
||||||
"Error in finding the old file path at commit " + commitTime + " for fileId: " + fileId);
|
"Error in finding the old file path at commit " + instantTime + " for fileId: " + fileId);
|
||||||
} else {
|
} else {
|
||||||
AvroReadSupport.setAvroReadSchema(getHadoopConf(), upsertHandle.getWriterSchema());
|
AvroReadSupport.setAvroReadSchema(getHadoopConf(), upsertHandle.getWriterSchema());
|
||||||
BoundedInMemoryExecutor<GenericRecord, GenericRecord, Void> wrapper = null;
|
BoundedInMemoryExecutor<GenericRecord, GenericRecord, Void> wrapper = null;
|
||||||
@@ -221,46 +221,46 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
|
|||||||
return Collections.singletonList(Collections.singletonList(upsertHandle.getWriteStatus())).iterator();
|
return Collections.singletonList(Collections.singletonList(upsertHandle.getWriteStatus())).iterator();
|
||||||
}
|
}
|
||||||
|
|
||||||
protected HoodieMergeHandle getUpdateHandle(String commitTime, String partitionPath, String fileId, Iterator<HoodieRecord<T>> recordItr) {
|
protected HoodieMergeHandle getUpdateHandle(String instantTime, String partitionPath, String fileId, Iterator<HoodieRecord<T>> recordItr) {
|
||||||
return new HoodieMergeHandle<>(config, commitTime, this, recordItr, partitionPath, fileId);
|
return new HoodieMergeHandle<>(config, instantTime, this, recordItr, partitionPath, fileId);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected HoodieMergeHandle getUpdateHandle(String commitTime, String partitionPath, String fileId,
|
protected HoodieMergeHandle getUpdateHandle(String instantTime, String partitionPath, String fileId,
|
||||||
Map<String, HoodieRecord<T>> keyToNewRecords, HoodieBaseFile dataFileToBeMerged) {
|
Map<String, HoodieRecord<T>> keyToNewRecords, HoodieBaseFile dataFileToBeMerged) {
|
||||||
return new HoodieMergeHandle<>(config, commitTime, this, keyToNewRecords,
|
return new HoodieMergeHandle<>(config, instantTime, this, keyToNewRecords,
|
||||||
partitionPath, fileId, dataFileToBeMerged);
|
partitionPath, fileId, dataFileToBeMerged);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Iterator<List<WriteStatus>> handleInsert(String commitTime, String idPfx, Iterator<HoodieRecord<T>> recordItr)
|
public Iterator<List<WriteStatus>> handleInsert(String instantTime, String idPfx, Iterator<HoodieRecord<T>> recordItr)
|
||||||
throws Exception {
|
throws Exception {
|
||||||
// This is needed since sometimes some buckets are never picked in getPartition() and end up with 0 records
|
// This is needed since sometimes some buckets are never picked in getPartition() and end up with 0 records
|
||||||
if (!recordItr.hasNext()) {
|
if (!recordItr.hasNext()) {
|
||||||
LOG.info("Empty partition");
|
LOG.info("Empty partition");
|
||||||
return Collections.singletonList((List<WriteStatus>) Collections.EMPTY_LIST).iterator();
|
return Collections.singletonList((List<WriteStatus>) Collections.EMPTY_LIST).iterator();
|
||||||
}
|
}
|
||||||
return new CopyOnWriteLazyInsertIterable<>(recordItr, config, commitTime, this, idPfx);
|
return new CopyOnWriteLazyInsertIterable<>(recordItr, config, instantTime, this, idPfx);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Iterator<List<WriteStatus>> handleInsert(String commitTime, String partitionPath, String fileId,
|
public Iterator<List<WriteStatus>> handleInsert(String instantTime, String partitionPath, String fileId,
|
||||||
Iterator<HoodieRecord<T>> recordItr) {
|
Iterator<HoodieRecord<T>> recordItr) {
|
||||||
HoodieCreateHandle createHandle =
|
HoodieCreateHandle createHandle =
|
||||||
new HoodieCreateHandle(config, commitTime, this, partitionPath, fileId, recordItr);
|
new HoodieCreateHandle(config, instantTime, this, partitionPath, fileId, recordItr);
|
||||||
createHandle.write();
|
createHandle.write();
|
||||||
return Collections.singletonList(Collections.singletonList(createHandle.close())).iterator();
|
return Collections.singletonList(Collections.singletonList(createHandle.close())).iterator();
|
||||||
}
|
}
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
@Override
|
@Override
|
||||||
public Iterator<List<WriteStatus>> handleUpsertPartition(String commitTime, Integer partition, Iterator recordItr,
|
public Iterator<List<WriteStatus>> handleUpsertPartition(String instantTime, Integer partition, Iterator recordItr,
|
||||||
Partitioner partitioner) {
|
Partitioner partitioner) {
|
||||||
UpsertPartitioner upsertPartitioner = (UpsertPartitioner) partitioner;
|
UpsertPartitioner upsertPartitioner = (UpsertPartitioner) partitioner;
|
||||||
BucketInfo binfo = upsertPartitioner.getBucketInfo(partition);
|
BucketInfo binfo = upsertPartitioner.getBucketInfo(partition);
|
||||||
BucketType btype = binfo.bucketType;
|
BucketType btype = binfo.bucketType;
|
||||||
try {
|
try {
|
||||||
if (btype.equals(BucketType.INSERT)) {
|
if (btype.equals(BucketType.INSERT)) {
|
||||||
return handleInsert(commitTime, binfo.fileIdPrefix, recordItr);
|
return handleInsert(instantTime, binfo.fileIdPrefix, recordItr);
|
||||||
} else if (btype.equals(BucketType.UPDATE)) {
|
} else if (btype.equals(BucketType.UPDATE)) {
|
||||||
return handleUpdate(commitTime, binfo.partitionPath, binfo.fileIdPrefix, recordItr);
|
return handleUpdate(instantTime, binfo.partitionPath, binfo.fileIdPrefix, recordItr);
|
||||||
} else {
|
} else {
|
||||||
throw new HoodieUpsertException("Unknown bucketType " + btype + " for partition :" + partition);
|
throw new HoodieUpsertException("Unknown bucketType " + btype + " for partition :" + partition);
|
||||||
}
|
}
|
||||||
@@ -272,9 +272,9 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Iterator<List<WriteStatus>> handleInsertPartition(String commitTime, Integer partition, Iterator recordItr,
|
public Iterator<List<WriteStatus>> handleInsertPartition(String instantTime, Integer partition, Iterator recordItr,
|
||||||
Partitioner partitioner) {
|
Partitioner partitioner) {
|
||||||
return handleUpsertPartition(commitTime, partition, recordItr, partitioner);
|
return handleUpsertPartition(instantTime, partition, recordItr, partitioner);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -98,16 +98,16 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends Hoodi
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Iterator<List<WriteStatus>> handleUpdate(String commitTime, String partitionPath,
|
public Iterator<List<WriteStatus>> handleUpdate(String instantTime, String partitionPath,
|
||||||
String fileId, Iterator<HoodieRecord<T>> recordItr)
|
String fileId, Iterator<HoodieRecord<T>> recordItr)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
LOG.info("Merging updates for commit " + commitTime + " for file " + fileId);
|
LOG.info("Merging updates for commit " + instantTime + " for file " + fileId);
|
||||||
|
|
||||||
if (!index.canIndexLogFiles() && mergeOnReadUpsertPartitioner.getSmallFileIds().contains(fileId)) {
|
if (!index.canIndexLogFiles() && mergeOnReadUpsertPartitioner.getSmallFileIds().contains(fileId)) {
|
||||||
LOG.info("Small file corrections for updates for commit " + commitTime + " for file " + fileId);
|
LOG.info("Small file corrections for updates for commit " + instantTime + " for file " + fileId);
|
||||||
return super.handleUpdate(commitTime, partitionPath, fileId, recordItr);
|
return super.handleUpdate(instantTime, partitionPath, fileId, recordItr);
|
||||||
} else {
|
} else {
|
||||||
HoodieAppendHandle<T> appendHandle = new HoodieAppendHandle<>(config, commitTime, this,
|
HoodieAppendHandle<T> appendHandle = new HoodieAppendHandle<>(config, instantTime, this,
|
||||||
partitionPath, fileId, recordItr);
|
partitionPath, fileId, recordItr);
|
||||||
appendHandle.doAppend();
|
appendHandle.doAppend();
|
||||||
appendHandle.close();
|
appendHandle.close();
|
||||||
@@ -116,13 +116,13 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends Hoodi
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Iterator<List<WriteStatus>> handleInsert(String commitTime, String idPfx, Iterator<HoodieRecord<T>> recordItr)
|
public Iterator<List<WriteStatus>> handleInsert(String instantTime, String idPfx, Iterator<HoodieRecord<T>> recordItr)
|
||||||
throws Exception {
|
throws Exception {
|
||||||
// If canIndexLogFiles, write inserts to log files else write inserts to parquet files
|
// If canIndexLogFiles, write inserts to log files else write inserts to parquet files
|
||||||
if (index.canIndexLogFiles()) {
|
if (index.canIndexLogFiles()) {
|
||||||
return new MergeOnReadLazyInsertIterable<>(recordItr, config, commitTime, this, idPfx);
|
return new MergeOnReadLazyInsertIterable<>(recordItr, config, instantTime, this, idPfx);
|
||||||
} else {
|
} else {
|
||||||
return super.handleInsert(commitTime, idPfx, recordItr);
|
return super.handleInsert(instantTime, idPfx, recordItr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -248,13 +248,13 @@ public abstract class HoodieTable<T extends HoodieRecordPayload> implements Seri
|
|||||||
/**
|
/**
|
||||||
* Perform the ultimate IO for a given upserted (RDD) partition.
|
* Perform the ultimate IO for a given upserted (RDD) partition.
|
||||||
*/
|
*/
|
||||||
public abstract Iterator<List<WriteStatus>> handleUpsertPartition(String commitTime, Integer partition,
|
public abstract Iterator<List<WriteStatus>> handleUpsertPartition(String instantTime, Integer partition,
|
||||||
Iterator<HoodieRecord<T>> recordIterator, Partitioner partitioner);
|
Iterator<HoodieRecord<T>> recordIterator, Partitioner partitioner);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Perform the ultimate IO for a given inserted (RDD) partition.
|
* Perform the ultimate IO for a given inserted (RDD) partition.
|
||||||
*/
|
*/
|
||||||
public abstract Iterator<List<WriteStatus>> handleInsertPartition(String commitTime, Integer partition,
|
public abstract Iterator<List<WriteStatus>> handleInsertPartition(String instantTime, Integer partition,
|
||||||
Iterator<HoodieRecord<T>> recordIterator, Partitioner partitioner);
|
Iterator<HoodieRecord<T>> recordIterator, Partitioner partitioner);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -99,12 +99,12 @@ public class HoodieMergeOnReadTableCompactor implements HoodieCompactor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private List<WriteStatus> compact(HoodieCopyOnWriteTable hoodieCopyOnWriteTable, HoodieTableMetaClient metaClient,
|
private List<WriteStatus> compact(HoodieCopyOnWriteTable hoodieCopyOnWriteTable, HoodieTableMetaClient metaClient,
|
||||||
HoodieWriteConfig config, CompactionOperation operation, String commitTime) throws IOException {
|
HoodieWriteConfig config, CompactionOperation operation, String instantTime) throws IOException {
|
||||||
FileSystem fs = metaClient.getFs();
|
FileSystem fs = metaClient.getFs();
|
||||||
|
|
||||||
Schema readerSchema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(config.getSchema()));
|
Schema readerSchema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(config.getSchema()));
|
||||||
LOG.info("Compacting base " + operation.getDataFileName() + " with delta files " + operation.getDeltaFileNames()
|
LOG.info("Compacting base " + operation.getDataFileName() + " with delta files " + operation.getDeltaFileNames()
|
||||||
+ " for commit " + commitTime);
|
+ " for commit " + instantTime);
|
||||||
// TODO - FIX THIS
|
// TODO - FIX THIS
|
||||||
// Reads the entire avro file. Always only specific blocks should be read from the avro file
|
// Reads the entire avro file. Always only specific blocks should be read from the avro file
|
||||||
// (failure recover).
|
// (failure recover).
|
||||||
@@ -136,11 +136,11 @@ public class HoodieMergeOnReadTableCompactor implements HoodieCompactor {
|
|||||||
// If the dataFile is present, there is a base parquet file present, perform updates else perform inserts into a
|
// If the dataFile is present, there is a base parquet file present, perform updates else perform inserts into a
|
||||||
// new base parquet file.
|
// new base parquet file.
|
||||||
if (oldDataFileOpt.isPresent()) {
|
if (oldDataFileOpt.isPresent()) {
|
||||||
result = hoodieCopyOnWriteTable.handleUpdate(commitTime, operation.getPartitionPath(),
|
result = hoodieCopyOnWriteTable.handleUpdate(instantTime, operation.getPartitionPath(),
|
||||||
operation.getFileId(), scanner.getRecords(),
|
operation.getFileId(), scanner.getRecords(),
|
||||||
oldDataFileOpt.get());
|
oldDataFileOpt.get());
|
||||||
} else {
|
} else {
|
||||||
result = hoodieCopyOnWriteTable.handleInsert(commitTime, operation.getPartitionPath(), operation.getFileId(),
|
result = hoodieCopyOnWriteTable.handleInsert(instantTime, operation.getPartitionPath(), operation.getFileId(),
|
||||||
scanner.iterator());
|
scanner.iterator());
|
||||||
}
|
}
|
||||||
Iterable<List<WriteStatus>> resultIterable = () -> result;
|
Iterable<List<WriteStatus>> resultIterable = () -> result;
|
||||||
|
|||||||
@@ -212,13 +212,13 @@ public class TestHoodieClientBase extends HoodieClientTestHarness {
|
|||||||
* Ensure records have location field set.
|
* Ensure records have location field set.
|
||||||
*
|
*
|
||||||
* @param taggedRecords Tagged Records
|
* @param taggedRecords Tagged Records
|
||||||
* @param commitTime Commit Timestamp
|
* @param instantTime Commit Timestamp
|
||||||
*/
|
*/
|
||||||
protected void checkTaggedRecords(List<HoodieRecord> taggedRecords, String commitTime) {
|
protected void checkTaggedRecords(List<HoodieRecord> taggedRecords, String instantTime) {
|
||||||
for (HoodieRecord rec : taggedRecords) {
|
for (HoodieRecord rec : taggedRecords) {
|
||||||
assertTrue("Record " + rec + " found with no location.", rec.isCurrentLocationKnown());
|
assertTrue("Record " + rec + " found with no location.", rec.isCurrentLocationKnown());
|
||||||
assertEquals("All records should have commit time " + commitTime + ", since updates were made",
|
assertEquals("All records should have commit time " + instantTime + ", since updates were made",
|
||||||
rec.getCurrentLocation().getInstantTime(), commitTime);
|
rec.getCurrentLocation().getInstantTime(), instantTime);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -129,7 +129,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
|||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void testAutoCommitOnBulkInsertPrepped() throws Exception {
|
public void testAutoCommitOnBulkInsertPrepped() throws Exception {
|
||||||
testAutoCommit((writeClient, recordRDD, commitTime) -> writeClient.bulkInsertPreppedRecords(recordRDD, commitTime,
|
testAutoCommit((writeClient, recordRDD, instantTime) -> writeClient.bulkInsertPreppedRecords(recordRDD, instantTime,
|
||||||
Option.empty()), true);
|
Option.empty()), true);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -357,9 +357,9 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
|||||||
|
|
||||||
final List<HoodieRecord> recordsInFirstBatch = new ArrayList<>();
|
final List<HoodieRecord> recordsInFirstBatch = new ArrayList<>();
|
||||||
Function2<List<HoodieRecord>, String, Integer> recordGenFunction =
|
Function2<List<HoodieRecord>, String, Integer> recordGenFunction =
|
||||||
(String commitTime, Integer numRecordsInThisCommit) -> {
|
(String instantTime, Integer numRecordsInThisCommit) -> {
|
||||||
List<HoodieRecord> fewRecordsForInsert = dataGen.generateInserts(commitTime, 200);
|
List<HoodieRecord> fewRecordsForInsert = dataGen.generateInserts(instantTime, 200);
|
||||||
List<HoodieRecord> fewRecordsForDelete = dataGen.generateDeletes(commitTime, 100);
|
List<HoodieRecord> fewRecordsForDelete = dataGen.generateDeletes(instantTime, 100);
|
||||||
|
|
||||||
recordsInFirstBatch.addAll(fewRecordsForInsert);
|
recordsInFirstBatch.addAll(fewRecordsForInsert);
|
||||||
recordsInFirstBatch.addAll(fewRecordsForDelete);
|
recordsInFirstBatch.addAll(fewRecordsForDelete);
|
||||||
@@ -376,7 +376,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
|||||||
newCommitTime = "004";
|
newCommitTime = "004";
|
||||||
final List<HoodieRecord> recordsInSecondBatch = new ArrayList<>();
|
final List<HoodieRecord> recordsInSecondBatch = new ArrayList<>();
|
||||||
|
|
||||||
recordGenFunction = (String commitTime, Integer numRecordsInThisCommit) -> {
|
recordGenFunction = (String instantTime, Integer numRecordsInThisCommit) -> {
|
||||||
List<HoodieRecord> fewRecordsForDelete = recordsInFirstBatch.subList(0, 50);
|
List<HoodieRecord> fewRecordsForDelete = recordsInFirstBatch.subList(0, 50);
|
||||||
List<HoodieRecord> fewRecordsForUpdate = recordsInFirstBatch.subList(50, 100);
|
List<HoodieRecord> fewRecordsForUpdate = recordsInFirstBatch.subList(50, 100);
|
||||||
recordsInSecondBatch.addAll(dataGen.generateDeletesFromExistingRecords(fewRecordsForDelete));
|
recordsInSecondBatch.addAll(dataGen.generateDeletesFromExistingRecords(fewRecordsForDelete));
|
||||||
@@ -704,18 +704,18 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
|||||||
testDeletes(client, updateBatch3.getRight(), 10, file1, "007", 140, keysSoFar);
|
testDeletes(client, updateBatch3.getRight(), 10, file1, "007", 140, keysSoFar);
|
||||||
}
|
}
|
||||||
|
|
||||||
private Pair<Set<String>, List<HoodieRecord>> testUpdates(String commitTime, HoodieWriteClient client,
|
private Pair<Set<String>, List<HoodieRecord>> testUpdates(String instantTime, HoodieWriteClient client,
|
||||||
int sizeToInsertAndUpdate, int expectedTotalRecords)
|
int sizeToInsertAndUpdate, int expectedTotalRecords)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
client.startCommitWithTime(commitTime);
|
client.startCommitWithTime(instantTime);
|
||||||
List<HoodieRecord> inserts = dataGen.generateInserts(commitTime, sizeToInsertAndUpdate);
|
List<HoodieRecord> inserts = dataGen.generateInserts(instantTime, sizeToInsertAndUpdate);
|
||||||
Set<String> keys = HoodieClientTestUtils.getRecordKeys(inserts);
|
Set<String> keys = HoodieClientTestUtils.getRecordKeys(inserts);
|
||||||
List<HoodieRecord> insertsAndUpdates = new ArrayList<>();
|
List<HoodieRecord> insertsAndUpdates = new ArrayList<>();
|
||||||
insertsAndUpdates.addAll(inserts);
|
insertsAndUpdates.addAll(inserts);
|
||||||
insertsAndUpdates.addAll(dataGen.generateUpdates(commitTime, inserts));
|
insertsAndUpdates.addAll(dataGen.generateUpdates(instantTime, inserts));
|
||||||
|
|
||||||
JavaRDD<HoodieRecord> insertAndUpdatesRDD = jsc.parallelize(insertsAndUpdates, 1);
|
JavaRDD<HoodieRecord> insertAndUpdatesRDD = jsc.parallelize(insertsAndUpdates, 1);
|
||||||
List<WriteStatus> statuses = client.upsert(insertAndUpdatesRDD, commitTime).collect();
|
List<WriteStatus> statuses = client.upsert(insertAndUpdatesRDD, instantTime).collect();
|
||||||
assertNoWriteErrors(statuses);
|
assertNoWriteErrors(statuses);
|
||||||
|
|
||||||
// Check the entire dataset has all records still
|
// Check the entire dataset has all records still
|
||||||
@@ -729,13 +729,13 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private void testDeletes(HoodieWriteClient client, List<HoodieRecord> previousRecords, int sizeToDelete,
|
private void testDeletes(HoodieWriteClient client, List<HoodieRecord> previousRecords, int sizeToDelete,
|
||||||
String existingFile, String commitTime, int exepctedRecords, List<String> keys) {
|
String existingFile, String instantTime, int exepctedRecords, List<String> keys) {
|
||||||
client.startCommitWithTime(commitTime);
|
client.startCommitWithTime(instantTime);
|
||||||
|
|
||||||
List<HoodieKey> hoodieKeysToDelete = HoodieClientTestUtils
|
List<HoodieKey> hoodieKeysToDelete = HoodieClientTestUtils
|
||||||
.getKeysToDelete(HoodieClientTestUtils.getHoodieKeys(previousRecords), sizeToDelete);
|
.getKeysToDelete(HoodieClientTestUtils.getHoodieKeys(previousRecords), sizeToDelete);
|
||||||
JavaRDD<HoodieKey> deleteKeys = jsc.parallelize(hoodieKeysToDelete, 1);
|
JavaRDD<HoodieKey> deleteKeys = jsc.parallelize(hoodieKeysToDelete, 1);
|
||||||
List<WriteStatus> statuses = client.delete(deleteKeys, commitTime).collect();
|
List<WriteStatus> statuses = client.delete(deleteKeys, instantTime).collect();
|
||||||
|
|
||||||
assertNoWriteErrors(statuses);
|
assertNoWriteErrors(statuses);
|
||||||
|
|
||||||
@@ -757,7 +757,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
|||||||
List<GenericRecord> records = ParquetUtils.readAvroRecords(jsc.hadoopConfiguration(), newFile);
|
List<GenericRecord> records = ParquetUtils.readAvroRecords(jsc.hadoopConfiguration(), newFile);
|
||||||
for (GenericRecord record : records) {
|
for (GenericRecord record : records) {
|
||||||
String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
|
String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
|
||||||
assertTrue("key expected to be part of " + commitTime, keys.contains(recordKey));
|
assertTrue("key expected to be part of " + instantTime, keys.contains(recordKey));
|
||||||
assertFalse("Key deleted", hoodieKeysToDelete.contains(recordKey));
|
assertFalse("Key deleted", hoodieKeysToDelete.contains(recordKey));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -802,21 +802,21 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
|||||||
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
|
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
|
||||||
HoodieTable table = HoodieTable.getHoodieTable(metaClient, cfg, jsc);
|
HoodieTable table = HoodieTable.getHoodieTable(metaClient, cfg, jsc);
|
||||||
|
|
||||||
String commitTime = "000";
|
String instantTime = "000";
|
||||||
client.startCommitWithTime(commitTime);
|
client.startCommitWithTime(instantTime);
|
||||||
|
|
||||||
List<HoodieRecord> records = dataGen.generateInserts(commitTime, 200);
|
List<HoodieRecord> records = dataGen.generateInserts(instantTime, 200);
|
||||||
JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(records, 1);
|
JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(records, 1);
|
||||||
|
|
||||||
JavaRDD<WriteStatus> result = client.bulkInsert(writeRecords, commitTime);
|
JavaRDD<WriteStatus> result = client.bulkInsert(writeRecords, instantTime);
|
||||||
|
|
||||||
assertTrue("Commit should succeed", client.commit(commitTime, result));
|
assertTrue("Commit should succeed", client.commit(instantTime, result));
|
||||||
assertTrue("After explicit commit, commit file should be created",
|
assertTrue("After explicit commit, commit file should be created",
|
||||||
HoodieTestUtils.doesCommitExist(basePath, commitTime));
|
HoodieTestUtils.doesCommitExist(basePath, instantTime));
|
||||||
|
|
||||||
// Get parquet file paths from commit metadata
|
// Get parquet file paths from commit metadata
|
||||||
String actionType = metaClient.getCommitActionType();
|
String actionType = metaClient.getCommitActionType();
|
||||||
HoodieInstant commitInstant = new HoodieInstant(false, actionType, commitTime);
|
HoodieInstant commitInstant = new HoodieInstant(false, actionType, instantTime);
|
||||||
HoodieTimeline commitTimeline = metaClient.getCommitTimeline().filterCompletedInstants();
|
HoodieTimeline commitTimeline = metaClient.getCommitTimeline().filterCompletedInstants();
|
||||||
HoodieCommitMetadata commitMetadata = HoodieCommitMetadata
|
HoodieCommitMetadata commitMetadata = HoodieCommitMetadata
|
||||||
.fromBytes(commitTimeline.getInstantDetails(commitInstant).get(), HoodieCommitMetadata.class);
|
.fromBytes(commitTimeline.getInstantDetails(commitInstant).get(), HoodieCommitMetadata.class);
|
||||||
@@ -824,7 +824,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
|||||||
Collection<String> commitPathNames = commitMetadata.getFileIdAndFullPaths(basePath).values();
|
Collection<String> commitPathNames = commitMetadata.getFileIdAndFullPaths(basePath).values();
|
||||||
|
|
||||||
// Read from commit file
|
// Read from commit file
|
||||||
String filename = HoodieTestUtils.getCommitFilePath(basePath, commitTime);
|
String filename = HoodieTestUtils.getCommitFilePath(basePath, instantTime);
|
||||||
FileInputStream inputStream = new FileInputStream(filename);
|
FileInputStream inputStream = new FileInputStream(filename);
|
||||||
String everything = FileIOUtils.readAsUTFString(inputStream);
|
String everything = FileIOUtils.readAsUTFString(inputStream);
|
||||||
HoodieCommitMetadata metadata = HoodieCommitMetadata.fromJsonString(everything, HoodieCommitMetadata.class);
|
HoodieCommitMetadata metadata = HoodieCommitMetadata.fromJsonString(everything, HoodieCommitMetadata.class);
|
||||||
@@ -848,20 +848,20 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
|||||||
HoodieWriteClient client = getHoodieWriteClient(cfg);
|
HoodieWriteClient client = getHoodieWriteClient(cfg);
|
||||||
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
|
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
|
||||||
|
|
||||||
String commitTime = "000";
|
String instantTime = "000";
|
||||||
client.startCommitWithTime(commitTime);
|
client.startCommitWithTime(instantTime);
|
||||||
|
|
||||||
List<HoodieRecord> records = dataGen.generateInserts(commitTime, 200);
|
List<HoodieRecord> records = dataGen.generateInserts(instantTime, 200);
|
||||||
JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(records, 1);
|
JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(records, 1);
|
||||||
|
|
||||||
JavaRDD<WriteStatus> result = client.bulkInsert(writeRecords, commitTime);
|
JavaRDD<WriteStatus> result = client.bulkInsert(writeRecords, instantTime);
|
||||||
|
|
||||||
assertTrue("Commit should succeed", client.commit(commitTime, result));
|
assertTrue("Commit should succeed", client.commit(instantTime, result));
|
||||||
assertTrue("After explicit commit, commit file should be created",
|
assertTrue("After explicit commit, commit file should be created",
|
||||||
HoodieTestUtils.doesCommitExist(basePath, commitTime));
|
HoodieTestUtils.doesCommitExist(basePath, instantTime));
|
||||||
|
|
||||||
// Read from commit file
|
// Read from commit file
|
||||||
String filename = HoodieTestUtils.getCommitFilePath(basePath, commitTime);
|
String filename = HoodieTestUtils.getCommitFilePath(basePath, instantTime);
|
||||||
FileInputStream inputStream = new FileInputStream(filename);
|
FileInputStream inputStream = new FileInputStream(filename);
|
||||||
String everything = FileIOUtils.readAsUTFString(inputStream);
|
String everything = FileIOUtils.readAsUTFString(inputStream);
|
||||||
HoodieCommitMetadata metadata =
|
HoodieCommitMetadata metadata =
|
||||||
@@ -879,19 +879,19 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
|||||||
Assert.assertEquals(inserts, 200);
|
Assert.assertEquals(inserts, 200);
|
||||||
|
|
||||||
// Update + Inserts such that they just expand file1
|
// Update + Inserts such that they just expand file1
|
||||||
commitTime = "001";
|
instantTime = "001";
|
||||||
client.startCommitWithTime(commitTime);
|
client.startCommitWithTime(instantTime);
|
||||||
|
|
||||||
records = dataGen.generateUpdates(commitTime, records);
|
records = dataGen.generateUpdates(instantTime, records);
|
||||||
writeRecords = jsc.parallelize(records, 1);
|
writeRecords = jsc.parallelize(records, 1);
|
||||||
result = client.upsert(writeRecords, commitTime);
|
result = client.upsert(writeRecords, instantTime);
|
||||||
|
|
||||||
assertTrue("Commit should succeed", client.commit(commitTime, result));
|
assertTrue("Commit should succeed", client.commit(instantTime, result));
|
||||||
assertTrue("After explicit commit, commit file should be created",
|
assertTrue("After explicit commit, commit file should be created",
|
||||||
HoodieTestUtils.doesCommitExist(basePath, commitTime));
|
HoodieTestUtils.doesCommitExist(basePath, instantTime));
|
||||||
|
|
||||||
// Read from commit file
|
// Read from commit file
|
||||||
filename = HoodieTestUtils.getCommitFilePath(basePath, commitTime);
|
filename = HoodieTestUtils.getCommitFilePath(basePath, instantTime);
|
||||||
inputStream = new FileInputStream(filename);
|
inputStream = new FileInputStream(filename);
|
||||||
everything = FileIOUtils.readAsUTFString(inputStream);
|
everything = FileIOUtils.readAsUTFString(inputStream);
|
||||||
metadata = HoodieCommitMetadata.fromJsonString(everything.toString(), HoodieCommitMetadata.class);
|
metadata = HoodieCommitMetadata.fromJsonString(everything.toString(), HoodieCommitMetadata.class);
|
||||||
@@ -918,37 +918,37 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
|||||||
@Test
|
@Test
|
||||||
public void testConsistencyCheckDuringFinalize() throws Exception {
|
public void testConsistencyCheckDuringFinalize() throws Exception {
|
||||||
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
|
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
|
||||||
String commitTime = "000";
|
String instantTime = "000";
|
||||||
HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).build();
|
HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).build();
|
||||||
HoodieWriteClient client = getHoodieWriteClient(cfg);
|
HoodieWriteClient client = getHoodieWriteClient(cfg);
|
||||||
Pair<Path, JavaRDD<WriteStatus>> result = testConsistencyCheck(metaClient, commitTime);
|
Pair<Path, JavaRDD<WriteStatus>> result = testConsistencyCheck(metaClient, instantTime);
|
||||||
|
|
||||||
// Delete orphan marker and commit should succeed
|
// Delete orphan marker and commit should succeed
|
||||||
metaClient.getFs().delete(result.getKey(), false);
|
metaClient.getFs().delete(result.getKey(), false);
|
||||||
assertTrue("Commit should succeed", client.commit(commitTime, result.getRight()));
|
assertTrue("Commit should succeed", client.commit(instantTime, result.getRight()));
|
||||||
assertTrue("After explicit commit, commit file should be created",
|
assertTrue("After explicit commit, commit file should be created",
|
||||||
HoodieTestUtils.doesCommitExist(basePath, commitTime));
|
HoodieTestUtils.doesCommitExist(basePath, instantTime));
|
||||||
// Marker directory must be removed
|
// Marker directory must be removed
|
||||||
assertFalse(metaClient.getFs().exists(new Path(metaClient.getMarkerFolderPath(commitTime))));
|
assertFalse(metaClient.getFs().exists(new Path(metaClient.getMarkerFolderPath(instantTime))));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testRollbackAfterConsistencyCheckFailure() throws Exception {
|
public void testRollbackAfterConsistencyCheckFailure() throws Exception {
|
||||||
String commitTime = "000";
|
String instantTime = "000";
|
||||||
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
|
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
|
||||||
HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).build();
|
HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).build();
|
||||||
HoodieWriteClient client = getHoodieWriteClient(cfg);
|
HoodieWriteClient client = getHoodieWriteClient(cfg);
|
||||||
testConsistencyCheck(metaClient, commitTime);
|
testConsistencyCheck(metaClient, instantTime);
|
||||||
|
|
||||||
// Rollback of this commit should succeed
|
// Rollback of this commit should succeed
|
||||||
client.rollback(commitTime);
|
client.rollback(instantTime);
|
||||||
assertFalse("After explicit rollback, commit file should not be present",
|
assertFalse("After explicit rollback, commit file should not be present",
|
||||||
HoodieTestUtils.doesCommitExist(basePath, commitTime));
|
HoodieTestUtils.doesCommitExist(basePath, instantTime));
|
||||||
// Marker directory must be removed after rollback
|
// Marker directory must be removed after rollback
|
||||||
assertFalse(metaClient.getFs().exists(new Path(metaClient.getMarkerFolderPath(commitTime))));
|
assertFalse(metaClient.getFs().exists(new Path(metaClient.getMarkerFolderPath(instantTime))));
|
||||||
}
|
}
|
||||||
|
|
||||||
private Pair<Path, JavaRDD<WriteStatus>> testConsistencyCheck(HoodieTableMetaClient metaClient, String commitTime)
|
private Pair<Path, JavaRDD<WriteStatus>> testConsistencyCheck(HoodieTableMetaClient metaClient, String instantTime)
|
||||||
throws Exception {
|
throws Exception {
|
||||||
HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false)
|
HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false)
|
||||||
.withConsistencyGuardConfig(ConsistencyGuardConfig.newBuilder().withConsistencyCheckEnabled(true)
|
.withConsistencyGuardConfig(ConsistencyGuardConfig.newBuilder().withConsistencyCheckEnabled(true)
|
||||||
@@ -956,24 +956,24 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase {
|
|||||||
.build();
|
.build();
|
||||||
HoodieWriteClient client = getHoodieWriteClient(cfg);
|
HoodieWriteClient client = getHoodieWriteClient(cfg);
|
||||||
|
|
||||||
client.startCommitWithTime(commitTime);
|
client.startCommitWithTime(instantTime);
|
||||||
JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(dataGen.generateInserts(commitTime, 200), 1);
|
JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(dataGen.generateInserts(instantTime, 200), 1);
|
||||||
JavaRDD<WriteStatus> result = client.bulkInsert(writeRecords, commitTime);
|
JavaRDD<WriteStatus> result = client.bulkInsert(writeRecords, instantTime);
|
||||||
result.collect();
|
result.collect();
|
||||||
|
|
||||||
// Create a dummy marker file to simulate the case that a marker file was created without data file.
|
// Create a dummy marker file to simulate the case that a marker file was created without data file.
|
||||||
// This should fail the commit
|
// This should fail the commit
|
||||||
String partitionPath = Arrays
|
String partitionPath = Arrays
|
||||||
.stream(fs.globStatus(new Path(String.format("%s/*/*/*/*", metaClient.getMarkerFolderPath(commitTime))),
|
.stream(fs.globStatus(new Path(String.format("%s/*/*/*/*", metaClient.getMarkerFolderPath(instantTime))),
|
||||||
path -> path.toString().endsWith(HoodieTableMetaClient.MARKER_EXTN)))
|
path -> path.toString().endsWith(HoodieTableMetaClient.MARKER_EXTN)))
|
||||||
.limit(1).map(status -> status.getPath().getParent().toString()).collect(Collectors.toList()).get(0);
|
.limit(1).map(status -> status.getPath().getParent().toString()).collect(Collectors.toList()).get(0);
|
||||||
Path markerFilePath = new Path(String.format("%s/%s", partitionPath,
|
Path markerFilePath = new Path(String.format("%s/%s", partitionPath,
|
||||||
FSUtils.makeMarkerFile(commitTime, "1-0-1", UUID.randomUUID().toString())));
|
FSUtils.makeMarkerFile(instantTime, "1-0-1", UUID.randomUUID().toString())));
|
||||||
metaClient.getFs().create(markerFilePath);
|
metaClient.getFs().create(markerFilePath);
|
||||||
LOG.info("Created a dummy marker path=" + markerFilePath);
|
LOG.info("Created a dummy marker path=" + markerFilePath);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
client.commit(commitTime, result);
|
client.commit(instantTime, result);
|
||||||
fail("Commit should fail due to consistency check");
|
fail("Commit should fail due to consistency check");
|
||||||
} catch (HoodieCommitException cme) {
|
} catch (HoodieCommitException cme) {
|
||||||
assertTrue(cme.getCause() instanceof HoodieIOException);
|
assertTrue(cme.getCause() instanceof HoodieIOException);
|
||||||
|
|||||||
@@ -74,8 +74,8 @@ public class TestHoodieReadClient extends TestHoodieClientBase {
|
|||||||
@Test
|
@Test
|
||||||
public void testReadFilterExistAfterBulkInsertPrepped() throws Exception {
|
public void testReadFilterExistAfterBulkInsertPrepped() throws Exception {
|
||||||
testReadFilterExist(getConfigBuilder().withBulkInsertParallelism(1).build(),
|
testReadFilterExist(getConfigBuilder().withBulkInsertParallelism(1).build(),
|
||||||
(writeClient, recordRDD, commitTime) -> {
|
(writeClient, recordRDD, instantTime) -> {
|
||||||
return writeClient.bulkInsertPreppedRecords(recordRDD, commitTime, Option.empty());
|
return writeClient.bulkInsertPreppedRecords(recordRDD, instantTime, Option.empty());
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -178,8 +178,8 @@ public class TestHoodieReadClient extends TestHoodieClientBase {
|
|||||||
@Test
|
@Test
|
||||||
public void testTagLocationAfterBulkInsertPrepped() throws Exception {
|
public void testTagLocationAfterBulkInsertPrepped() throws Exception {
|
||||||
testTagLocation(
|
testTagLocation(
|
||||||
getConfigBuilder().withBulkInsertParallelism(1).build(), (writeClient, recordRDD, commitTime) -> writeClient
|
getConfigBuilder().withBulkInsertParallelism(1).build(), (writeClient, recordRDD, instantTime) -> writeClient
|
||||||
.bulkInsertPreppedRecords(recordRDD, commitTime, Option.empty()),
|
.bulkInsertPreppedRecords(recordRDD, instantTime, Option.empty()),
|
||||||
HoodieWriteClient::upsertPreppedRecords, true);
|
HoodieWriteClient::upsertPreppedRecords, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -115,30 +115,30 @@ public class HoodieClientTestUtils {
|
|||||||
return toReturn;
|
return toReturn;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void fakeMetaFile(String basePath, String commitTime, String suffix) throws IOException {
|
private static void fakeMetaFile(String basePath, String instantTime, String suffix) throws IOException {
|
||||||
String parentPath = basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME;
|
String parentPath = basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME;
|
||||||
new File(parentPath).mkdirs();
|
new File(parentPath).mkdirs();
|
||||||
new File(parentPath + "/" + commitTime + suffix).createNewFile();
|
new File(parentPath + "/" + instantTime + suffix).createNewFile();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void fakeCommitFile(String basePath, String commitTime) throws IOException {
|
public static void fakeCommitFile(String basePath, String instantTime) throws IOException {
|
||||||
fakeMetaFile(basePath, commitTime, HoodieTimeline.COMMIT_EXTENSION);
|
fakeMetaFile(basePath, instantTime, HoodieTimeline.COMMIT_EXTENSION);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void fakeInFlightFile(String basePath, String commitTime) throws IOException {
|
public static void fakeInFlightFile(String basePath, String instantTime) throws IOException {
|
||||||
fakeMetaFile(basePath, commitTime, HoodieTimeline.INFLIGHT_EXTENSION);
|
fakeMetaFile(basePath, instantTime, HoodieTimeline.INFLIGHT_EXTENSION);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void fakeDataFile(String basePath, String partitionPath, String commitTime, String fileId)
|
public static void fakeDataFile(String basePath, String partitionPath, String instantTime, String fileId)
|
||||||
throws Exception {
|
throws Exception {
|
||||||
fakeDataFile(basePath, partitionPath, commitTime, fileId, 0);
|
fakeDataFile(basePath, partitionPath, instantTime, fileId, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void fakeDataFile(String basePath, String partitionPath, String commitTime, String fileId, long length)
|
public static void fakeDataFile(String basePath, String partitionPath, String instantTime, String fileId, long length)
|
||||||
throws Exception {
|
throws Exception {
|
||||||
String parentPath = String.format("%s/%s", basePath, partitionPath);
|
String parentPath = String.format("%s/%s", basePath, partitionPath);
|
||||||
new File(parentPath).mkdirs();
|
new File(parentPath).mkdirs();
|
||||||
String path = String.format("%s/%s", parentPath, FSUtils.makeDataFileName(commitTime, "1-0-1", fileId));
|
String path = String.format("%s/%s", parentPath, FSUtils.makeDataFileName(instantTime, "1-0-1", fileId));
|
||||||
new File(path).createNewFile();
|
new File(path).createNewFile();
|
||||||
new RandomAccessFile(path, "rw").setLength(length);
|
new RandomAccessFile(path, "rw").setLength(length);
|
||||||
}
|
}
|
||||||
@@ -161,19 +161,19 @@ public class HoodieClientTestUtils {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public static Dataset<Row> readCommit(String basePath, SQLContext sqlContext, HoodieTimeline commitTimeline,
|
public static Dataset<Row> readCommit(String basePath, SQLContext sqlContext, HoodieTimeline commitTimeline,
|
||||||
String commitTime) {
|
String instantTime) {
|
||||||
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime);
|
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, instantTime);
|
||||||
if (!commitTimeline.containsInstant(commitInstant)) {
|
if (!commitTimeline.containsInstant(commitInstant)) {
|
||||||
throw new HoodieException("No commit exists at " + commitTime);
|
throw new HoodieException("No commit exists at " + instantTime);
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
HashMap<String, String> paths =
|
HashMap<String, String> paths =
|
||||||
getLatestFileIDsToFullPath(basePath, commitTimeline, Arrays.asList(commitInstant));
|
getLatestFileIDsToFullPath(basePath, commitTimeline, Arrays.asList(commitInstant));
|
||||||
LOG.info("Path :" + paths.values());
|
LOG.info("Path :" + paths.values());
|
||||||
return sqlContext.read().parquet(paths.values().toArray(new String[paths.size()]))
|
return sqlContext.read().parquet(paths.values().toArray(new String[paths.size()]))
|
||||||
.filter(String.format("%s ='%s'", HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitTime));
|
.filter(String.format("%s ='%s'", HoodieRecord.COMMIT_TIME_METADATA_FIELD, instantTime));
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new HoodieException("Error reading commit " + commitTime, e);
|
throw new HoodieException("Error reading commit " + instantTime, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -225,16 +225,16 @@ public class HoodieClientTestUtils {
|
|||||||
}
|
}
|
||||||
HoodieAvroWriteSupport writeSupport =
|
HoodieAvroWriteSupport writeSupport =
|
||||||
new HoodieAvroWriteSupport(new AvroSchemaConverter().convert(schema), schema, filter);
|
new HoodieAvroWriteSupport(new AvroSchemaConverter().convert(schema), schema, filter);
|
||||||
String commitTime = FSUtils.getCommitTime(filename);
|
String instantTime = FSUtils.getCommitTime(filename);
|
||||||
HoodieParquetConfig config = new HoodieParquetConfig(writeSupport, CompressionCodecName.GZIP,
|
HoodieParquetConfig config = new HoodieParquetConfig(writeSupport, CompressionCodecName.GZIP,
|
||||||
ParquetWriter.DEFAULT_BLOCK_SIZE, ParquetWriter.DEFAULT_PAGE_SIZE, 120 * 1024 * 1024,
|
ParquetWriter.DEFAULT_BLOCK_SIZE, ParquetWriter.DEFAULT_PAGE_SIZE, 120 * 1024 * 1024,
|
||||||
HoodieTestUtils.getDefaultHadoopConf(), Double.valueOf(HoodieStorageConfig.DEFAULT_STREAM_COMPRESSION_RATIO));
|
HoodieTestUtils.getDefaultHadoopConf(), Double.valueOf(HoodieStorageConfig.DEFAULT_STREAM_COMPRESSION_RATIO));
|
||||||
HoodieParquetWriter writer =
|
HoodieParquetWriter writer =
|
||||||
new HoodieParquetWriter(commitTime, new Path(basePath + "/" + partitionPath + "/" + filename), config, schema);
|
new HoodieParquetWriter(instantTime, new Path(basePath + "/" + partitionPath + "/" + filename), config, schema);
|
||||||
int seqId = 1;
|
int seqId = 1;
|
||||||
for (HoodieRecord record : records) {
|
for (HoodieRecord record : records) {
|
||||||
GenericRecord avroRecord = (GenericRecord) record.getData().getInsertValue(schema).get();
|
GenericRecord avroRecord = (GenericRecord) record.getData().getInsertValue(schema).get();
|
||||||
HoodieAvroUtils.addCommitMetadataToRecord(avroRecord, commitTime, "" + seqId++);
|
HoodieAvroUtils.addCommitMetadataToRecord(avroRecord, instantTime, "" + seqId++);
|
||||||
HoodieAvroUtils.addHoodieKeyToRecord(avroRecord, record.getRecordKey(), record.getPartitionPath(), filename);
|
HoodieAvroUtils.addHoodieKeyToRecord(avroRecord, record.getRecordKey(), record.getPartitionPath(), filename);
|
||||||
writer.writeAvro(record.getRecordKey(), avroRecord);
|
writer.writeAvro(record.getRecordKey(), avroRecord);
|
||||||
filter.add(record.getRecordKey());
|
filter.add(record.getRecordKey());
|
||||||
@@ -243,7 +243,7 @@ public class HoodieClientTestUtils {
|
|||||||
|
|
||||||
if (createCommitTime) {
|
if (createCommitTime) {
|
||||||
HoodieTestUtils.createMetadataFolder(basePath);
|
HoodieTestUtils.createMetadataFolder(basePath);
|
||||||
HoodieTestUtils.createCommitFiles(basePath, commitTime);
|
HoodieTestUtils.createCommitFiles(basePath, instantTime);
|
||||||
}
|
}
|
||||||
return filename;
|
return filename;
|
||||||
}
|
}
|
||||||
@@ -251,10 +251,10 @@ public class HoodieClientTestUtils {
|
|||||||
public static String writeParquetFile(String basePath, String partitionPath, List<HoodieRecord> records,
|
public static String writeParquetFile(String basePath, String partitionPath, List<HoodieRecord> records,
|
||||||
Schema schema, BloomFilter filter, boolean createCommitTime) throws IOException, InterruptedException {
|
Schema schema, BloomFilter filter, boolean createCommitTime) throws IOException, InterruptedException {
|
||||||
Thread.sleep(1000);
|
Thread.sleep(1000);
|
||||||
String commitTime = HoodieTestUtils.makeNewCommitTime();
|
String instantTime = HoodieTestUtils.makeNewCommitTime();
|
||||||
String fileId = UUID.randomUUID().toString();
|
String fileId = UUID.randomUUID().toString();
|
||||||
String filename = FSUtils.makeDataFileName(commitTime, "1-0-1", fileId);
|
String filename = FSUtils.makeDataFileName(instantTime, "1-0-1", fileId);
|
||||||
HoodieTestUtils.createCommitFiles(basePath, commitTime);
|
HoodieTestUtils.createCommitFiles(basePath, instantTime);
|
||||||
return HoodieClientTestUtils.writeParquetFile(basePath, partitionPath, filename, records, schema, filter,
|
return HoodieClientTestUtils.writeParquetFile(basePath, partitionPath, filename, records, schema, filter,
|
||||||
createCommitTime);
|
createCommitTime);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -129,12 +129,12 @@ public class HoodieTestDataGenerator {
|
|||||||
* retaining the key if optionally provided.
|
* retaining the key if optionally provided.
|
||||||
*
|
*
|
||||||
* @param key Hoodie key.
|
* @param key Hoodie key.
|
||||||
* @param commitTime Commit time to use.
|
* @param instantTime Instant time to use.
|
||||||
* @return Raw paylaod of a test record.
|
* @return Raw paylaod of a test record.
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
public static TestRawTripPayload generateRandomValue(HoodieKey key, String commitTime) throws IOException {
|
public static TestRawTripPayload generateRandomValue(HoodieKey key, String instantTime) throws IOException {
|
||||||
return generateRandomValue(key, commitTime, false);
|
return generateRandomValue(key, instantTime, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -142,15 +142,15 @@ public class HoodieTestDataGenerator {
|
|||||||
* retaining the key if optionally provided.
|
* retaining the key if optionally provided.
|
||||||
*
|
*
|
||||||
* @param key Hoodie key.
|
* @param key Hoodie key.
|
||||||
* @param commitTime Commit time to use.
|
* @param instantTime Commit time to use.
|
||||||
* @param isFlattened whether the schema of the record should be flattened.
|
* @param isFlattened whether the schema of the record should be flattened.
|
||||||
* @return Raw paylaod of a test record.
|
* @return Raw paylaod of a test record.
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
public static TestRawTripPayload generateRandomValue(
|
public static TestRawTripPayload generateRandomValue(
|
||||||
HoodieKey key, String commitTime, boolean isFlattened) throws IOException {
|
HoodieKey key, String instantTime, boolean isFlattened) throws IOException {
|
||||||
GenericRecord rec = generateGenericRecord(
|
GenericRecord rec = generateGenericRecord(
|
||||||
key.getRecordKey(), "rider-" + commitTime, "driver-" + commitTime, 0.0,
|
key.getRecordKey(), "rider-" + instantTime, "driver-" + instantTime, 0.0,
|
||||||
false, isFlattened);
|
false, isFlattened);
|
||||||
return new TestRawTripPayload(rec.toString(), key.getRecordKey(), key.getPartitionPath(), TRIP_EXAMPLE_SCHEMA);
|
return new TestRawTripPayload(rec.toString(), key.getRecordKey(), key.getPartitionPath(), TRIP_EXAMPLE_SCHEMA);
|
||||||
}
|
}
|
||||||
@@ -158,8 +158,8 @@ public class HoodieTestDataGenerator {
|
|||||||
/**
|
/**
|
||||||
* Generates a new avro record of the above schema format for a delete.
|
* Generates a new avro record of the above schema format for a delete.
|
||||||
*/
|
*/
|
||||||
public static TestRawTripPayload generateRandomDeleteValue(HoodieKey key, String commitTime) throws IOException {
|
public static TestRawTripPayload generateRandomDeleteValue(HoodieKey key, String instantTime) throws IOException {
|
||||||
GenericRecord rec = generateGenericRecord(key.getRecordKey(), "rider-" + commitTime, "driver-" + commitTime, 0.0,
|
GenericRecord rec = generateGenericRecord(key.getRecordKey(), "rider-" + instantTime, "driver-" + instantTime, 0.0,
|
||||||
true, false);
|
true, false);
|
||||||
return new TestRawTripPayload(rec.toString(), key.getRecordKey(), key.getPartitionPath(), TRIP_EXAMPLE_SCHEMA);
|
return new TestRawTripPayload(rec.toString(), key.getRecordKey(), key.getPartitionPath(), TRIP_EXAMPLE_SCHEMA);
|
||||||
}
|
}
|
||||||
@@ -167,8 +167,8 @@ public class HoodieTestDataGenerator {
|
|||||||
/**
|
/**
|
||||||
* Generates a new avro record of the above schema format, retaining the key if optionally provided.
|
* Generates a new avro record of the above schema format, retaining the key if optionally provided.
|
||||||
*/
|
*/
|
||||||
public static HoodieAvroPayload generateAvroPayload(HoodieKey key, String commitTime) {
|
public static HoodieAvroPayload generateAvroPayload(HoodieKey key, String instantTime) {
|
||||||
GenericRecord rec = generateGenericRecord(key.getRecordKey(), "rider-" + commitTime, "driver-" + commitTime, 0.0);
|
GenericRecord rec = generateGenericRecord(key.getRecordKey(), "rider-" + instantTime, "driver-" + instantTime, 0.0);
|
||||||
return new HoodieAvroPayload(Option.of(rec));
|
return new HoodieAvroPayload(Option.of(rec));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -208,9 +208,9 @@ public class HoodieTestDataGenerator {
|
|||||||
return rec;
|
return rec;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void createCommitFile(String basePath, String commitTime, Configuration configuration) {
|
public static void createCommitFile(String basePath, String instantTime, Configuration configuration) {
|
||||||
Arrays.asList(HoodieTimeline.makeCommitFileName(commitTime), HoodieTimeline.makeInflightCommitFileName(commitTime),
|
Arrays.asList(HoodieTimeline.makeCommitFileName(instantTime), HoodieTimeline.makeInflightCommitFileName(instantTime),
|
||||||
HoodieTimeline.makeRequestedCommitFileName(commitTime))
|
HoodieTimeline.makeRequestedCommitFileName(instantTime))
|
||||||
.forEach(f -> {
|
.forEach(f -> {
|
||||||
Path commitFile = new Path(
|
Path commitFile = new Path(
|
||||||
basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + f);
|
basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + f);
|
||||||
@@ -235,10 +235,10 @@ public class HoodieTestDataGenerator {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void createCompactionRequestedFile(String basePath, String commitTime, Configuration configuration)
|
public static void createCompactionRequestedFile(String basePath, String instantTime, Configuration configuration)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
Path commitFile = new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
|
Path commitFile = new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
|
||||||
+ HoodieTimeline.makeRequestedCompactionFileName(commitTime));
|
+ HoodieTimeline.makeRequestedCompactionFileName(instantTime));
|
||||||
FileSystem fs = FSUtils.getFs(basePath, configuration);
|
FileSystem fs = FSUtils.getFs(basePath, configuration);
|
||||||
FSDataOutputStream os = fs.create(commitFile, true);
|
FSDataOutputStream os = fs.create(commitFile, true);
|
||||||
os.close();
|
os.close();
|
||||||
@@ -256,10 +256,10 @@ public class HoodieTestDataGenerator {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void createSavepointFile(String basePath, String commitTime, Configuration configuration)
|
public static void createSavepointFile(String basePath, String instantTime, Configuration configuration)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
Path commitFile = new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
|
Path commitFile = new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
|
||||||
+ HoodieTimeline.makeSavePointFileName(commitTime));
|
+ HoodieTimeline.makeSavePointFileName(instantTime));
|
||||||
FileSystem fs = FSUtils.getFs(basePath, configuration);
|
FileSystem fs = FSUtils.getFs(basePath, configuration);
|
||||||
try (FSDataOutputStream os = fs.create(commitFile, true)) {
|
try (FSDataOutputStream os = fs.create(commitFile, true)) {
|
||||||
HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata();
|
HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata();
|
||||||
@@ -272,28 +272,28 @@ public class HoodieTestDataGenerator {
|
|||||||
* Generates new inserts with nested schema, uniformly across the partition paths above.
|
* Generates new inserts with nested schema, uniformly across the partition paths above.
|
||||||
* It also updates the list of existing keys.
|
* It also updates the list of existing keys.
|
||||||
*/
|
*/
|
||||||
public List<HoodieRecord> generateInserts(String commitTime, Integer n) {
|
public List<HoodieRecord> generateInserts(String instantTime, Integer n) {
|
||||||
return generateInserts(commitTime, n, false);
|
return generateInserts(instantTime, n, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generates new inserts, uniformly across the partition paths above.
|
* Generates new inserts, uniformly across the partition paths above.
|
||||||
* It also updates the list of existing keys.
|
* It also updates the list of existing keys.
|
||||||
*
|
*
|
||||||
* @param commitTime Commit time to use.
|
* @param instantTime Commit time to use.
|
||||||
* @param n Number of records.
|
* @param n Number of records.
|
||||||
* @param isFlattened whether the schema of the generated record is flattened
|
* @param isFlattened whether the schema of the generated record is flattened
|
||||||
* @return List of {@link HoodieRecord}s
|
* @return List of {@link HoodieRecord}s
|
||||||
*/
|
*/
|
||||||
public List<HoodieRecord> generateInserts(String commitTime, Integer n, boolean isFlattened) {
|
public List<HoodieRecord> generateInserts(String instantTime, Integer n, boolean isFlattened) {
|
||||||
return generateInsertsStream(commitTime, n, isFlattened).collect(Collectors.toList());
|
return generateInsertsStream(instantTime, n, isFlattened).collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generates new inserts, uniformly across the partition paths above. It also updates the list of existing keys.
|
* Generates new inserts, uniformly across the partition paths above. It also updates the list of existing keys.
|
||||||
*/
|
*/
|
||||||
public Stream<HoodieRecord> generateInsertsStream(
|
public Stream<HoodieRecord> generateInsertsStream(
|
||||||
String commitTime, Integer n, boolean isFlattened) {
|
String instantTime, Integer n, boolean isFlattened) {
|
||||||
int currSize = getNumExistingKeys();
|
int currSize = getNumExistingKeys();
|
||||||
|
|
||||||
return IntStream.range(0, n).boxed().map(i -> {
|
return IntStream.range(0, n).boxed().map(i -> {
|
||||||
@@ -305,30 +305,30 @@ public class HoodieTestDataGenerator {
|
|||||||
existingKeys.put(currSize + i, kp);
|
existingKeys.put(currSize + i, kp);
|
||||||
numExistingKeys++;
|
numExistingKeys++;
|
||||||
try {
|
try {
|
||||||
return new HoodieRecord(key, generateRandomValue(key, commitTime, isFlattened));
|
return new HoodieRecord(key, generateRandomValue(key, instantTime, isFlattened));
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new HoodieIOException(e.getMessage(), e);
|
throw new HoodieIOException(e.getMessage(), e);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<HoodieRecord> generateSameKeyInserts(String commitTime, List<HoodieRecord> origin) throws IOException {
|
public List<HoodieRecord> generateSameKeyInserts(String instantTime, List<HoodieRecord> origin) throws IOException {
|
||||||
List<HoodieRecord> copy = new ArrayList<>();
|
List<HoodieRecord> copy = new ArrayList<>();
|
||||||
for (HoodieRecord r : origin) {
|
for (HoodieRecord r : origin) {
|
||||||
HoodieKey key = r.getKey();
|
HoodieKey key = r.getKey();
|
||||||
HoodieRecord record = new HoodieRecord(key, generateRandomValue(key, commitTime));
|
HoodieRecord record = new HoodieRecord(key, generateRandomValue(key, instantTime));
|
||||||
copy.add(record);
|
copy.add(record);
|
||||||
}
|
}
|
||||||
return copy;
|
return copy;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<HoodieRecord> generateInsertsWithHoodieAvroPayload(String commitTime, int limit) {
|
public List<HoodieRecord> generateInsertsWithHoodieAvroPayload(String instantTime, int limit) {
|
||||||
List<HoodieRecord> inserts = new ArrayList<>();
|
List<HoodieRecord> inserts = new ArrayList<>();
|
||||||
int currSize = getNumExistingKeys();
|
int currSize = getNumExistingKeys();
|
||||||
for (int i = 0; i < limit; i++) {
|
for (int i = 0; i < limit; i++) {
|
||||||
String partitionPath = partitionPaths[RAND.nextInt(partitionPaths.length)];
|
String partitionPath = partitionPaths[RAND.nextInt(partitionPaths.length)];
|
||||||
HoodieKey key = new HoodieKey(UUID.randomUUID().toString(), partitionPath);
|
HoodieKey key = new HoodieKey(UUID.randomUUID().toString(), partitionPath);
|
||||||
HoodieRecord record = new HoodieRecord(key, generateAvroPayload(key, commitTime));
|
HoodieRecord record = new HoodieRecord(key, generateAvroPayload(key, instantTime));
|
||||||
inserts.add(record);
|
inserts.add(record);
|
||||||
|
|
||||||
KeyPartition kp = new KeyPartition();
|
KeyPartition kp = new KeyPartition();
|
||||||
@@ -340,17 +340,17 @@ public class HoodieTestDataGenerator {
|
|||||||
return inserts;
|
return inserts;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<HoodieRecord> generateUpdatesWithHoodieAvroPayload(String commitTime, List<HoodieRecord> baseRecords) {
|
public List<HoodieRecord> generateUpdatesWithHoodieAvroPayload(String instantTime, List<HoodieRecord> baseRecords) {
|
||||||
List<HoodieRecord> updates = new ArrayList<>();
|
List<HoodieRecord> updates = new ArrayList<>();
|
||||||
for (HoodieRecord baseRecord : baseRecords) {
|
for (HoodieRecord baseRecord : baseRecords) {
|
||||||
HoodieRecord record = new HoodieRecord(baseRecord.getKey(), generateAvroPayload(baseRecord.getKey(), commitTime));
|
HoodieRecord record = new HoodieRecord(baseRecord.getKey(), generateAvroPayload(baseRecord.getKey(), instantTime));
|
||||||
updates.add(record);
|
updates.add(record);
|
||||||
}
|
}
|
||||||
return updates;
|
return updates;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<HoodieRecord> generateDeletes(String commitTime, Integer n) throws IOException {
|
public List<HoodieRecord> generateDeletes(String instantTime, Integer n) throws IOException {
|
||||||
List<HoodieRecord> inserts = generateInserts(commitTime, n);
|
List<HoodieRecord> inserts = generateInserts(instantTime, n);
|
||||||
return generateDeletesFromExistingRecords(inserts);
|
return generateDeletesFromExistingRecords(inserts);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -374,20 +374,20 @@ public class HoodieTestDataGenerator {
|
|||||||
return new HoodieRecord(key, payload);
|
return new HoodieRecord(key, payload);
|
||||||
}
|
}
|
||||||
|
|
||||||
public HoodieRecord generateUpdateRecord(HoodieKey key, String commitTime) throws IOException {
|
public HoodieRecord generateUpdateRecord(HoodieKey key, String instantTime) throws IOException {
|
||||||
return new HoodieRecord(key, generateRandomValue(key, commitTime));
|
return new HoodieRecord(key, generateRandomValue(key, instantTime));
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<HoodieRecord> generateUpdates(String commitTime, List<HoodieRecord> baseRecords) throws IOException {
|
public List<HoodieRecord> generateUpdates(String instantTime, List<HoodieRecord> baseRecords) throws IOException {
|
||||||
List<HoodieRecord> updates = new ArrayList<>();
|
List<HoodieRecord> updates = new ArrayList<>();
|
||||||
for (HoodieRecord baseRecord : baseRecords) {
|
for (HoodieRecord baseRecord : baseRecords) {
|
||||||
HoodieRecord record = generateUpdateRecord(baseRecord.getKey(), commitTime);
|
HoodieRecord record = generateUpdateRecord(baseRecord.getKey(), instantTime);
|
||||||
updates.add(record);
|
updates.add(record);
|
||||||
}
|
}
|
||||||
return updates;
|
return updates;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<HoodieRecord> generateUpdatesWithDiffPartition(String commitTime, List<HoodieRecord> baseRecords)
|
public List<HoodieRecord> generateUpdatesWithDiffPartition(String instantTime, List<HoodieRecord> baseRecords)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
List<HoodieRecord> updates = new ArrayList<>();
|
List<HoodieRecord> updates = new ArrayList<>();
|
||||||
for (HoodieRecord baseRecord : baseRecords) {
|
for (HoodieRecord baseRecord : baseRecords) {
|
||||||
@@ -399,7 +399,7 @@ public class HoodieTestDataGenerator {
|
|||||||
newPartition = partitionPaths[0];
|
newPartition = partitionPaths[0];
|
||||||
}
|
}
|
||||||
HoodieKey key = new HoodieKey(baseRecord.getRecordKey(), newPartition);
|
HoodieKey key = new HoodieKey(baseRecord.getRecordKey(), newPartition);
|
||||||
HoodieRecord record = generateUpdateRecord(key, commitTime);
|
HoodieRecord record = generateUpdateRecord(key, instantTime);
|
||||||
updates.add(record);
|
updates.add(record);
|
||||||
}
|
}
|
||||||
return updates;
|
return updates;
|
||||||
@@ -409,15 +409,15 @@ public class HoodieTestDataGenerator {
|
|||||||
* Generates new updates, randomly distributed across the keys above. There can be duplicates within the returned
|
* Generates new updates, randomly distributed across the keys above. There can be duplicates within the returned
|
||||||
* list
|
* list
|
||||||
*
|
*
|
||||||
* @param commitTime Commit Timestamp
|
* @param instantTime Instant Timestamp
|
||||||
* @param n Number of updates (including dups)
|
* @param n Number of updates (including dups)
|
||||||
* @return list of hoodie record updates
|
* @return list of hoodie record updates
|
||||||
*/
|
*/
|
||||||
public List<HoodieRecord> generateUpdates(String commitTime, Integer n) throws IOException {
|
public List<HoodieRecord> generateUpdates(String instantTime, Integer n) throws IOException {
|
||||||
List<HoodieRecord> updates = new ArrayList<>();
|
List<HoodieRecord> updates = new ArrayList<>();
|
||||||
for (int i = 0; i < n; i++) {
|
for (int i = 0; i < n; i++) {
|
||||||
KeyPartition kp = existingKeys.get(RAND.nextInt(numExistingKeys - 1));
|
KeyPartition kp = existingKeys.get(RAND.nextInt(numExistingKeys - 1));
|
||||||
HoodieRecord record = generateUpdateRecord(kp.key, commitTime);
|
HoodieRecord record = generateUpdateRecord(kp.key, instantTime);
|
||||||
updates.add(record);
|
updates.add(record);
|
||||||
}
|
}
|
||||||
return updates;
|
return updates;
|
||||||
@@ -426,12 +426,12 @@ public class HoodieTestDataGenerator {
|
|||||||
/**
|
/**
|
||||||
* Generates deduped updates of keys previously inserted, randomly distributed across the keys above.
|
* Generates deduped updates of keys previously inserted, randomly distributed across the keys above.
|
||||||
*
|
*
|
||||||
* @param commitTime Commit Timestamp
|
* @param instantTime Instant Timestamp
|
||||||
* @param n Number of unique records
|
* @param n Number of unique records
|
||||||
* @return list of hoodie record updates
|
* @return list of hoodie record updates
|
||||||
*/
|
*/
|
||||||
public List<HoodieRecord> generateUniqueUpdates(String commitTime, Integer n) {
|
public List<HoodieRecord> generateUniqueUpdates(String instantTime, Integer n) {
|
||||||
return generateUniqueUpdatesStream(commitTime, n).collect(Collectors.toList());
|
return generateUniqueUpdatesStream(instantTime, n).collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -447,11 +447,11 @@ public class HoodieTestDataGenerator {
|
|||||||
/**
|
/**
|
||||||
* Generates deduped updates of keys previously inserted, randomly distributed across the keys above.
|
* Generates deduped updates of keys previously inserted, randomly distributed across the keys above.
|
||||||
*
|
*
|
||||||
* @param commitTime Commit Timestamp
|
* @param instantTime Commit Timestamp
|
||||||
* @param n Number of unique records
|
* @param n Number of unique records
|
||||||
* @return stream of hoodie record updates
|
* @return stream of hoodie record updates
|
||||||
*/
|
*/
|
||||||
public Stream<HoodieRecord> generateUniqueUpdatesStream(String commitTime, Integer n) {
|
public Stream<HoodieRecord> generateUniqueUpdatesStream(String instantTime, Integer n) {
|
||||||
final Set<KeyPartition> used = new HashSet<>();
|
final Set<KeyPartition> used = new HashSet<>();
|
||||||
if (n > numExistingKeys) {
|
if (n > numExistingKeys) {
|
||||||
throw new IllegalArgumentException("Requested unique updates is greater than number of available keys");
|
throw new IllegalArgumentException("Requested unique updates is greater than number of available keys");
|
||||||
@@ -467,7 +467,7 @@ public class HoodieTestDataGenerator {
|
|||||||
}
|
}
|
||||||
used.add(kp);
|
used.add(kp);
|
||||||
try {
|
try {
|
||||||
return new HoodieRecord(kp.key, generateRandomValue(kp.key, commitTime));
|
return new HoodieRecord(kp.key, generateRandomValue(kp.key, instantTime));
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new HoodieIOException(e.getMessage(), e);
|
throw new HoodieIOException(e.getMessage(), e);
|
||||||
}
|
}
|
||||||
@@ -505,11 +505,11 @@ public class HoodieTestDataGenerator {
|
|||||||
/**
|
/**
|
||||||
* Generates deduped delete records previously inserted, randomly distributed across the keys above.
|
* Generates deduped delete records previously inserted, randomly distributed across the keys above.
|
||||||
*
|
*
|
||||||
* @param commitTime Commit Timestamp
|
* @param instantTime Commit Timestamp
|
||||||
* @param n Number of unique records
|
* @param n Number of unique records
|
||||||
* @return stream of hoodie records for delete
|
* @return stream of hoodie records for delete
|
||||||
*/
|
*/
|
||||||
public Stream<HoodieRecord> generateUniqueDeleteRecordStream(String commitTime, Integer n) {
|
public Stream<HoodieRecord> generateUniqueDeleteRecordStream(String instantTime, Integer n) {
|
||||||
final Set<KeyPartition> used = new HashSet<>();
|
final Set<KeyPartition> used = new HashSet<>();
|
||||||
if (n > numExistingKeys) {
|
if (n > numExistingKeys) {
|
||||||
throw new IllegalArgumentException("Requested unique deletes is greater than number of available keys");
|
throw new IllegalArgumentException("Requested unique deletes is greater than number of available keys");
|
||||||
@@ -528,7 +528,7 @@ public class HoodieTestDataGenerator {
|
|||||||
numExistingKeys--;
|
numExistingKeys--;
|
||||||
used.add(kp);
|
used.add(kp);
|
||||||
try {
|
try {
|
||||||
result.add(new HoodieRecord(kp.key, generateRandomDeleteValue(kp.key, commitTime)));
|
result.add(new HoodieRecord(kp.key, generateRandomDeleteValue(kp.key, instantTime)));
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new HoodieIOException(e.getMessage(), e);
|
throw new HoodieIOException(e.getMessage(), e);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -43,7 +43,7 @@ import static org.mockito.Mockito.when;
|
|||||||
|
|
||||||
public class TestBoundedInMemoryExecutor extends HoodieClientTestHarness {
|
public class TestBoundedInMemoryExecutor extends HoodieClientTestHarness {
|
||||||
|
|
||||||
private final String commitTime = HoodieActiveTimeline.createNewInstantTime();
|
private final String instantTime = HoodieActiveTimeline.createNewInstantTime();
|
||||||
|
|
||||||
@Before
|
@Before
|
||||||
public void setUp() throws Exception {
|
public void setUp() throws Exception {
|
||||||
@@ -58,7 +58,7 @@ public class TestBoundedInMemoryExecutor extends HoodieClientTestHarness {
|
|||||||
@Test
|
@Test
|
||||||
public void testExecutor() {
|
public void testExecutor() {
|
||||||
|
|
||||||
final List<HoodieRecord> hoodieRecords = dataGen.generateInserts(commitTime, 100);
|
final List<HoodieRecord> hoodieRecords = dataGen.generateInserts(instantTime, 100);
|
||||||
|
|
||||||
HoodieWriteConfig hoodieWriteConfig = mock(HoodieWriteConfig.class);
|
HoodieWriteConfig hoodieWriteConfig = mock(HoodieWriteConfig.class);
|
||||||
when(hoodieWriteConfig.getWriteBufferLimitBytes()).thenReturn(1024);
|
when(hoodieWriteConfig.getWriteBufferLimitBytes()).thenReturn(1024);
|
||||||
|
|||||||
@@ -59,7 +59,7 @@ import static org.mockito.Mockito.when;
|
|||||||
|
|
||||||
public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
|
public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
|
||||||
|
|
||||||
private final String commitTime = HoodieActiveTimeline.createNewInstantTime();
|
private final String instantTime = HoodieActiveTimeline.createNewInstantTime();
|
||||||
|
|
||||||
@Before
|
@Before
|
||||||
public void setUp() throws Exception {
|
public void setUp() throws Exception {
|
||||||
@@ -79,7 +79,7 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
|
|||||||
@Test(timeout = 60000)
|
@Test(timeout = 60000)
|
||||||
public void testRecordReading() throws Exception {
|
public void testRecordReading() throws Exception {
|
||||||
final int numRecords = 128;
|
final int numRecords = 128;
|
||||||
final List<HoodieRecord> hoodieRecords = dataGen.generateInserts(commitTime, numRecords);
|
final List<HoodieRecord> hoodieRecords = dataGen.generateInserts(instantTime, numRecords);
|
||||||
final BoundedInMemoryQueue<HoodieRecord, HoodieInsertValueGenResult<HoodieRecord>> queue =
|
final BoundedInMemoryQueue<HoodieRecord, HoodieInsertValueGenResult<HoodieRecord>> queue =
|
||||||
new BoundedInMemoryQueue(FileIOUtils.KB, getTransformFunction(HoodieTestDataGenerator.AVRO_SCHEMA));
|
new BoundedInMemoryQueue(FileIOUtils.KB, getTransformFunction(HoodieTestDataGenerator.AVRO_SCHEMA));
|
||||||
// Produce
|
// Produce
|
||||||
@@ -126,7 +126,7 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
|
|||||||
Map<String, Tuple2<Integer, Integer>> keyToProducerAndIndexMap = new HashMap<>();
|
Map<String, Tuple2<Integer, Integer>> keyToProducerAndIndexMap = new HashMap<>();
|
||||||
|
|
||||||
for (int i = 0; i < numProducers; i++) {
|
for (int i = 0; i < numProducers; i++) {
|
||||||
List<HoodieRecord> pRecs = dataGen.generateInserts(commitTime, numRecords);
|
List<HoodieRecord> pRecs = dataGen.generateInserts(instantTime, numRecords);
|
||||||
int j = 0;
|
int j = 0;
|
||||||
for (HoodieRecord r : pRecs) {
|
for (HoodieRecord r : pRecs) {
|
||||||
Assert.assertTrue(!keyToProducerAndIndexMap.containsKey(r.getRecordKey()));
|
Assert.assertTrue(!keyToProducerAndIndexMap.containsKey(r.getRecordKey()));
|
||||||
@@ -209,7 +209,7 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
|
|||||||
@Test(timeout = 60000)
|
@Test(timeout = 60000)
|
||||||
public void testMemoryLimitForBuffering() throws Exception {
|
public void testMemoryLimitForBuffering() throws Exception {
|
||||||
final int numRecords = 128;
|
final int numRecords = 128;
|
||||||
final List<HoodieRecord> hoodieRecords = dataGen.generateInserts(commitTime, numRecords);
|
final List<HoodieRecord> hoodieRecords = dataGen.generateInserts(instantTime, numRecords);
|
||||||
// maximum number of records to keep in memory.
|
// maximum number of records to keep in memory.
|
||||||
final int recordLimit = 5;
|
final int recordLimit = 5;
|
||||||
final SizeEstimator<HoodieInsertValueGenResult<HoodieRecord>> sizeEstimator = new DefaultSizeEstimator<>();
|
final SizeEstimator<HoodieInsertValueGenResult<HoodieRecord>> sizeEstimator = new DefaultSizeEstimator<>();
|
||||||
@@ -258,7 +258,7 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness {
|
|||||||
@Test(timeout = 60000)
|
@Test(timeout = 60000)
|
||||||
public void testException() throws Exception {
|
public void testException() throws Exception {
|
||||||
final int numRecords = 256;
|
final int numRecords = 256;
|
||||||
final List<HoodieRecord> hoodieRecords = dataGen.generateInserts(commitTime, numRecords);
|
final List<HoodieRecord> hoodieRecords = dataGen.generateInserts(instantTime, numRecords);
|
||||||
final SizeEstimator<Tuple2<HoodieRecord, Option<IndexedRecord>>> sizeEstimator = new DefaultSizeEstimator<>();
|
final SizeEstimator<Tuple2<HoodieRecord, Option<IndexedRecord>>> sizeEstimator = new DefaultSizeEstimator<>();
|
||||||
// queue memory limit
|
// queue memory limit
|
||||||
HoodieInsertValueGenResult<HoodieRecord> payload =
|
HoodieInsertValueGenResult<HoodieRecord> payload =
|
||||||
|
|||||||
@@ -125,7 +125,7 @@ public class TestHoodieIndex extends HoodieClientTestHarness {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean rollbackCommit(String commitTime) {
|
public boolean rollbackCommit(String instantTime) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -40,18 +40,18 @@ public class TestHoodieStorageWriterFactory extends TestHoodieClientBase {
|
|||||||
@Test
|
@Test
|
||||||
public void testGetStorageWriter() throws IOException {
|
public void testGetStorageWriter() throws IOException {
|
||||||
// parquet file format.
|
// parquet file format.
|
||||||
final String commitTime = "100";
|
final String instantTime = "100";
|
||||||
final Path parquetPath = new Path(basePath + "/partition/path/f1_1-0-1_000.parquet");
|
final Path parquetPath = new Path(basePath + "/partition/path/f1_1-0-1_000.parquet");
|
||||||
final HoodieWriteConfig cfg = getConfig();
|
final HoodieWriteConfig cfg = getConfig();
|
||||||
HoodieTable table = HoodieTable.getHoodieTable(metaClient, cfg, jsc);
|
HoodieTable table = HoodieTable.getHoodieTable(metaClient, cfg, jsc);
|
||||||
HoodieStorageWriter<IndexedRecord> parquetWriter = HoodieStorageWriterFactory.getStorageWriter(commitTime,
|
HoodieStorageWriter<IndexedRecord> parquetWriter = HoodieStorageWriterFactory.getStorageWriter(instantTime,
|
||||||
parquetPath, table, cfg, HoodieTestDataGenerator.AVRO_SCHEMA);
|
parquetPath, table, cfg, HoodieTestDataGenerator.AVRO_SCHEMA);
|
||||||
Assert.assertTrue(parquetWriter instanceof HoodieParquetWriter);
|
Assert.assertTrue(parquetWriter instanceof HoodieParquetWriter);
|
||||||
|
|
||||||
// other file format exception.
|
// other file format exception.
|
||||||
final Path logPath = new Path(basePath + "/partition/path/f.b51192a8-574b-4a85-b246-bcfec03ac8bf_100.log.2_1-0-1");
|
final Path logPath = new Path(basePath + "/partition/path/f.b51192a8-574b-4a85-b246-bcfec03ac8bf_100.log.2_1-0-1");
|
||||||
try {
|
try {
|
||||||
HoodieStorageWriter<IndexedRecord> logWriter = HoodieStorageWriterFactory.getStorageWriter(commitTime, logPath,
|
HoodieStorageWriter<IndexedRecord> logWriter = HoodieStorageWriterFactory.getStorageWriter(instantTime, logPath,
|
||||||
table, cfg, HoodieTestDataGenerator.AVRO_SCHEMA);
|
table, cfg, HoodieTestDataGenerator.AVRO_SCHEMA);
|
||||||
fail("should fail since log storage writer is not supported yet.");
|
fail("should fail since log storage writer is not supported yet.");
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
|||||||
@@ -131,9 +131,9 @@ public class TestCleaner extends TestHoodieClientBase {
|
|||||||
HoodieTable table = HoodieTable.getHoodieTable(metaClient, client.getConfig(), jsc);
|
HoodieTable table = HoodieTable.getHoodieTable(metaClient, client.getConfig(), jsc);
|
||||||
|
|
||||||
assertFalse(table.getCompletedCommitsTimeline().empty());
|
assertFalse(table.getCompletedCommitsTimeline().empty());
|
||||||
String commitTime = table.getCompletedCommitsTimeline().getInstants().findFirst().get().getTimestamp();
|
String instantTime = table.getCompletedCommitsTimeline().getInstants().findFirst().get().getTimestamp();
|
||||||
assertFalse(table.getCompletedCleanTimeline().empty());
|
assertFalse(table.getCompletedCleanTimeline().empty());
|
||||||
assertEquals("The clean instant should be the same as the commit instant", commitTime,
|
assertEquals("The clean instant should be the same as the commit instant", instantTime,
|
||||||
table.getCompletedCleanTimeline().getInstants().findFirst().get().getTimestamp());
|
table.getCompletedCleanTimeline().getInstants().findFirst().get().getTimestamp());
|
||||||
|
|
||||||
HoodieIndex index = HoodieIndex.createIndex(cfg, jsc);
|
HoodieIndex index = HoodieIndex.createIndex(cfg, jsc);
|
||||||
@@ -173,7 +173,7 @@ public class TestCleaner extends TestHoodieClientBase {
|
|||||||
@Test
|
@Test
|
||||||
public void testBulkInsertPreppedAndCleanByVersions() throws Exception {
|
public void testBulkInsertPreppedAndCleanByVersions() throws Exception {
|
||||||
testInsertAndCleanByVersions(
|
testInsertAndCleanByVersions(
|
||||||
(client, recordRDD, commitTime) -> client.bulkInsertPreppedRecords(recordRDD, commitTime, Option.empty()),
|
(client, recordRDD, instantTime) -> client.bulkInsertPreppedRecords(recordRDD, instantTime, Option.empty()),
|
||||||
HoodieWriteClient::upsertPreppedRecords, true);
|
HoodieWriteClient::upsertPreppedRecords, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -325,7 +325,7 @@ public class TestCleaner extends TestHoodieClientBase {
|
|||||||
@Test
|
@Test
|
||||||
public void testBulkInsertPreppedAndCleanByCommits() throws Exception {
|
public void testBulkInsertPreppedAndCleanByCommits() throws Exception {
|
||||||
testInsertAndCleanByCommits(
|
testInsertAndCleanByCommits(
|
||||||
(client, recordRDD, commitTime) -> client.bulkInsertPreppedRecords(recordRDD, commitTime, Option.empty()),
|
(client, recordRDD, instantTime) -> client.bulkInsertPreppedRecords(recordRDD, instantTime, Option.empty()),
|
||||||
HoodieWriteClient::upsertPreppedRecords, true);
|
HoodieWriteClient::upsertPreppedRecords, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -598,7 +598,7 @@ public class TestCleaner extends TestHoodieClientBase {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testUpgradeDowngrade() {
|
public void testUpgradeDowngrade() {
|
||||||
String commitTime = "000";
|
String instantTime = "000";
|
||||||
|
|
||||||
String partition1 = DEFAULT_PARTITION_PATHS[0];
|
String partition1 = DEFAULT_PARTITION_PATHS[0];
|
||||||
String partition2 = DEFAULT_PARTITION_PATHS[1];
|
String partition2 = DEFAULT_PARTITION_PATHS[1];
|
||||||
@@ -616,7 +616,7 @@ public class TestCleaner extends TestHoodieClientBase {
|
|||||||
// create partition1 clean stat.
|
// create partition1 clean stat.
|
||||||
HoodieCleanStat cleanStat1 = new HoodieCleanStat(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS,
|
HoodieCleanStat cleanStat1 = new HoodieCleanStat(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS,
|
||||||
partition1, deletePathPatterns1, successDeleteFiles1,
|
partition1, deletePathPatterns1, successDeleteFiles1,
|
||||||
failedDeleteFiles1, commitTime);
|
failedDeleteFiles1, instantTime);
|
||||||
|
|
||||||
List<String> deletePathPatterns2 = new ArrayList<>();
|
List<String> deletePathPatterns2 = new ArrayList<>();
|
||||||
List<String> successDeleteFiles2 = new ArrayList<>();
|
List<String> successDeleteFiles2 = new ArrayList<>();
|
||||||
@@ -625,7 +625,7 @@ public class TestCleaner extends TestHoodieClientBase {
|
|||||||
// create partition2 empty clean stat.
|
// create partition2 empty clean stat.
|
||||||
HoodieCleanStat cleanStat2 = new HoodieCleanStat(HoodieCleaningPolicy.KEEP_LATEST_COMMITS,
|
HoodieCleanStat cleanStat2 = new HoodieCleanStat(HoodieCleaningPolicy.KEEP_LATEST_COMMITS,
|
||||||
partition2, deletePathPatterns2, successDeleteFiles2,
|
partition2, deletePathPatterns2, successDeleteFiles2,
|
||||||
failedDeleteFiles2, commitTime);
|
failedDeleteFiles2, instantTime);
|
||||||
|
|
||||||
// map with absolute file path.
|
// map with absolute file path.
|
||||||
Map<String, Tuple3> oldExpected = new HashMap<>();
|
Map<String, Tuple3> oldExpected = new HashMap<>();
|
||||||
@@ -639,7 +639,7 @@ public class TestCleaner extends TestHoodieClientBase {
|
|||||||
newExpected.put(partition2, new Tuple3<>(deletePathPatterns2, successDeleteFiles2, failedDeleteFiles2));
|
newExpected.put(partition2, new Tuple3<>(deletePathPatterns2, successDeleteFiles2, failedDeleteFiles2));
|
||||||
|
|
||||||
HoodieCleanMetadata metadata =
|
HoodieCleanMetadata metadata =
|
||||||
CleanerUtils.convertCleanMetadata(metaClient, commitTime, Option.of(0L), Arrays.asList(cleanStat1, cleanStat2));
|
CleanerUtils.convertCleanMetadata(metaClient, instantTime, Option.of(0L), Arrays.asList(cleanStat1, cleanStat2));
|
||||||
metadata.setVersion(CleanerUtils.CLEAN_METADATA_VERSION_1);
|
metadata.setVersion(CleanerUtils.CLEAN_METADATA_VERSION_1);
|
||||||
|
|
||||||
// NOw upgrade and check
|
// NOw upgrade and check
|
||||||
@@ -1107,15 +1107,15 @@ public class TestCleaner extends TestHoodieClientBase {
|
|||||||
/**
|
/**
|
||||||
* Utility method to create temporary data files.
|
* Utility method to create temporary data files.
|
||||||
*
|
*
|
||||||
* @param commitTime Commit Timestamp
|
* @param instantTime Commit Timestamp
|
||||||
* @param numFiles Number for files to be generated
|
* @param numFiles Number for files to be generated
|
||||||
* @return generated files
|
* @return generated files
|
||||||
* @throws IOException in case of error
|
* @throws IOException in case of error
|
||||||
*/
|
*/
|
||||||
private List<String> createMarkerFiles(String commitTime, int numFiles) throws IOException {
|
private List<String> createMarkerFiles(String instantTime, int numFiles) throws IOException {
|
||||||
List<String> files = new ArrayList<>();
|
List<String> files = new ArrayList<>();
|
||||||
for (int i = 0; i < numFiles; i++) {
|
for (int i = 0; i < numFiles; i++) {
|
||||||
files.add(HoodieTestUtils.createNewMarkerFile(basePath, "2019/03/29", commitTime));
|
files.add(HoodieTestUtils.createNewMarkerFile(basePath, "2019/03/29", instantTime));
|
||||||
}
|
}
|
||||||
return files;
|
return files;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -93,7 +93,7 @@ public class TestCopyOnWriteTable extends HoodieClientTestHarness {
|
|||||||
String fileName = UUID.randomUUID().toString();
|
String fileName = UUID.randomUUID().toString();
|
||||||
String partitionPath = "2016/05/04";
|
String partitionPath = "2016/05/04";
|
||||||
|
|
||||||
String commitTime = HoodieTestUtils.makeNewCommitTime();
|
String instantTime = HoodieTestUtils.makeNewCommitTime();
|
||||||
HoodieWriteConfig config = makeHoodieClientConfig();
|
HoodieWriteConfig config = makeHoodieClientConfig();
|
||||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||||
HoodieTable table = HoodieTable.getHoodieTable(metaClient, config, jsc);
|
HoodieTable table = HoodieTable.getHoodieTable(metaClient, config, jsc);
|
||||||
@@ -103,12 +103,12 @@ public class TestCopyOnWriteTable extends HoodieClientTestHarness {
|
|||||||
when(record.getPartitionPath()).thenReturn(partitionPath);
|
when(record.getPartitionPath()).thenReturn(partitionPath);
|
||||||
String writeToken = FSUtils.makeWriteToken(TaskContext.getPartitionId(), TaskContext.get().stageId(),
|
String writeToken = FSUtils.makeWriteToken(TaskContext.getPartitionId(), TaskContext.get().stageId(),
|
||||||
TaskContext.get().taskAttemptId());
|
TaskContext.get().taskAttemptId());
|
||||||
HoodieCreateHandle io = new HoodieCreateHandle(config, commitTime, table, partitionPath, fileName);
|
HoodieCreateHandle io = new HoodieCreateHandle(config, instantTime, table, partitionPath, fileName);
|
||||||
return Pair.of(io.makeNewPath(record.getPartitionPath()), writeToken);
|
return Pair.of(io.makeNewPath(record.getPartitionPath()), writeToken);
|
||||||
}).collect().get(0);
|
}).collect().get(0);
|
||||||
|
|
||||||
Assert.assertEquals(newPathWithWriteToken.getKey().toString(), this.basePath + "/" + partitionPath + "/"
|
Assert.assertEquals(newPathWithWriteToken.getKey().toString(), this.basePath + "/" + partitionPath + "/"
|
||||||
+ FSUtils.makeDataFileName(commitTime, newPathWithWriteToken.getRight(), fileName));
|
+ FSUtils.makeDataFileName(instantTime, newPathWithWriteToken.getRight(), fileName));
|
||||||
}
|
}
|
||||||
|
|
||||||
private HoodieWriteConfig makeHoodieClientConfig() throws Exception {
|
private HoodieWriteConfig makeHoodieClientConfig() throws Exception {
|
||||||
@@ -306,7 +306,7 @@ public class TestCopyOnWriteTable extends HoodieClientTestHarness {
|
|||||||
@Test
|
@Test
|
||||||
public void testInsertRecords() throws Exception {
|
public void testInsertRecords() throws Exception {
|
||||||
HoodieWriteConfig config = makeHoodieClientConfig();
|
HoodieWriteConfig config = makeHoodieClientConfig();
|
||||||
String commitTime = HoodieTestUtils.makeNewCommitTime();
|
String instantTime = HoodieTestUtils.makeNewCommitTime();
|
||||||
HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, jsc);
|
HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, jsc);
|
||||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||||
|
|
||||||
@@ -318,7 +318,7 @@ public class TestCopyOnWriteTable extends HoodieClientTestHarness {
|
|||||||
// Insert new records
|
// Insert new records
|
||||||
final List<HoodieRecord> recs2 = records;
|
final List<HoodieRecord> recs2 = records;
|
||||||
List<WriteStatus> returnedStatuses = jsc.parallelize(Arrays.asList(1)).map(x -> {
|
List<WriteStatus> returnedStatuses = jsc.parallelize(Arrays.asList(1)).map(x -> {
|
||||||
return table.handleInsert(commitTime, FSUtils.createNewFileIdPfx(), recs2.iterator());
|
return table.handleInsert(instantTime, FSUtils.createNewFileIdPfx(), recs2.iterator());
|
||||||
}).flatMap(x -> HoodieClientTestUtils.collectStatuses(x).iterator()).collect();
|
}).flatMap(x -> HoodieClientTestUtils.collectStatuses(x).iterator()).collect();
|
||||||
|
|
||||||
// TODO: check the actual files and make sure 11 records, total were written.
|
// TODO: check the actual files and make sure 11 records, total were written.
|
||||||
@@ -340,7 +340,7 @@ public class TestCopyOnWriteTable extends HoodieClientTestHarness {
|
|||||||
final List<HoodieRecord> recs3 = records;
|
final List<HoodieRecord> recs3 = records;
|
||||||
|
|
||||||
returnedStatuses = jsc.parallelize(Arrays.asList(1)).map(x -> {
|
returnedStatuses = jsc.parallelize(Arrays.asList(1)).map(x -> {
|
||||||
return table.handleInsert(commitTime, FSUtils.createNewFileIdPfx(), recs3.iterator());
|
return table.handleInsert(instantTime, FSUtils.createNewFileIdPfx(), recs3.iterator());
|
||||||
}).flatMap(x -> HoodieClientTestUtils.collectStatuses(x).iterator()).collect();
|
}).flatMap(x -> HoodieClientTestUtils.collectStatuses(x).iterator()).collect();
|
||||||
|
|
||||||
assertEquals(3, returnedStatuses.size());
|
assertEquals(3, returnedStatuses.size());
|
||||||
@@ -359,7 +359,7 @@ public class TestCopyOnWriteTable extends HoodieClientTestHarness {
|
|||||||
public void testFileSizeUpsertRecords() throws Exception {
|
public void testFileSizeUpsertRecords() throws Exception {
|
||||||
HoodieWriteConfig config = makeHoodieClientConfigBuilder().withStorageConfig(HoodieStorageConfig.newBuilder()
|
HoodieWriteConfig config = makeHoodieClientConfigBuilder().withStorageConfig(HoodieStorageConfig.newBuilder()
|
||||||
.limitFileSize(64 * 1024).parquetBlockSize(64 * 1024).parquetPageSize(64 * 1024).build()).build();
|
.limitFileSize(64 * 1024).parquetBlockSize(64 * 1024).parquetPageSize(64 * 1024).build()).build();
|
||||||
String commitTime = HoodieTestUtils.makeNewCommitTime();
|
String instantTime = HoodieTestUtils.makeNewCommitTime();
|
||||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||||
HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, jsc);
|
HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, jsc);
|
||||||
|
|
||||||
@@ -374,13 +374,13 @@ public class TestCopyOnWriteTable extends HoodieClientTestHarness {
|
|||||||
|
|
||||||
// Insert new records
|
// Insert new records
|
||||||
jsc.parallelize(Arrays.asList(1))
|
jsc.parallelize(Arrays.asList(1))
|
||||||
.map(i -> table.handleInsert(commitTime, FSUtils.createNewFileIdPfx(), records.iterator()))
|
.map(i -> table.handleInsert(instantTime, FSUtils.createNewFileIdPfx(), records.iterator()))
|
||||||
.map(x -> HoodieClientTestUtils.collectStatuses(x)).collect();
|
.map(x -> HoodieClientTestUtils.collectStatuses(x)).collect();
|
||||||
|
|
||||||
// Check the updated file
|
// Check the updated file
|
||||||
int counts = 0;
|
int counts = 0;
|
||||||
for (File file : new File(basePath + "/2016/01/31").listFiles()) {
|
for (File file : new File(basePath + "/2016/01/31").listFiles()) {
|
||||||
if (file.getName().endsWith(".parquet") && FSUtils.getCommitTime(file.getName()).equals(commitTime)) {
|
if (file.getName().endsWith(".parquet") && FSUtils.getCommitTime(file.getName()).equals(instantTime)) {
|
||||||
LOG.info(file.getName() + "-" + file.length());
|
LOG.info(file.getName() + "-" + file.length());
|
||||||
counts++;
|
counts++;
|
||||||
}
|
}
|
||||||
@@ -471,11 +471,11 @@ public class TestCopyOnWriteTable extends HoodieClientTestHarness {
|
|||||||
.withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1000 * 1024).build()).build();
|
.withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1000 * 1024).build()).build();
|
||||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||||
final HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, jsc);
|
final HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, jsc);
|
||||||
String commitTime = "000";
|
String instantTime = "000";
|
||||||
// Perform inserts of 100 records to test CreateHandle and BufferedExecutor
|
// Perform inserts of 100 records to test CreateHandle and BufferedExecutor
|
||||||
final List<HoodieRecord> inserts = dataGen.generateInsertsWithHoodieAvroPayload(commitTime, 100);
|
final List<HoodieRecord> inserts = dataGen.generateInsertsWithHoodieAvroPayload(instantTime, 100);
|
||||||
final List<List<WriteStatus>> ws = jsc.parallelize(Arrays.asList(1)).map(x -> {
|
final List<List<WriteStatus>> ws = jsc.parallelize(Arrays.asList(1)).map(x -> {
|
||||||
return table.handleInsert(commitTime, UUID.randomUUID().toString(), inserts.iterator());
|
return table.handleInsert(instantTime, UUID.randomUUID().toString(), inserts.iterator());
|
||||||
}).map(x -> (List<WriteStatus>) HoodieClientTestUtils.collectStatuses(x)).collect();
|
}).map(x -> (List<WriteStatus>) HoodieClientTestUtils.collectStatuses(x)).collect();
|
||||||
|
|
||||||
WriteStatus writeStatus = ws.get(0).get(0);
|
WriteStatus writeStatus = ws.get(0).get(0);
|
||||||
@@ -483,12 +483,12 @@ public class TestCopyOnWriteTable extends HoodieClientTestHarness {
|
|||||||
metaClient.getFs().create(new Path(basePath + "/.hoodie/000.commit")).close();
|
metaClient.getFs().create(new Path(basePath + "/.hoodie/000.commit")).close();
|
||||||
final HoodieCopyOnWriteTable table2 = new HoodieCopyOnWriteTable(config, jsc);
|
final HoodieCopyOnWriteTable table2 = new HoodieCopyOnWriteTable(config, jsc);
|
||||||
final List<HoodieRecord> updates =
|
final List<HoodieRecord> updates =
|
||||||
dataGen.generateUpdatesWithHoodieAvroPayload(commitTime, inserts);
|
dataGen.generateUpdatesWithHoodieAvroPayload(instantTime, inserts);
|
||||||
|
|
||||||
String partitionPath = updates.get(0).getPartitionPath();
|
String partitionPath = updates.get(0).getPartitionPath();
|
||||||
long numRecordsInPartition = updates.stream().filter(u -> u.getPartitionPath().equals(partitionPath)).count();
|
long numRecordsInPartition = updates.stream().filter(u -> u.getPartitionPath().equals(partitionPath)).count();
|
||||||
final List<List<WriteStatus>> updateStatus = jsc.parallelize(Arrays.asList(1)).map(x -> {
|
final List<List<WriteStatus>> updateStatus = jsc.parallelize(Arrays.asList(1)).map(x -> {
|
||||||
return table.handleUpdate(commitTime, partitionPath, fileId, updates.iterator());
|
return table.handleUpdate(instantTime, partitionPath, fileId, updates.iterator());
|
||||||
}).map(x -> (List<WriteStatus>) HoodieClientTestUtils.collectStatuses(x)).collect();
|
}).map(x -> (List<WriteStatus>) HoodieClientTestUtils.collectStatuses(x)).collect();
|
||||||
assertEquals(updates.size() - numRecordsInPartition, updateStatus.get(0).get(0).getTotalErrorRecords());
|
assertEquals(updates.size() - numRecordsInPartition, updateStatus.get(0).get(0).getTotalErrorRecords());
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -839,11 +839,11 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
|||||||
|
|
||||||
Assert.assertTrue(numLogFiles > 0);
|
Assert.assertTrue(numLogFiles > 0);
|
||||||
// Do a compaction
|
// Do a compaction
|
||||||
String commitTime = writeClient.scheduleCompaction(Option.empty()).get().toString();
|
String instantTime = writeClient.scheduleCompaction(Option.empty()).get().toString();
|
||||||
statuses = writeClient.compact(commitTime);
|
statuses = writeClient.compact(instantTime);
|
||||||
assertEquals(statuses.map(status -> status.getStat().getPath().contains("parquet")).count(), numLogFiles);
|
assertEquals(statuses.map(status -> status.getStat().getPath().contains("parquet")).count(), numLogFiles);
|
||||||
Assert.assertEquals(statuses.count(), numLogFiles);
|
Assert.assertEquals(statuses.count(), numLogFiles);
|
||||||
writeClient.commitCompaction(commitTime, statuses, Option.empty());
|
writeClient.commitCompaction(instantTime, statuses, Option.empty());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -991,14 +991,14 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
|||||||
instant = new HoodieInstant(State.INFLIGHT, commitActionType, "000");
|
instant = new HoodieInstant(State.INFLIGHT, commitActionType, "000");
|
||||||
activeTimeline.saveAsComplete(instant, Option.empty());
|
activeTimeline.saveAsComplete(instant, Option.empty());
|
||||||
|
|
||||||
String commitTime = "001";
|
String instantTime = "001";
|
||||||
client.startCommitWithTime(commitTime);
|
client.startCommitWithTime(instantTime);
|
||||||
|
|
||||||
List<HoodieRecord> records = dataGen.generateInserts(commitTime, 200);
|
List<HoodieRecord> records = dataGen.generateInserts(instantTime, 200);
|
||||||
JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(records, 1);
|
JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(records, 1);
|
||||||
|
|
||||||
JavaRDD<WriteStatus> statuses = client.insert(writeRecords, commitTime);
|
JavaRDD<WriteStatus> statuses = client.insert(writeRecords, instantTime);
|
||||||
assertTrue("Commit should succeed", client.commit(commitTime, statuses));
|
assertTrue("Commit should succeed", client.commit(instantTime, statuses));
|
||||||
|
|
||||||
// Read from commit file
|
// Read from commit file
|
||||||
table = HoodieTable.getHoodieTable(metaClient, cfg, jsc);
|
table = HoodieTable.getHoodieTable(metaClient, cfg, jsc);
|
||||||
@@ -1018,12 +1018,12 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
|||||||
}
|
}
|
||||||
Assert.assertEquals(inserts, 200);
|
Assert.assertEquals(inserts, 200);
|
||||||
|
|
||||||
commitTime = "002";
|
instantTime = "002";
|
||||||
client.startCommitWithTime(commitTime);
|
client.startCommitWithTime(instantTime);
|
||||||
records = dataGen.generateUpdates(commitTime, records);
|
records = dataGen.generateUpdates(instantTime, records);
|
||||||
writeRecords = jsc.parallelize(records, 1);
|
writeRecords = jsc.parallelize(records, 1);
|
||||||
statuses = client.upsert(writeRecords, commitTime);
|
statuses = client.upsert(writeRecords, instantTime);
|
||||||
assertTrue("Commit should succeed", client.commit(commitTime, statuses));
|
assertTrue("Commit should succeed", client.commit(instantTime, statuses));
|
||||||
|
|
||||||
// Read from commit file
|
// Read from commit file
|
||||||
table = HoodieTable.getHoodieTable(metaClient, cfg, jsc);
|
table = HoodieTable.getHoodieTable(metaClient, cfg, jsc);
|
||||||
@@ -1047,7 +1047,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
|||||||
Assert.assertEquals(inserts, 200);
|
Assert.assertEquals(inserts, 200);
|
||||||
Assert.assertEquals(upserts, 200);
|
Assert.assertEquals(upserts, 200);
|
||||||
|
|
||||||
client.rollback(commitTime);
|
client.rollback(instantTime);
|
||||||
|
|
||||||
// Read from commit file
|
// Read from commit file
|
||||||
table = HoodieTable.getHoodieTable(metaClient, cfg, jsc);
|
table = HoodieTable.getHoodieTable(metaClient, cfg, jsc);
|
||||||
@@ -1084,14 +1084,14 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
|||||||
Map<String, Long> fileIdToInsertsMap = new HashMap<>();
|
Map<String, Long> fileIdToInsertsMap = new HashMap<>();
|
||||||
Map<String, Long> fileIdToUpsertsMap = new HashMap<>();
|
Map<String, Long> fileIdToUpsertsMap = new HashMap<>();
|
||||||
|
|
||||||
String commitTime = "000";
|
String instantTime = "000";
|
||||||
client.startCommitWithTime(commitTime);
|
client.startCommitWithTime(instantTime);
|
||||||
|
|
||||||
List<HoodieRecord> records = dataGen.generateInserts(commitTime, 200);
|
List<HoodieRecord> records = dataGen.generateInserts(instantTime, 200);
|
||||||
JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(records, 1);
|
JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(records, 1);
|
||||||
|
|
||||||
JavaRDD<WriteStatus> statuses = client.insert(writeRecords, commitTime);
|
JavaRDD<WriteStatus> statuses = client.insert(writeRecords, instantTime);
|
||||||
assertTrue("Commit should succeed", client.commit(commitTime, statuses));
|
assertTrue("Commit should succeed", client.commit(instantTime, statuses));
|
||||||
|
|
||||||
// Read from commit file
|
// Read from commit file
|
||||||
HoodieTable table = HoodieTable.getHoodieTable(metaClient, cfg, jsc);
|
HoodieTable table = HoodieTable.getHoodieTable(metaClient, cfg, jsc);
|
||||||
@@ -1113,14 +1113,14 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
|||||||
}
|
}
|
||||||
Assert.assertEquals(inserts, 200);
|
Assert.assertEquals(inserts, 200);
|
||||||
|
|
||||||
commitTime = "001";
|
instantTime = "001";
|
||||||
client.startCommitWithTime(commitTime);
|
client.startCommitWithTime(instantTime);
|
||||||
// generate updates + inserts. inserts should be handled into small files
|
// generate updates + inserts. inserts should be handled into small files
|
||||||
records = dataGen.generateUpdates(commitTime, records);
|
records = dataGen.generateUpdates(instantTime, records);
|
||||||
records.addAll(dataGen.generateInserts(commitTime, 200));
|
records.addAll(dataGen.generateInserts(instantTime, 200));
|
||||||
writeRecords = jsc.parallelize(records, 1);
|
writeRecords = jsc.parallelize(records, 1);
|
||||||
statuses = client.upsert(writeRecords, commitTime);
|
statuses = client.upsert(writeRecords, instantTime);
|
||||||
assertTrue("Commit should succeed", client.commit(commitTime, statuses));
|
assertTrue("Commit should succeed", client.commit(instantTime, statuses));
|
||||||
|
|
||||||
// Read from commit file
|
// Read from commit file
|
||||||
table = HoodieTable.getHoodieTable(metaClient, cfg, jsc);
|
table = HoodieTable.getHoodieTable(metaClient, cfg, jsc);
|
||||||
@@ -1148,10 +1148,10 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
|||||||
Assert.assertEquals(upserts, 200);
|
Assert.assertEquals(upserts, 200);
|
||||||
|
|
||||||
// Test small file handling after compaction
|
// Test small file handling after compaction
|
||||||
commitTime = "002";
|
instantTime = "002";
|
||||||
client.scheduleCompactionAtInstant(commitTime, Option.of(metadata.getExtraMetadata()));
|
client.scheduleCompactionAtInstant(instantTime, Option.of(metadata.getExtraMetadata()));
|
||||||
statuses = client.compact(commitTime);
|
statuses = client.compact(instantTime);
|
||||||
client.commitCompaction(commitTime, statuses, Option.empty());
|
client.commitCompaction(instantTime, statuses, Option.empty());
|
||||||
|
|
||||||
// Read from commit file
|
// Read from commit file
|
||||||
table = HoodieTable.getHoodieTable(metaClient, cfg, jsc);
|
table = HoodieTable.getHoodieTable(metaClient, cfg, jsc);
|
||||||
@@ -1172,14 +1172,14 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Write inserts + updates
|
// Write inserts + updates
|
||||||
commitTime = "003";
|
instantTime = "003";
|
||||||
client.startCommitWithTime(commitTime);
|
client.startCommitWithTime(instantTime);
|
||||||
// generate updates + inserts. inserts should be handled into small files
|
// generate updates + inserts. inserts should be handled into small files
|
||||||
records = dataGen.generateUpdates(commitTime, records);
|
records = dataGen.generateUpdates(instantTime, records);
|
||||||
records.addAll(dataGen.generateInserts(commitTime, 200));
|
records.addAll(dataGen.generateInserts(instantTime, 200));
|
||||||
writeRecords = jsc.parallelize(records, 1);
|
writeRecords = jsc.parallelize(records, 1);
|
||||||
statuses = client.upsert(writeRecords, commitTime);
|
statuses = client.upsert(writeRecords, instantTime);
|
||||||
assertTrue("Commit should succeed", client.commit(commitTime, statuses));
|
assertTrue("Commit should succeed", client.commit(instantTime, statuses));
|
||||||
|
|
||||||
// Read from commit file
|
// Read from commit file
|
||||||
table = HoodieTable.getHoodieTable(metaClient, cfg, jsc);
|
table = HoodieTable.getHoodieTable(metaClient, cfg, jsc);
|
||||||
|
|||||||
@@ -160,15 +160,15 @@ public class HoodieFileGroup implements Serializable {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Obtain the latest file slice, upto a commitTime i.e <= maxCommitTime.
|
* Obtain the latest file slice, upto a instantTime i.e <= maxInstantTime.
|
||||||
*/
|
*/
|
||||||
public Option<FileSlice> getLatestFileSliceBeforeOrOn(String maxCommitTime) {
|
public Option<FileSlice> getLatestFileSliceBeforeOrOn(String maxInstantTime) {
|
||||||
return Option.fromJavaOptional(getAllFileSlices().filter(slice -> HoodieTimeline
|
return Option.fromJavaOptional(getAllFileSlices().filter(slice -> HoodieTimeline
|
||||||
.compareTimestamps(slice.getBaseInstantTime(), maxCommitTime, HoodieTimeline.LESSER_OR_EQUAL)).findFirst());
|
.compareTimestamps(slice.getBaseInstantTime(), maxInstantTime, HoodieTimeline.LESSER_OR_EQUAL)).findFirst());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Obtain the latest file slice, upto a commitTime i.e < maxInstantTime.
|
* Obtain the latest file slice, upto an instantTime i.e < maxInstantTime.
|
||||||
*
|
*
|
||||||
* @param maxInstantTime Max Instant Time
|
* @param maxInstantTime Max Instant Time
|
||||||
* @return
|
* @return
|
||||||
|
|||||||
@@ -65,9 +65,9 @@ public class HoodiePartitionMetadata {
|
|||||||
/**
|
/**
|
||||||
* Construct metadata object to be written out.
|
* Construct metadata object to be written out.
|
||||||
*/
|
*/
|
||||||
public HoodiePartitionMetadata(FileSystem fs, String commitTime, Path basePath, Path partitionPath) {
|
public HoodiePartitionMetadata(FileSystem fs, String instantTime, Path basePath, Path partitionPath) {
|
||||||
this(fs, partitionPath);
|
this(fs, partitionPath);
|
||||||
props.setProperty(COMMIT_TIME_KEY, commitTime);
|
props.setProperty(COMMIT_TIME_KEY, instantTime);
|
||||||
props.setProperty(PARTITION_DEPTH_KEY, String.valueOf(partitionPath.depth() - basePath.depth()));
|
props.setProperty(PARTITION_DEPTH_KEY, String.valueOf(partitionPath.depth() - basePath.depth()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -160,8 +160,8 @@ public class HoodieRecord<T extends HoodieRecordPayload> implements Serializable
|
|||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String generateSequenceId(String commitTime, int partitionId, long recordIndex) {
|
public static String generateSequenceId(String instantTime, int partitionId, long recordIndex) {
|
||||||
return commitTime + "_" + partitionId + "_" + recordIndex;
|
return instantTime + "_" + partitionId + "_" + recordIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getPartitionPath() {
|
public String getPartitionPath() {
|
||||||
|
|||||||
@@ -142,7 +142,7 @@ public interface HoodieTimeline extends Serializable {
|
|||||||
/**
|
/**
|
||||||
* Create a new Timeline with all the instants after startTs.
|
* Create a new Timeline with all the instants after startTs.
|
||||||
*/
|
*/
|
||||||
HoodieTimeline findInstantsAfter(String commitTime, int numCommits);
|
HoodieTimeline findInstantsAfter(String instantTime, int numCommits);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Custom Filter of Instants.
|
* Custom Filter of Instants.
|
||||||
@@ -280,16 +280,16 @@ public interface HoodieTimeline extends Serializable {
|
|||||||
return new HoodieInstant(true, instant.getAction(), instant.getTimestamp());
|
return new HoodieInstant(true, instant.getAction(), instant.getTimestamp());
|
||||||
}
|
}
|
||||||
|
|
||||||
static String makeCommitFileName(String commitTime) {
|
static String makeCommitFileName(String instantTime) {
|
||||||
return StringUtils.join(commitTime, HoodieTimeline.COMMIT_EXTENSION);
|
return StringUtils.join(instantTime, HoodieTimeline.COMMIT_EXTENSION);
|
||||||
}
|
}
|
||||||
|
|
||||||
static String makeInflightCommitFileName(String commitTime) {
|
static String makeInflightCommitFileName(String instantTime) {
|
||||||
return StringUtils.join(commitTime, HoodieTimeline.INFLIGHT_COMMIT_EXTENSION);
|
return StringUtils.join(instantTime, HoodieTimeline.INFLIGHT_COMMIT_EXTENSION);
|
||||||
}
|
}
|
||||||
|
|
||||||
static String makeRequestedCommitFileName(String commitTime) {
|
static String makeRequestedCommitFileName(String instantTime) {
|
||||||
return StringUtils.join(commitTime, HoodieTimeline.REQUESTED_COMMIT_EXTENSION);
|
return StringUtils.join(instantTime, HoodieTimeline.REQUESTED_COMMIT_EXTENSION);
|
||||||
}
|
}
|
||||||
|
|
||||||
static String makeCleanerFileName(String instant) {
|
static String makeCleanerFileName(String instant) {
|
||||||
@@ -312,28 +312,28 @@ public interface HoodieTimeline extends Serializable {
|
|||||||
return StringUtils.join(instant, HoodieTimeline.INFLIGHT_ROLLBACK_EXTENSION);
|
return StringUtils.join(instant, HoodieTimeline.INFLIGHT_ROLLBACK_EXTENSION);
|
||||||
}
|
}
|
||||||
|
|
||||||
static String makeInflightSavePointFileName(String commitTime) {
|
static String makeInflightSavePointFileName(String instantTime) {
|
||||||
return StringUtils.join(commitTime, HoodieTimeline.INFLIGHT_SAVEPOINT_EXTENSION);
|
return StringUtils.join(instantTime, HoodieTimeline.INFLIGHT_SAVEPOINT_EXTENSION);
|
||||||
}
|
}
|
||||||
|
|
||||||
static String makeSavePointFileName(String commitTime) {
|
static String makeSavePointFileName(String instantTime) {
|
||||||
return StringUtils.join(commitTime, HoodieTimeline.SAVEPOINT_EXTENSION);
|
return StringUtils.join(instantTime, HoodieTimeline.SAVEPOINT_EXTENSION);
|
||||||
}
|
}
|
||||||
|
|
||||||
static String makeInflightDeltaFileName(String commitTime) {
|
static String makeInflightDeltaFileName(String instantTime) {
|
||||||
return StringUtils.join(commitTime, HoodieTimeline.INFLIGHT_DELTA_COMMIT_EXTENSION);
|
return StringUtils.join(instantTime, HoodieTimeline.INFLIGHT_DELTA_COMMIT_EXTENSION);
|
||||||
}
|
}
|
||||||
|
|
||||||
static String makeRequestedDeltaFileName(String commitTime) {
|
static String makeRequestedDeltaFileName(String instantTime) {
|
||||||
return StringUtils.join(commitTime, HoodieTimeline.REQUESTED_DELTA_COMMIT_EXTENSION);
|
return StringUtils.join(instantTime, HoodieTimeline.REQUESTED_DELTA_COMMIT_EXTENSION);
|
||||||
}
|
}
|
||||||
|
|
||||||
static String makeInflightCompactionFileName(String commitTime) {
|
static String makeInflightCompactionFileName(String instantTime) {
|
||||||
return StringUtils.join(commitTime, HoodieTimeline.INFLIGHT_COMPACTION_EXTENSION);
|
return StringUtils.join(instantTime, HoodieTimeline.INFLIGHT_COMPACTION_EXTENSION);
|
||||||
}
|
}
|
||||||
|
|
||||||
static String makeRequestedCompactionFileName(String commitTime) {
|
static String makeRequestedCompactionFileName(String instantTime) {
|
||||||
return StringUtils.join(commitTime, HoodieTimeline.REQUESTED_COMPACTION_EXTENSION);
|
return StringUtils.join(instantTime, HoodieTimeline.REQUESTED_COMPACTION_EXTENSION);
|
||||||
}
|
}
|
||||||
|
|
||||||
static String makeRestoreFileName(String instant) {
|
static String makeRestoreFileName(String instant) {
|
||||||
@@ -344,8 +344,8 @@ public interface HoodieTimeline extends Serializable {
|
|||||||
return StringUtils.join(instant, HoodieTimeline.INFLIGHT_RESTORE_EXTENSION);
|
return StringUtils.join(instant, HoodieTimeline.INFLIGHT_RESTORE_EXTENSION);
|
||||||
}
|
}
|
||||||
|
|
||||||
static String makeDeltaFileName(String commitTime) {
|
static String makeDeltaFileName(String instantTime) {
|
||||||
return commitTime + HoodieTimeline.DELTA_COMMIT_EXTENSION;
|
return instantTime + HoodieTimeline.DELTA_COMMIT_EXTENSION;
|
||||||
}
|
}
|
||||||
|
|
||||||
static String getCommitFromCommitFile(String commitFileName) {
|
static String getCommitFromCommitFile(String commitFileName) {
|
||||||
|
|||||||
@@ -121,7 +121,7 @@ public interface HoodieLogFormat {
|
|||||||
// File Id
|
// File Id
|
||||||
private String logFileId;
|
private String logFileId;
|
||||||
// File Commit Time stamp
|
// File Commit Time stamp
|
||||||
private String commitTime;
|
private String instantTime;
|
||||||
// version number for this log file. If not specified, then the current version will be
|
// version number for this log file. If not specified, then the current version will be
|
||||||
// computed by inspecting the file system
|
// computed by inspecting the file system
|
||||||
private Integer logVersion;
|
private Integer logVersion;
|
||||||
@@ -173,7 +173,7 @@ public interface HoodieLogFormat {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public WriterBuilder overBaseCommit(String baseCommit) {
|
public WriterBuilder overBaseCommit(String baseCommit) {
|
||||||
this.commitTime = baseCommit;
|
this.instantTime = baseCommit;
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -195,7 +195,7 @@ public interface HoodieLogFormat {
|
|||||||
if (logFileId == null) {
|
if (logFileId == null) {
|
||||||
throw new IllegalArgumentException("FileID is not specified");
|
throw new IllegalArgumentException("FileID is not specified");
|
||||||
}
|
}
|
||||||
if (commitTime == null) {
|
if (instantTime == null) {
|
||||||
throw new IllegalArgumentException("BaseCommitTime is not specified");
|
throw new IllegalArgumentException("BaseCommitTime is not specified");
|
||||||
}
|
}
|
||||||
if (fileExtension == null) {
|
if (fileExtension == null) {
|
||||||
@@ -212,7 +212,7 @@ public interface HoodieLogFormat {
|
|||||||
if (logVersion == null) {
|
if (logVersion == null) {
|
||||||
LOG.info("Computing the next log version for " + logFileId + " in " + parentPath);
|
LOG.info("Computing the next log version for " + logFileId + " in " + parentPath);
|
||||||
Option<Pair<Integer, String>> versionAndWriteToken =
|
Option<Pair<Integer, String>> versionAndWriteToken =
|
||||||
FSUtils.getLatestLogVersion(fs, parentPath, logFileId, fileExtension, commitTime);
|
FSUtils.getLatestLogVersion(fs, parentPath, logFileId, fileExtension, instantTime);
|
||||||
if (versionAndWriteToken.isPresent()) {
|
if (versionAndWriteToken.isPresent()) {
|
||||||
logVersion = versionAndWriteToken.get().getKey();
|
logVersion = versionAndWriteToken.get().getKey();
|
||||||
logWriteToken = versionAndWriteToken.get().getValue();
|
logWriteToken = versionAndWriteToken.get().getValue();
|
||||||
@@ -233,7 +233,7 @@ public interface HoodieLogFormat {
|
|||||||
}
|
}
|
||||||
|
|
||||||
Path logPath = new Path(parentPath,
|
Path logPath = new Path(parentPath,
|
||||||
FSUtils.makeLogFileName(logFileId, fileExtension, commitTime, logVersion, logWriteToken));
|
FSUtils.makeLogFileName(logFileId, fileExtension, instantTime, logVersion, logWriteToken));
|
||||||
LOG.info("HoodieLogFile on path " + logPath);
|
LOG.info("HoodieLogFile on path " + logPath);
|
||||||
HoodieLogFile logFile = new HoodieLogFile(logPath);
|
HoodieLogFile logFile = new HoodieLogFile(logPath);
|
||||||
|
|
||||||
|
|||||||
@@ -123,14 +123,14 @@ public class HoodieArchivedTimeline extends HoodieDefaultTimeline {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private HoodieInstant readCommit(GenericRecord record, boolean loadDetails) {
|
private HoodieInstant readCommit(GenericRecord record, boolean loadDetails) {
|
||||||
final String commitTime = record.get(HoodiePartitionMetadata.COMMIT_TIME_KEY).toString();
|
final String instantTime = record.get(HoodiePartitionMetadata.COMMIT_TIME_KEY).toString();
|
||||||
final String action = record.get(ACTION_TYPE_KEY).toString();
|
final String action = record.get(ACTION_TYPE_KEY).toString();
|
||||||
if (loadDetails) {
|
if (loadDetails) {
|
||||||
Option.ofNullable(record.get(getMetadataKey(action))).map(actionData ->
|
Option.ofNullable(record.get(getMetadataKey(action))).map(actionData ->
|
||||||
this.readCommits.put(commitTime, actionData.toString().getBytes(StandardCharsets.UTF_8))
|
this.readCommits.put(instantTime, actionData.toString().getBytes(StandardCharsets.UTF_8))
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
return new HoodieInstant(false, action, commitTime);
|
return new HoodieInstant(false, action, instantTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getMetadataKey(String action) {
|
private String getMetadataKey(String action) {
|
||||||
|
|||||||
@@ -130,9 +130,9 @@ public class HoodieDefaultTimeline implements HoodieTimeline {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public HoodieDefaultTimeline findInstantsAfter(String commitTime, int numCommits) {
|
public HoodieDefaultTimeline findInstantsAfter(String instantTime, int numCommits) {
|
||||||
return new HoodieDefaultTimeline(instants.stream()
|
return new HoodieDefaultTimeline(instants.stream()
|
||||||
.filter(s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), commitTime, GREATER)).limit(numCommits),
|
.filter(s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), instantTime, GREATER)).limit(numCommits),
|
||||||
details);
|
details);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -105,12 +105,12 @@ public class FSUtils {
|
|||||||
return String.format("%d-%d-%d", taskPartitionId, stageId, taskAttemptId);
|
return String.format("%d-%d-%d", taskPartitionId, stageId, taskAttemptId);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String makeDataFileName(String commitTime, String writeToken, String fileId) {
|
public static String makeDataFileName(String instantTime, String writeToken, String fileId) {
|
||||||
return String.format("%s_%s_%s.parquet", fileId, writeToken, commitTime);
|
return String.format("%s_%s_%s.parquet", fileId, writeToken, instantTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String makeMarkerFile(String commitTime, String writeToken, String fileId) {
|
public static String makeMarkerFile(String instantTime, String writeToken, String fileId) {
|
||||||
return String.format("%s_%s_%s%s", fileId, writeToken, commitTime, HoodieTableMetaClient.MARKER_EXTN);
|
return String.format("%s_%s_%s%s", fileId, writeToken, instantTime, HoodieTableMetaClient.MARKER_EXTN);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String translateMarkerToDataPath(String basePath, String markerPath, String instantTs) {
|
public static String translateMarkerToDataPath(String basePath, String markerPath, String instantTs) {
|
||||||
@@ -125,8 +125,8 @@ public class FSUtils {
|
|||||||
HoodieFileFormat.PARQUET.getFileExtension());
|
HoodieFileFormat.PARQUET.getFileExtension());
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String maskWithoutFileId(String commitTime, int taskPartitionId) {
|
public static String maskWithoutFileId(String instantTime, int taskPartitionId) {
|
||||||
return String.format("*_%s_%s%s", taskPartitionId, commitTime, HoodieFileFormat.PARQUET.getFileExtension());
|
return String.format("*_%s_%s%s", taskPartitionId, instantTime, HoodieFileFormat.PARQUET.getFileExtension());
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String getCommitFromCommitFile(String commitFileName) {
|
public static String getCommitFromCommitFile(String commitFileName) {
|
||||||
@@ -282,7 +282,7 @@ public class FSUtils {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the first part of the file name in the log file. That will be the fileId. Log file do not have commitTime in
|
* Get the first part of the file name in the log file. That will be the fileId. Log file do not have instantTime in
|
||||||
* the file name.
|
* the file name.
|
||||||
*/
|
*/
|
||||||
public static String getFileIdFromLogPath(Path path) {
|
public static String getFileIdFromLogPath(Path path) {
|
||||||
@@ -304,7 +304,7 @@ public class FSUtils {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the first part of the file name in the log file. That will be the fileId. Log file do not have commitTime in
|
* Get the first part of the file name in the log file. That will be the fileId. Log file do not have instantTime in
|
||||||
* the file name.
|
* the file name.
|
||||||
*/
|
*/
|
||||||
public static String getBaseCommitTimeFromLogPath(Path path) {
|
public static String getBaseCommitTimeFromLogPath(Path path) {
|
||||||
|
|||||||
@@ -180,8 +180,8 @@ public class HoodieAvroUtils {
|
|||||||
/**
|
/**
|
||||||
* Adds the Hoodie commit metadata into the provided Generic Record.
|
* Adds the Hoodie commit metadata into the provided Generic Record.
|
||||||
*/
|
*/
|
||||||
public static GenericRecord addCommitMetadataToRecord(GenericRecord record, String commitTime, String commitSeqno) {
|
public static GenericRecord addCommitMetadataToRecord(GenericRecord record, String instantTime, String commitSeqno) {
|
||||||
record.put(HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitTime);
|
record.put(HoodieRecord.COMMIT_TIME_METADATA_FIELD, instantTime);
|
||||||
record.put(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, commitSeqno);
|
record.put(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, commitSeqno);
|
||||||
return record;
|
return record;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -124,16 +124,16 @@ public class HoodieTestUtils {
|
|||||||
return new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
|
return new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void createCommitFiles(String basePath, String... commitTimes) throws IOException {
|
public static void createCommitFiles(String basePath, String... instantTimes) throws IOException {
|
||||||
for (String commitTime : commitTimes) {
|
for (String instantTime : instantTimes) {
|
||||||
new File(
|
new File(
|
||||||
basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
|
basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
|
||||||
+ HoodieTimeline.makeRequestedCommitFileName(commitTime)).createNewFile();
|
+ HoodieTimeline.makeRequestedCommitFileName(instantTime)).createNewFile();
|
||||||
new File(
|
new File(
|
||||||
basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
|
basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
|
||||||
+ HoodieTimeline.makeInflightCommitFileName(commitTime)).createNewFile();
|
+ HoodieTimeline.makeInflightCommitFileName(instantTime)).createNewFile();
|
||||||
new File(
|
new File(
|
||||||
basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + HoodieTimeline.makeCommitFileName(commitTime))
|
basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + HoodieTimeline.makeCommitFileName(instantTime))
|
||||||
.createNewFile();
|
.createNewFile();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -142,20 +142,20 @@ public class HoodieTestUtils {
|
|||||||
new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME).mkdirs();
|
new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME).mkdirs();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void createInflightCommitFiles(String basePath, String... commitTimes) throws IOException {
|
public static void createInflightCommitFiles(String basePath, String... instantTimes) throws IOException {
|
||||||
|
|
||||||
for (String commitTime : commitTimes) {
|
for (String instantTime : instantTimes) {
|
||||||
new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
|
new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
|
||||||
+ HoodieTimeline.makeRequestedCommitFileName(commitTime)).createNewFile();
|
+ HoodieTimeline.makeRequestedCommitFileName(instantTime)).createNewFile();
|
||||||
new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + HoodieTimeline.makeInflightCommitFileName(
|
new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + HoodieTimeline.makeInflightCommitFileName(
|
||||||
commitTime)).createNewFile();
|
instantTime)).createNewFile();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void createPendingCleanFiles(HoodieTableMetaClient metaClient, String... commitTimes) {
|
public static void createPendingCleanFiles(HoodieTableMetaClient metaClient, String... instantTimes) {
|
||||||
for (String commitTime : commitTimes) {
|
for (String instantTime : instantTimes) {
|
||||||
Arrays.asList(HoodieTimeline.makeRequestedCleanerFileName(commitTime),
|
Arrays.asList(HoodieTimeline.makeRequestedCleanerFileName(instantTime),
|
||||||
HoodieTimeline.makeInflightCleanerFileName(commitTime)).forEach(f -> {
|
HoodieTimeline.makeInflightCleanerFileName(instantTime)).forEach(f -> {
|
||||||
FSDataOutputStream os = null;
|
FSDataOutputStream os = null;
|
||||||
try {
|
try {
|
||||||
Path commitFile = new Path(
|
Path commitFile = new Path(
|
||||||
@@ -179,59 +179,59 @@ public class HoodieTestUtils {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String createNewDataFile(String basePath, String partitionPath, String commitTime)
|
public static String createNewDataFile(String basePath, String partitionPath, String instantTime)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
String fileID = UUID.randomUUID().toString();
|
String fileID = UUID.randomUUID().toString();
|
||||||
return createDataFile(basePath, partitionPath, commitTime, fileID);
|
return createDataFile(basePath, partitionPath, instantTime, fileID);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String createNewMarkerFile(String basePath, String partitionPath, String commitTime)
|
public static String createNewMarkerFile(String basePath, String partitionPath, String instantTime)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
String fileID = UUID.randomUUID().toString();
|
String fileID = UUID.randomUUID().toString();
|
||||||
return createMarkerFile(basePath, partitionPath, commitTime, fileID);
|
return createMarkerFile(basePath, partitionPath, instantTime, fileID);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String createDataFile(String basePath, String partitionPath, String commitTime, String fileID)
|
public static String createDataFile(String basePath, String partitionPath, String instantTime, String fileID)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
String folderPath = basePath + "/" + partitionPath + "/";
|
String folderPath = basePath + "/" + partitionPath + "/";
|
||||||
new File(folderPath).mkdirs();
|
new File(folderPath).mkdirs();
|
||||||
new File(folderPath + FSUtils.makeDataFileName(commitTime, DEFAULT_WRITE_TOKEN, fileID)).createNewFile();
|
new File(folderPath + FSUtils.makeDataFileName(instantTime, DEFAULT_WRITE_TOKEN, fileID)).createNewFile();
|
||||||
return fileID;
|
return fileID;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String createMarkerFile(String basePath, String partitionPath, String commitTime, String fileID)
|
public static String createMarkerFile(String basePath, String partitionPath, String instantTime, String fileID)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
String folderPath =
|
String folderPath =
|
||||||
basePath + "/" + HoodieTableMetaClient.TEMPFOLDER_NAME + "/" + commitTime + "/" + partitionPath + "/";
|
basePath + "/" + HoodieTableMetaClient.TEMPFOLDER_NAME + "/" + instantTime + "/" + partitionPath + "/";
|
||||||
new File(folderPath).mkdirs();
|
new File(folderPath).mkdirs();
|
||||||
File f = new File(folderPath + FSUtils.makeMarkerFile(commitTime, DEFAULT_WRITE_TOKEN, fileID));
|
File f = new File(folderPath + FSUtils.makeMarkerFile(instantTime, DEFAULT_WRITE_TOKEN, fileID));
|
||||||
f.createNewFile();
|
f.createNewFile();
|
||||||
return f.getAbsolutePath();
|
return f.getAbsolutePath();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String createNewLogFile(FileSystem fs, String basePath, String partitionPath, String commitTime,
|
public static String createNewLogFile(FileSystem fs, String basePath, String partitionPath, String instantTime,
|
||||||
String fileID, Option<Integer> version) throws IOException {
|
String fileID, Option<Integer> version) throws IOException {
|
||||||
String folderPath = basePath + "/" + partitionPath + "/";
|
String folderPath = basePath + "/" + partitionPath + "/";
|
||||||
boolean makeDir = fs.mkdirs(new Path(folderPath));
|
boolean makeDir = fs.mkdirs(new Path(folderPath));
|
||||||
if (!makeDir) {
|
if (!makeDir) {
|
||||||
throw new IOException("cannot create directory for path " + folderPath);
|
throw new IOException("cannot create directory for path " + folderPath);
|
||||||
}
|
}
|
||||||
boolean createFile = fs.createNewFile(new Path(folderPath + FSUtils.makeLogFileName(fileID, ".log", commitTime,
|
boolean createFile = fs.createNewFile(new Path(folderPath + FSUtils.makeLogFileName(fileID, ".log", instantTime,
|
||||||
version.orElse(DEFAULT_LOG_VERSION), HoodieLogFormat.UNKNOWN_WRITE_TOKEN)));
|
version.orElse(DEFAULT_LOG_VERSION), HoodieLogFormat.UNKNOWN_WRITE_TOKEN)));
|
||||||
if (!createFile) {
|
if (!createFile) {
|
||||||
throw new IOException(
|
throw new IOException(
|
||||||
StringUtils.format("cannot create data file for commit %s and fileId %s", commitTime, fileID));
|
StringUtils.format("cannot create data file for commit %s and fileId %s", instantTime, fileID));
|
||||||
}
|
}
|
||||||
return fileID;
|
return fileID;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void createCompactionCommitFiles(FileSystem fs, String basePath, String... commitTimes)
|
public static void createCompactionCommitFiles(FileSystem fs, String basePath, String... instantTimes)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
for (String commitTime : commitTimes) {
|
for (String instantTime : instantTimes) {
|
||||||
boolean createFile = fs.createNewFile(new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
|
boolean createFile = fs.createNewFile(new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
|
||||||
+ HoodieTimeline.makeCommitFileName(commitTime)));
|
+ HoodieTimeline.makeCommitFileName(instantTime)));
|
||||||
if (!createFile) {
|
if (!createFile) {
|
||||||
throw new IOException("cannot create commit file for commit " + commitTime);
|
throw new IOException("cannot create commit file for commit " + instantTime);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -244,67 +244,67 @@ public class HoodieTestUtils {
|
|||||||
AvroUtils.serializeCompactionPlan(plan));
|
AvroUtils.serializeCompactionPlan(plan));
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String getDataFilePath(String basePath, String partitionPath, String commitTime, String fileID) {
|
public static String getDataFilePath(String basePath, String partitionPath, String instantTime, String fileID) {
|
||||||
return basePath + "/" + partitionPath + "/" + FSUtils.makeDataFileName(commitTime, DEFAULT_WRITE_TOKEN, fileID);
|
return basePath + "/" + partitionPath + "/" + FSUtils.makeDataFileName(instantTime, DEFAULT_WRITE_TOKEN, fileID);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String getLogFilePath(String basePath, String partitionPath, String commitTime, String fileID,
|
public static String getLogFilePath(String basePath, String partitionPath, String instantTime, String fileID,
|
||||||
Option<Integer> version) {
|
Option<Integer> version) {
|
||||||
return basePath + "/" + partitionPath + "/" + FSUtils.makeLogFileName(fileID, ".log", commitTime,
|
return basePath + "/" + partitionPath + "/" + FSUtils.makeLogFileName(fileID, ".log", instantTime,
|
||||||
version.orElse(DEFAULT_LOG_VERSION), HoodieLogFormat.UNKNOWN_WRITE_TOKEN);
|
version.orElse(DEFAULT_LOG_VERSION), HoodieLogFormat.UNKNOWN_WRITE_TOKEN);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String getCommitFilePath(String basePath, String commitTime) {
|
public static String getCommitFilePath(String basePath, String instantTime) {
|
||||||
return basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + commitTime + HoodieTimeline.COMMIT_EXTENSION;
|
return basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + instantTime + HoodieTimeline.COMMIT_EXTENSION;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String getInflightCommitFilePath(String basePath, String commitTime) {
|
public static String getInflightCommitFilePath(String basePath, String instantTime) {
|
||||||
return basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + commitTime
|
return basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + instantTime
|
||||||
+ HoodieTimeline.INFLIGHT_COMMIT_EXTENSION;
|
+ HoodieTimeline.INFLIGHT_COMMIT_EXTENSION;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String getRequestedCompactionFilePath(String basePath, String commitTime) {
|
public static String getRequestedCompactionFilePath(String basePath, String instantTime) {
|
||||||
return basePath + "/" + HoodieTableMetaClient.AUXILIARYFOLDER_NAME + "/" + commitTime
|
return basePath + "/" + HoodieTableMetaClient.AUXILIARYFOLDER_NAME + "/" + instantTime
|
||||||
+ HoodieTimeline.INFLIGHT_COMMIT_EXTENSION;
|
+ HoodieTimeline.INFLIGHT_COMMIT_EXTENSION;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean doesDataFileExist(String basePath, String partitionPath, String commitTime,
|
public static boolean doesDataFileExist(String basePath, String partitionPath, String instantTime,
|
||||||
String fileID) {
|
String fileID) {
|
||||||
return new File(getDataFilePath(basePath, partitionPath, commitTime, fileID)).exists();
|
return new File(getDataFilePath(basePath, partitionPath, instantTime, fileID)).exists();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean doesLogFileExist(String basePath, String partitionPath, String commitTime, String fileID,
|
public static boolean doesLogFileExist(String basePath, String partitionPath, String instantTime, String fileID,
|
||||||
Option<Integer> version) {
|
Option<Integer> version) {
|
||||||
return new File(getLogFilePath(basePath, partitionPath, commitTime, fileID, version)).exists();
|
return new File(getLogFilePath(basePath, partitionPath, instantTime, fileID, version)).exists();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean doesCommitExist(String basePath, String commitTime) {
|
public static boolean doesCommitExist(String basePath, String instantTime) {
|
||||||
return new File(
|
return new File(
|
||||||
basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + commitTime + HoodieTimeline.COMMIT_EXTENSION)
|
basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + instantTime + HoodieTimeline.COMMIT_EXTENSION)
|
||||||
.exists();
|
.exists();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean doesInflightExist(String basePath, String commitTime) {
|
public static boolean doesInflightExist(String basePath, String instantTime) {
|
||||||
return new File(
|
return new File(
|
||||||
basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + commitTime + HoodieTimeline.INFLIGHT_EXTENSION)
|
basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + instantTime + HoodieTimeline.INFLIGHT_EXTENSION)
|
||||||
.exists();
|
.exists();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void createCleanFiles(HoodieTableMetaClient metaClient, String basePath,
|
public static void createCleanFiles(HoodieTableMetaClient metaClient, String basePath,
|
||||||
String commitTime, Configuration configuration)
|
String instantTime, Configuration configuration)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
createPendingCleanFiles(metaClient, commitTime);
|
createPendingCleanFiles(metaClient, instantTime);
|
||||||
Path commitFile = new Path(
|
Path commitFile = new Path(
|
||||||
basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + HoodieTimeline.makeCleanerFileName(commitTime));
|
basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + HoodieTimeline.makeCleanerFileName(instantTime));
|
||||||
FileSystem fs = FSUtils.getFs(basePath, configuration);
|
FileSystem fs = FSUtils.getFs(basePath, configuration);
|
||||||
try (FSDataOutputStream os = fs.create(commitFile, true)) {
|
try (FSDataOutputStream os = fs.create(commitFile, true)) {
|
||||||
HoodieCleanStat cleanStats = new HoodieCleanStat(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS,
|
HoodieCleanStat cleanStats = new HoodieCleanStat(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS,
|
||||||
DEFAULT_PARTITION_PATHS[rand.nextInt(DEFAULT_PARTITION_PATHS.length)], new ArrayList<>(), new ArrayList<>(),
|
DEFAULT_PARTITION_PATHS[rand.nextInt(DEFAULT_PARTITION_PATHS.length)], new ArrayList<>(), new ArrayList<>(),
|
||||||
new ArrayList<>(), commitTime);
|
new ArrayList<>(), instantTime);
|
||||||
// Create the clean metadata
|
// Create the clean metadata
|
||||||
|
|
||||||
HoodieCleanMetadata cleanMetadata =
|
HoodieCleanMetadata cleanMetadata =
|
||||||
CleanerUtils.convertCleanMetadata(metaClient, commitTime, Option.of(0L), Collections.singletonList(cleanStats));
|
CleanerUtils.convertCleanMetadata(metaClient, instantTime, Option.of(0L), Collections.singletonList(cleanStats));
|
||||||
// Write empty clean metadata
|
// Write empty clean metadata
|
||||||
os.write(AvroUtils.serializeCleanMetadata(cleanMetadata).get());
|
os.write(AvroUtils.serializeCleanMetadata(cleanMetadata).get());
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ public class TestHoodieWriteStat {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSetPaths() {
|
public void testSetPaths() {
|
||||||
String commitTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
|
String instantTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
|
||||||
String basePathString = "/data/tables/some-hoodie-table";
|
String basePathString = "/data/tables/some-hoodie-table";
|
||||||
String partitionPathString = "2017/12/31";
|
String partitionPathString = "2017/12/31";
|
||||||
String fileName = UUID.randomUUID().toString();
|
String fileName = UUID.randomUUID().toString();
|
||||||
@@ -46,7 +46,7 @@ public class TestHoodieWriteStat {
|
|||||||
Path basePath = new Path(basePathString);
|
Path basePath = new Path(basePathString);
|
||||||
Path partitionPath = new Path(basePath, partitionPathString);
|
Path partitionPath = new Path(basePath, partitionPathString);
|
||||||
|
|
||||||
Path finalizeFilePath = new Path(partitionPath, FSUtils.makeDataFileName(commitTime, writeToken, fileName));
|
Path finalizeFilePath = new Path(partitionPath, FSUtils.makeDataFileName(instantTime, writeToken, fileName));
|
||||||
HoodieWriteStat writeStat = new HoodieWriteStat();
|
HoodieWriteStat writeStat = new HoodieWriteStat();
|
||||||
writeStat.setPath(basePath, finalizeFilePath);
|
writeStat.setPath(basePath, finalizeFilePath);
|
||||||
assertEquals(finalizeFilePath, new Path(basePath, writeStat.getPath()));
|
assertEquals(finalizeFilePath, new Path(basePath, writeStat.getPath()));
|
||||||
|
|||||||
@@ -407,7 +407,7 @@ public class TestHoodieActiveTimeline extends HoodieCommonTestHarness {
|
|||||||
private List<HoodieInstant> getAllInstants() {
|
private List<HoodieInstant> getAllInstants() {
|
||||||
timeline = new HoodieActiveTimeline(metaClient);
|
timeline = new HoodieActiveTimeline(metaClient);
|
||||||
List<HoodieInstant> allInstants = new ArrayList<>();
|
List<HoodieInstant> allInstants = new ArrayList<>();
|
||||||
long commitTime = 1;
|
long instantTime = 1;
|
||||||
for (State state : State.values()) {
|
for (State state : State.values()) {
|
||||||
if (state == State.INVALID) {
|
if (state == State.INVALID) {
|
||||||
continue;
|
continue;
|
||||||
@@ -432,7 +432,7 @@ public class TestHoodieActiveTimeline extends HoodieCommonTestHarness {
|
|||||||
action = HoodieTimeline.COMMIT_ACTION;
|
action = HoodieTimeline.COMMIT_ACTION;
|
||||||
}
|
}
|
||||||
|
|
||||||
allInstants.add(new HoodieInstant(state, action, String.format("%03d", commitTime++)));
|
allInstants.add(new HoodieInstant(state, action, String.format("%03d", instantTime++)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return allInstants;
|
return allInstants;
|
||||||
|
|||||||
@@ -100,12 +100,12 @@ public class SchemaTestUtil {
|
|||||||
public static List<IndexedRecord> generateHoodieTestRecords(int from, int limit)
|
public static List<IndexedRecord> generateHoodieTestRecords(int from, int limit)
|
||||||
throws IOException, URISyntaxException {
|
throws IOException, URISyntaxException {
|
||||||
List<IndexedRecord> records = generateTestRecords(from, limit);
|
List<IndexedRecord> records = generateTestRecords(from, limit);
|
||||||
String commitTime = HoodieActiveTimeline.createNewInstantTime();
|
String instantTime = HoodieActiveTimeline.createNewInstantTime();
|
||||||
Schema hoodieFieldsSchema = HoodieAvroUtils.addMetadataFields(getSimpleSchema());
|
Schema hoodieFieldsSchema = HoodieAvroUtils.addMetadataFields(getSimpleSchema());
|
||||||
return records.stream().map(s -> HoodieAvroUtils.rewriteRecord((GenericRecord) s, hoodieFieldsSchema)).map(p -> {
|
return records.stream().map(s -> HoodieAvroUtils.rewriteRecord((GenericRecord) s, hoodieFieldsSchema)).map(p -> {
|
||||||
p.put(HoodieRecord.RECORD_KEY_METADATA_FIELD, UUID.randomUUID().toString());
|
p.put(HoodieRecord.RECORD_KEY_METADATA_FIELD, UUID.randomUUID().toString());
|
||||||
p.put(HoodieRecord.PARTITION_PATH_METADATA_FIELD, "0000/00/00");
|
p.put(HoodieRecord.PARTITION_PATH_METADATA_FIELD, "0000/00/00");
|
||||||
p.put(HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitTime);
|
p.put(HoodieRecord.COMMIT_TIME_METADATA_FIELD, instantTime);
|
||||||
return p;
|
return p;
|
||||||
}).collect(Collectors.toList());
|
}).collect(Collectors.toList());
|
||||||
|
|
||||||
@@ -124,12 +124,12 @@ public class SchemaTestUtil {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public static List<IndexedRecord> updateHoodieTestRecords(List<String> oldRecordKeys, List<IndexedRecord> newRecords,
|
public static List<IndexedRecord> updateHoodieTestRecords(List<String> oldRecordKeys, List<IndexedRecord> newRecords,
|
||||||
String commitTime) {
|
String instantTime) {
|
||||||
|
|
||||||
return newRecords.stream().map(p -> {
|
return newRecords.stream().map(p -> {
|
||||||
((GenericRecord) p).put(HoodieRecord.RECORD_KEY_METADATA_FIELD, oldRecordKeys.remove(0));
|
((GenericRecord) p).put(HoodieRecord.RECORD_KEY_METADATA_FIELD, oldRecordKeys.remove(0));
|
||||||
((GenericRecord) p).put(HoodieRecord.PARTITION_PATH_METADATA_FIELD, "0000/00/00");
|
((GenericRecord) p).put(HoodieRecord.PARTITION_PATH_METADATA_FIELD, "0000/00/00");
|
||||||
((GenericRecord) p).put(HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitTime);
|
((GenericRecord) p).put(HoodieRecord.COMMIT_TIME_METADATA_FIELD, instantTime);
|
||||||
return p;
|
return p;
|
||||||
}).collect(Collectors.toList());
|
}).collect(Collectors.toList());
|
||||||
|
|
||||||
@@ -173,9 +173,9 @@ public class SchemaTestUtil {
|
|||||||
return new Schema.Parser().parse(SchemaTestUtil.class.getResourceAsStream("/timestamp-test-evolved.avsc"));
|
return new Schema.Parser().parse(SchemaTestUtil.class.getResourceAsStream("/timestamp-test-evolved.avsc"));
|
||||||
}
|
}
|
||||||
|
|
||||||
public static GenericRecord generateAvroRecordFromJson(Schema schema, int recordNumber, String commitTime,
|
public static GenericRecord generateAvroRecordFromJson(Schema schema, int recordNumber, String instantTime,
|
||||||
String fileId) throws IOException {
|
String fileId) throws IOException {
|
||||||
TestRecord record = new TestRecord(commitTime, recordNumber, fileId);
|
TestRecord record = new TestRecord(instantTime, recordNumber, fileId);
|
||||||
MercifulJsonConverter converter = new MercifulJsonConverter();
|
MercifulJsonConverter converter = new MercifulJsonConverter();
|
||||||
return converter.convert(record.toJsonString(), schema);
|
return converter.convert(record.toJsonString(), schema);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -68,16 +68,16 @@ public class TestFSUtils extends HoodieCommonTestHarness {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testMakeDataFileName() {
|
public void testMakeDataFileName() {
|
||||||
String commitTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
|
String instantTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
|
||||||
String fileName = UUID.randomUUID().toString();
|
String fileName = UUID.randomUUID().toString();
|
||||||
assertEquals(FSUtils.makeDataFileName(commitTime, TEST_WRITE_TOKEN, fileName), fileName + "_" + TEST_WRITE_TOKEN + "_" + commitTime + ".parquet");
|
assertEquals(FSUtils.makeDataFileName(instantTime, TEST_WRITE_TOKEN, fileName), fileName + "_" + TEST_WRITE_TOKEN + "_" + instantTime + ".parquet");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testMaskFileName() {
|
public void testMaskFileName() {
|
||||||
String commitTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
|
String instantTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
|
||||||
int taskPartitionId = 2;
|
int taskPartitionId = 2;
|
||||||
assertEquals(FSUtils.maskWithoutFileId(commitTime, taskPartitionId), "*_" + taskPartitionId + "_" + commitTime + ".parquet");
|
assertEquals(FSUtils.maskWithoutFileId(instantTime, taskPartitionId), "*_" + taskPartitionId + "_" + instantTime + ".parquet");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@@ -140,17 +140,17 @@ public class TestFSUtils extends HoodieCommonTestHarness {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGetCommitTime() {
|
public void testGetCommitTime() {
|
||||||
String commitTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
|
String instantTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
|
||||||
String fileName = UUID.randomUUID().toString();
|
String fileName = UUID.randomUUID().toString();
|
||||||
String fullFileName = FSUtils.makeDataFileName(commitTime, TEST_WRITE_TOKEN, fileName);
|
String fullFileName = FSUtils.makeDataFileName(instantTime, TEST_WRITE_TOKEN, fileName);
|
||||||
assertEquals(FSUtils.getCommitTime(fullFileName), commitTime);
|
assertEquals(FSUtils.getCommitTime(fullFileName), instantTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGetFileNameWithoutMeta() {
|
public void testGetFileNameWithoutMeta() {
|
||||||
String commitTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
|
String instantTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
|
||||||
String fileName = UUID.randomUUID().toString();
|
String fileName = UUID.randomUUID().toString();
|
||||||
String fullFileName = FSUtils.makeDataFileName(commitTime, TEST_WRITE_TOKEN, fileName);
|
String fullFileName = FSUtils.makeDataFileName(instantTime, TEST_WRITE_TOKEN, fileName);
|
||||||
assertEquals(FSUtils.getFileId(fullFileName), fileName);
|
assertEquals(FSUtils.getFileId(fullFileName), fileName);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -271,15 +271,15 @@ public class TestFSUtils extends HoodieCommonTestHarness {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testDeleteOlderRollbackFiles() throws Exception {
|
public void testDeleteOlderRollbackFiles() throws Exception {
|
||||||
String[] commitTimes = new String[]{"20160501010101", "20160501020101", "20160501030101", "20160501040101",
|
String[] instantTimes = new String[]{"20160501010101", "20160501020101", "20160501030101", "20160501040101",
|
||||||
"20160502020601", "20160502030601", "20160502040601", "20160502050601", "20160506030611",
|
"20160502020601", "20160502030601", "20160502040601", "20160502050601", "20160506030611",
|
||||||
"20160506040611", "20160506050611", "20160506060611"};
|
"20160506040611", "20160506050611", "20160506060611"};
|
||||||
List<HoodieInstant> hoodieInstants = new ArrayList<>();
|
List<HoodieInstant> hoodieInstants = new ArrayList<>();
|
||||||
// create rollback files
|
// create rollback files
|
||||||
for (String commitTime : commitTimes) {
|
for (String instantTime : instantTimes) {
|
||||||
new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
|
new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
|
||||||
+ commitTime + HoodieTimeline.ROLLBACK_EXTENSION).createNewFile();
|
+ instantTime + HoodieTimeline.ROLLBACK_EXTENSION).createNewFile();
|
||||||
hoodieInstants.add(new HoodieInstant(false, HoodieTimeline.ROLLBACK_ACTION, commitTime));
|
hoodieInstants.add(new HoodieInstant(false, HoodieTimeline.ROLLBACK_ACTION, instantTime));
|
||||||
}
|
}
|
||||||
|
|
||||||
FSUtils.deleteOlderRollbackMetaFiles(FSUtils.getFs(basePath, new Configuration()),
|
FSUtils.deleteOlderRollbackMetaFiles(FSUtils.getFs(basePath, new Configuration()),
|
||||||
@@ -295,15 +295,15 @@ public class TestFSUtils extends HoodieCommonTestHarness {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testDeleteOlderCleanMetaFiles() throws Exception {
|
public void testDeleteOlderCleanMetaFiles() throws Exception {
|
||||||
String[] commitTimes = new String[]{"20160501010101", "20160501020101", "20160501030101", "20160501040101",
|
String[] instantTimes = new String[]{"20160501010101", "20160501020101", "20160501030101", "20160501040101",
|
||||||
"20160502020601", "20160502030601", "20160502040601", "20160502050601", "20160506030611",
|
"20160502020601", "20160502030601", "20160502040601", "20160502050601", "20160506030611",
|
||||||
"20160506040611", "20160506050611", "20160506060611"};
|
"20160506040611", "20160506050611", "20160506060611"};
|
||||||
List<HoodieInstant> hoodieInstants = new ArrayList<>();
|
List<HoodieInstant> hoodieInstants = new ArrayList<>();
|
||||||
// create rollback files
|
// create rollback files
|
||||||
for (String commitTime : commitTimes) {
|
for (String instantTime : instantTimes) {
|
||||||
new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
|
new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
|
||||||
+ commitTime + HoodieTimeline.CLEAN_EXTENSION).createNewFile();
|
+ instantTime + HoodieTimeline.CLEAN_EXTENSION).createNewFile();
|
||||||
hoodieInstants.add(new HoodieInstant(false, HoodieTimeline.CLEAN_ACTION, commitTime));
|
hoodieInstants.add(new HoodieInstant(false, HoodieTimeline.CLEAN_ACTION, instantTime));
|
||||||
}
|
}
|
||||||
FSUtils.deleteOlderCleanMetaFiles(FSUtils.getFs(basePath, new Configuration()),
|
FSUtils.deleteOlderCleanMetaFiles(FSUtils.getFs(basePath, new Configuration()),
|
||||||
basePath + "/.hoodie", hoodieInstants.stream());
|
basePath + "/.hoodie", hoodieInstants.stream());
|
||||||
@@ -318,7 +318,7 @@ public class TestFSUtils extends HoodieCommonTestHarness {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testFileNameRelatedFunctions() throws Exception {
|
public void testFileNameRelatedFunctions() throws Exception {
|
||||||
String commitTime = "20160501010101";
|
String instantTime = "20160501010101";
|
||||||
String partitionStr = "2016/05/01";
|
String partitionStr = "2016/05/01";
|
||||||
int taskPartitionId = 456;
|
int taskPartitionId = 456;
|
||||||
String writeToken = "456";
|
String writeToken = "456";
|
||||||
@@ -328,13 +328,13 @@ public class TestFSUtils extends HoodieCommonTestHarness {
|
|||||||
final String LOG_EXTENTION = "." + LOG_STR;
|
final String LOG_EXTENTION = "." + LOG_STR;
|
||||||
|
|
||||||
// data file name
|
// data file name
|
||||||
String dataFileName = FSUtils.makeDataFileName(commitTime, writeToken, fileId);
|
String dataFileName = FSUtils.makeDataFileName(instantTime, writeToken, fileId);
|
||||||
assertTrue(commitTime.equals(FSUtils.getCommitTime(dataFileName)));
|
assertTrue(instantTime.equals(FSUtils.getCommitTime(dataFileName)));
|
||||||
assertTrue(fileId.equals(FSUtils.getFileId(dataFileName)));
|
assertTrue(fileId.equals(FSUtils.getFileId(dataFileName)));
|
||||||
|
|
||||||
String logFileName = FSUtils.makeLogFileName(fileId, LOG_EXTENTION, commitTime, version, writeToken);
|
String logFileName = FSUtils.makeLogFileName(fileId, LOG_EXTENTION, instantTime, version, writeToken);
|
||||||
assertTrue(FSUtils.isLogFile(new Path(logFileName)));
|
assertTrue(FSUtils.isLogFile(new Path(logFileName)));
|
||||||
assertTrue(commitTime.equals(FSUtils.getBaseCommitTimeFromLogPath(new Path(logFileName))));
|
assertTrue(instantTime.equals(FSUtils.getBaseCommitTimeFromLogPath(new Path(logFileName))));
|
||||||
assertTrue(fileId.equals(FSUtils.getFileIdFromLogPath(new Path(logFileName))));
|
assertTrue(fileId.equals(FSUtils.getFileIdFromLogPath(new Path(logFileName))));
|
||||||
assertTrue(version == FSUtils.getFileVersionFromLog(new Path(logFileName)));
|
assertTrue(version == FSUtils.getFileVersionFromLog(new Path(logFileName)));
|
||||||
assertTrue(LOG_STR.equals(FSUtils.getFileExtensionFromLog(new Path(logFileName))));
|
assertTrue(LOG_STR.equals(FSUtils.getFileExtensionFromLog(new Path(logFileName))));
|
||||||
@@ -342,16 +342,16 @@ public class TestFSUtils extends HoodieCommonTestHarness {
|
|||||||
// create three versions of log file
|
// create three versions of log file
|
||||||
String partitionPath = basePath + "/" + partitionStr;
|
String partitionPath = basePath + "/" + partitionStr;
|
||||||
new File(partitionPath).mkdirs();
|
new File(partitionPath).mkdirs();
|
||||||
String log1 = FSUtils.makeLogFileName(fileId, LOG_EXTENTION, commitTime, 1, writeToken);
|
String log1 = FSUtils.makeLogFileName(fileId, LOG_EXTENTION, instantTime, 1, writeToken);
|
||||||
new File(partitionPath + "/" + log1).createNewFile();
|
new File(partitionPath + "/" + log1).createNewFile();
|
||||||
String log2 = FSUtils.makeLogFileName(fileId, LOG_EXTENTION, commitTime, 2, writeToken);
|
String log2 = FSUtils.makeLogFileName(fileId, LOG_EXTENTION, instantTime, 2, writeToken);
|
||||||
new File(partitionPath + "/" + log2).createNewFile();
|
new File(partitionPath + "/" + log2).createNewFile();
|
||||||
String log3 = FSUtils.makeLogFileName(fileId, LOG_EXTENTION, commitTime, 3, writeToken);
|
String log3 = FSUtils.makeLogFileName(fileId, LOG_EXTENTION, instantTime, 3, writeToken);
|
||||||
new File(partitionPath + "/" + log3).createNewFile();
|
new File(partitionPath + "/" + log3).createNewFile();
|
||||||
|
|
||||||
assertTrue(3 == FSUtils.getLatestLogVersion(FSUtils.getFs(basePath, new Configuration()),
|
assertTrue(3 == FSUtils.getLatestLogVersion(FSUtils.getFs(basePath, new Configuration()),
|
||||||
new Path(partitionPath), fileId, LOG_EXTENTION, commitTime).get().getLeft());
|
new Path(partitionPath), fileId, LOG_EXTENTION, instantTime).get().getLeft());
|
||||||
assertTrue(4 == FSUtils.computeNextLogVersion(FSUtils.getFs(basePath, new Configuration()),
|
assertTrue(4 == FSUtils.computeNextLogVersion(FSUtils.getFs(basePath, new Configuration()),
|
||||||
new Path(partitionPath), fileId, LOG_EXTENTION, commitTime));
|
new Path(partitionPath), fileId, LOG_EXTENTION, instantTime));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -74,15 +74,15 @@ public class TestRecord implements Serializable {
|
|||||||
private TestNestedRecord testNestedRecord;
|
private TestNestedRecord testNestedRecord;
|
||||||
private String[] stringArray;
|
private String[] stringArray;
|
||||||
|
|
||||||
public TestRecord(String commitTime, int recordNumber, String fileId) {
|
public TestRecord(String instantTime, int recordNumber, String fileId) {
|
||||||
this._hoodie_commit_time = commitTime;
|
this._hoodie_commit_time = instantTime;
|
||||||
this._hoodie_record_key = "key" + recordNumber;
|
this._hoodie_record_key = "key" + recordNumber;
|
||||||
this._hoodie_partition_path = commitTime;
|
this._hoodie_partition_path = instantTime;
|
||||||
this._hoodie_file_name = fileId;
|
this._hoodie_file_name = fileId;
|
||||||
this._hoodie_commit_seqno = commitTime + recordNumber;
|
this._hoodie_commit_seqno = instantTime + recordNumber;
|
||||||
|
|
||||||
String commitTimeSuffix = "@" + commitTime;
|
String commitTimeSuffix = "@" + instantTime;
|
||||||
int commitHashCode = commitTime.hashCode();
|
int commitHashCode = instantTime.hashCode();
|
||||||
|
|
||||||
this.field1 = "field" + recordNumber;
|
this.field1 = "field" + recordNumber;
|
||||||
this.field2 = "field" + recordNumber + commitTimeSuffix;
|
this.field2 = "field" + recordNumber + commitTimeSuffix;
|
||||||
|
|||||||
@@ -215,7 +215,7 @@ public class TestExternalSpillableMap extends HoodieCommonTestHarness {
|
|||||||
String newCommitTime = HoodieActiveTimeline.createNewInstantTime();
|
String newCommitTime = HoodieActiveTimeline.createNewInstantTime();
|
||||||
List<String> keysToBeUpdated = new ArrayList<>();
|
List<String> keysToBeUpdated = new ArrayList<>();
|
||||||
keysToBeUpdated.add(key);
|
keysToBeUpdated.add(key);
|
||||||
// Update the commitTime for this record
|
// Update the instantTime for this record
|
||||||
List<IndexedRecord> updatedRecords =
|
List<IndexedRecord> updatedRecords =
|
||||||
SchemaTestUtil.updateHoodieTestRecords(keysToBeUpdated, recordsToUpdate, newCommitTime);
|
SchemaTestUtil.updateHoodieTestRecords(keysToBeUpdated, recordsToUpdate, newCommitTime);
|
||||||
// Upsert this updated record
|
// Upsert this updated record
|
||||||
@@ -238,7 +238,7 @@ public class TestExternalSpillableMap extends HoodieCommonTestHarness {
|
|||||||
// Upsert this updated record
|
// Upsert this updated record
|
||||||
SpillableMapTestUtils.upsertRecords(updatedRecords, records);
|
SpillableMapTestUtils.upsertRecords(updatedRecords, records);
|
||||||
gRecord = (GenericRecord) records.get(key).getData().getInsertValue(schema).get();
|
gRecord = (GenericRecord) records.get(key).getData().getInsertValue(schema).get();
|
||||||
// The record returned for this key should have the updated commitTime
|
// The record returned for this key should have the updated instantTime
|
||||||
assert newCommitTime.contentEquals(gRecord.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString());
|
assert newCommitTime.contentEquals(gRecord.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -170,10 +170,10 @@ public class InputFormatTestUtil {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private static Iterable<? extends GenericRecord> generateAvroRecords(Schema schema, int numberOfRecords,
|
private static Iterable<? extends GenericRecord> generateAvroRecords(Schema schema, int numberOfRecords,
|
||||||
String commitTime, String fileId) throws IOException {
|
String instantTime, String fileId) throws IOException {
|
||||||
List<GenericRecord> records = new ArrayList<>(numberOfRecords);
|
List<GenericRecord> records = new ArrayList<>(numberOfRecords);
|
||||||
for (int i = 0; i < numberOfRecords; i++) {
|
for (int i = 0; i < numberOfRecords; i++) {
|
||||||
records.add(SchemaTestUtil.generateAvroRecordFromJson(schema, i, commitTime, fileId));
|
records.add(SchemaTestUtil.generateAvroRecordFromJson(schema, i, instantTime, fileId));
|
||||||
}
|
}
|
||||||
return records;
|
return records;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -265,19 +265,19 @@ public class TestHoodieRealtimeRecordReader {
|
|||||||
// initial commit
|
// initial commit
|
||||||
Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getEvolvedSchema());
|
Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getEvolvedSchema());
|
||||||
HoodieTestUtils.init(hadoopConf, basePath.getRoot().getAbsolutePath(), HoodieTableType.MERGE_ON_READ);
|
HoodieTestUtils.init(hadoopConf, basePath.getRoot().getAbsolutePath(), HoodieTableType.MERGE_ON_READ);
|
||||||
String commitTime = "100";
|
String instantTime = "100";
|
||||||
final int numRecords = 1000;
|
final int numRecords = 1000;
|
||||||
final int firstBatchLastRecordKey = numRecords - 1;
|
final int firstBatchLastRecordKey = numRecords - 1;
|
||||||
final int secondBatchLastRecordKey = 2 * numRecords - 1;
|
final int secondBatchLastRecordKey = 2 * numRecords - 1;
|
||||||
File partitionDir = InputFormatTestUtil.prepareParquetTable(basePath, schema, 1, numRecords, commitTime);
|
File partitionDir = InputFormatTestUtil.prepareParquetTable(basePath, schema, 1, numRecords, instantTime);
|
||||||
InputFormatTestUtil.commit(basePath, commitTime);
|
InputFormatTestUtil.commit(basePath, instantTime);
|
||||||
// Add the paths
|
// Add the paths
|
||||||
FileInputFormat.setInputPaths(jobConf, partitionDir.getPath());
|
FileInputFormat.setInputPaths(jobConf, partitionDir.getPath());
|
||||||
|
|
||||||
// insert new records to log file
|
// insert new records to log file
|
||||||
String newCommitTime = "101";
|
String newCommitTime = "101";
|
||||||
HoodieLogFormat.Writer writer =
|
HoodieLogFormat.Writer writer =
|
||||||
writeDataBlockToLogFile(partitionDir, schema, "fileid0", commitTime, newCommitTime, numRecords, numRecords, 0);
|
writeDataBlockToLogFile(partitionDir, schema, "fileid0", instantTime, newCommitTime, numRecords, numRecords, 0);
|
||||||
long size = writer.getCurrentSize();
|
long size = writer.getCurrentSize();
|
||||||
writer.close();
|
writer.close();
|
||||||
assertTrue("block - size should be > 0", size > 0);
|
assertTrue("block - size should be > 0", size > 0);
|
||||||
@@ -285,7 +285,7 @@ public class TestHoodieRealtimeRecordReader {
|
|||||||
// create a split with baseFile (parquet file written earlier) and new log file(s)
|
// create a split with baseFile (parquet file written earlier) and new log file(s)
|
||||||
String logFilePath = writer.getLogFile().getPath().toString();
|
String logFilePath = writer.getLogFile().getPath().toString();
|
||||||
HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit(
|
HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit(
|
||||||
new FileSplit(new Path(partitionDir + "/fileid0_1-0-1_" + commitTime + ".parquet"), 0, 1, jobConf),
|
new FileSplit(new Path(partitionDir + "/fileid0_1-0-1_" + instantTime + ".parquet"), 0, 1, jobConf),
|
||||||
basePath.getRoot().getPath(), Collections.singletonList(logFilePath), newCommitTime);
|
basePath.getRoot().getPath(), Collections.singletonList(logFilePath), newCommitTime);
|
||||||
|
|
||||||
// create a RecordReader to be used by HoodieRealtimeRecordReader
|
// create a RecordReader to be used by HoodieRealtimeRecordReader
|
||||||
@@ -340,18 +340,18 @@ public class TestHoodieRealtimeRecordReader {
|
|||||||
// initial commit
|
// initial commit
|
||||||
Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getComplexEvolvedSchema());
|
Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getComplexEvolvedSchema());
|
||||||
HoodieTestUtils.init(hadoopConf, basePath.getRoot().getAbsolutePath(), HoodieTableType.MERGE_ON_READ);
|
HoodieTestUtils.init(hadoopConf, basePath.getRoot().getAbsolutePath(), HoodieTableType.MERGE_ON_READ);
|
||||||
String commitTime = "100";
|
String instantTime = "100";
|
||||||
int numberOfRecords = 100;
|
int numberOfRecords = 100;
|
||||||
int numberOfLogRecords = numberOfRecords / 2;
|
int numberOfLogRecords = numberOfRecords / 2;
|
||||||
File partitionDir = InputFormatTestUtil.prepareParquetTable(basePath, schema, 1, numberOfRecords, commitTime);
|
File partitionDir = InputFormatTestUtil.prepareParquetTable(basePath, schema, 1, numberOfRecords, instantTime);
|
||||||
InputFormatTestUtil.commit(basePath, commitTime);
|
InputFormatTestUtil.commit(basePath, instantTime);
|
||||||
// Add the paths
|
// Add the paths
|
||||||
FileInputFormat.setInputPaths(jobConf, partitionDir.getPath());
|
FileInputFormat.setInputPaths(jobConf, partitionDir.getPath());
|
||||||
|
|
||||||
// update files or generate new log file
|
// update files or generate new log file
|
||||||
String newCommitTime = "101";
|
String newCommitTime = "101";
|
||||||
HoodieLogFormat.Writer writer =
|
HoodieLogFormat.Writer writer =
|
||||||
writeLogFile(partitionDir, schema, "fileid0", commitTime, newCommitTime, numberOfLogRecords);
|
writeLogFile(partitionDir, schema, "fileid0", instantTime, newCommitTime, numberOfLogRecords);
|
||||||
long size = writer.getCurrentSize();
|
long size = writer.getCurrentSize();
|
||||||
writer.close();
|
writer.close();
|
||||||
assertTrue("block - size should be > 0", size > 0);
|
assertTrue("block - size should be > 0", size > 0);
|
||||||
@@ -360,7 +360,7 @@ public class TestHoodieRealtimeRecordReader {
|
|||||||
// create a split with baseFile (parquet file written earlier) and new log file(s)
|
// create a split with baseFile (parquet file written earlier) and new log file(s)
|
||||||
String logFilePath = writer.getLogFile().getPath().toString();
|
String logFilePath = writer.getLogFile().getPath().toString();
|
||||||
HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit(
|
HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit(
|
||||||
new FileSplit(new Path(partitionDir + "/fileid0_1-0-1_" + commitTime + ".parquet"), 0, 1, jobConf),
|
new FileSplit(new Path(partitionDir + "/fileid0_1-0-1_" + instantTime + ".parquet"), 0, 1, jobConf),
|
||||||
basePath.getRoot().getPath(), Collections.singletonList(logFilePath), newCommitTime);
|
basePath.getRoot().getPath(), Collections.singletonList(logFilePath), newCommitTime);
|
||||||
|
|
||||||
// create a RecordReader to be used by HoodieRealtimeRecordReader
|
// create a RecordReader to be used by HoodieRealtimeRecordReader
|
||||||
@@ -385,7 +385,7 @@ public class TestHoodieRealtimeRecordReader {
|
|||||||
String recordCommitTime;
|
String recordCommitTime;
|
||||||
// check if the record written is with latest commit, here "101"
|
// check if the record written is with latest commit, here "101"
|
||||||
if (numRecordsRead > numberOfLogRecords) {
|
if (numRecordsRead > numberOfLogRecords) {
|
||||||
recordCommitTime = commitTime;
|
recordCommitTime = instantTime;
|
||||||
} else {
|
} else {
|
||||||
recordCommitTime = newCommitTime;
|
recordCommitTime = newCommitTime;
|
||||||
}
|
}
|
||||||
@@ -466,12 +466,12 @@ public class TestHoodieRealtimeRecordReader {
|
|||||||
List<String> logFilePaths = new ArrayList<>();
|
List<String> logFilePaths = new ArrayList<>();
|
||||||
Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getSimpleSchema());
|
Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getSimpleSchema());
|
||||||
HoodieTestUtils.init(hadoopConf, basePath.getRoot().getAbsolutePath(), HoodieTableType.MERGE_ON_READ);
|
HoodieTestUtils.init(hadoopConf, basePath.getRoot().getAbsolutePath(), HoodieTableType.MERGE_ON_READ);
|
||||||
String commitTime = "100";
|
String instantTime = "100";
|
||||||
int numberOfRecords = 100;
|
int numberOfRecords = 100;
|
||||||
int numberOfLogRecords = numberOfRecords / 2;
|
int numberOfLogRecords = numberOfRecords / 2;
|
||||||
File partitionDir =
|
File partitionDir =
|
||||||
InputFormatTestUtil.prepareSimpleParquetTable(basePath, schema, 1, numberOfRecords, commitTime);
|
InputFormatTestUtil.prepareSimpleParquetTable(basePath, schema, 1, numberOfRecords, instantTime);
|
||||||
InputFormatTestUtil.commit(basePath, commitTime);
|
InputFormatTestUtil.commit(basePath, instantTime);
|
||||||
// Add the paths
|
// Add the paths
|
||||||
FileInputFormat.setInputPaths(jobConf, partitionDir.getPath());
|
FileInputFormat.setInputPaths(jobConf, partitionDir.getPath());
|
||||||
List<Field> firstSchemaFields = schema.getFields();
|
List<Field> firstSchemaFields = schema.getFields();
|
||||||
@@ -480,7 +480,7 @@ public class TestHoodieRealtimeRecordReader {
|
|||||||
schema = SchemaTestUtil.getComplexEvolvedSchema();
|
schema = SchemaTestUtil.getComplexEvolvedSchema();
|
||||||
String newCommitTime = "101";
|
String newCommitTime = "101";
|
||||||
HoodieLogFormat.Writer writer =
|
HoodieLogFormat.Writer writer =
|
||||||
writeDataBlockToLogFile(partitionDir, schema, "fileid0", commitTime, newCommitTime, numberOfLogRecords, 0, 1);
|
writeDataBlockToLogFile(partitionDir, schema, "fileid0", instantTime, newCommitTime, numberOfLogRecords, 0, 1);
|
||||||
long size = writer.getCurrentSize();
|
long size = writer.getCurrentSize();
|
||||||
logFilePaths.add(writer.getLogFile().getPath().toString());
|
logFilePaths.add(writer.getLogFile().getPath().toString());
|
||||||
writer.close();
|
writer.close();
|
||||||
@@ -488,14 +488,14 @@ public class TestHoodieRealtimeRecordReader {
|
|||||||
|
|
||||||
// write rollback for the previous block in new log file version
|
// write rollback for the previous block in new log file version
|
||||||
newCommitTime = "102";
|
newCommitTime = "102";
|
||||||
writer = writeRollbackBlockToLogFile(partitionDir, schema, "fileid0", commitTime, newCommitTime, "101", 1);
|
writer = writeRollbackBlockToLogFile(partitionDir, schema, "fileid0", instantTime, newCommitTime, "101", 1);
|
||||||
logFilePaths.add(writer.getLogFile().getPath().toString());
|
logFilePaths.add(writer.getLogFile().getPath().toString());
|
||||||
writer.close();
|
writer.close();
|
||||||
InputFormatTestUtil.deltaCommit(basePath, newCommitTime);
|
InputFormatTestUtil.deltaCommit(basePath, newCommitTime);
|
||||||
|
|
||||||
// create a split with baseFile (parquet file written earlier) and new log file(s)
|
// create a split with baseFile (parquet file written earlier) and new log file(s)
|
||||||
HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit(
|
HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit(
|
||||||
new FileSplit(new Path(partitionDir + "/fileid0_1_" + commitTime + ".parquet"), 0, 1, jobConf),
|
new FileSplit(new Path(partitionDir + "/fileid0_1_" + instantTime + ".parquet"), 0, 1, jobConf),
|
||||||
basePath.getRoot().getPath(), logFilePaths, newCommitTime);
|
basePath.getRoot().getPath(), logFilePaths, newCommitTime);
|
||||||
|
|
||||||
// create a RecordReader to be used by HoodieRealtimeRecordReader
|
// create a RecordReader to be used by HoodieRealtimeRecordReader
|
||||||
|
|||||||
@@ -150,8 +150,8 @@ public class TestHiveSyncTool {
|
|||||||
@Test
|
@Test
|
||||||
public void testBasicSync() throws Exception {
|
public void testBasicSync() throws Exception {
|
||||||
TestUtil.hiveSyncConfig.useJdbc = this.useJdbc;
|
TestUtil.hiveSyncConfig.useJdbc = this.useJdbc;
|
||||||
String commitTime = "100";
|
String instantTime = "100";
|
||||||
TestUtil.createCOWTable(commitTime, 5);
|
TestUtil.createCOWTable(instantTime, 5);
|
||||||
HoodieHiveClient hiveClient =
|
HoodieHiveClient hiveClient =
|
||||||
new HoodieHiveClient(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
|
new HoodieHiveClient(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
|
||||||
assertFalse("Table " + TestUtil.hiveSyncConfig.tableName + " should not exist initially",
|
assertFalse("Table " + TestUtil.hiveSyncConfig.tableName + " should not exist initially",
|
||||||
@@ -166,7 +166,7 @@ public class TestHiveSyncTool {
|
|||||||
hiveClient.getDataSchema().getColumns().size() + 1);
|
hiveClient.getDataSchema().getColumns().size() + 1);
|
||||||
assertEquals("Table partitions should match the number of partitions we wrote", 5,
|
assertEquals("Table partitions should match the number of partitions we wrote", 5,
|
||||||
hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size());
|
hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size());
|
||||||
assertEquals("The last commit that was sycned should be updated in the TBLPROPERTIES", commitTime,
|
assertEquals("The last commit that was sycned should be updated in the TBLPROPERTIES", instantTime,
|
||||||
hiveClient.getLastCommitTimeSynced(TestUtil.hiveSyncConfig.tableName).get());
|
hiveClient.getLastCommitTimeSynced(TestUtil.hiveSyncConfig.tableName).get());
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -248,9 +248,9 @@ public class TestHiveSyncTool {
|
|||||||
@Test
|
@Test
|
||||||
public void testSyncMergeOnRead() throws Exception {
|
public void testSyncMergeOnRead() throws Exception {
|
||||||
TestUtil.hiveSyncConfig.useJdbc = this.useJdbc;
|
TestUtil.hiveSyncConfig.useJdbc = this.useJdbc;
|
||||||
String commitTime = "100";
|
String instantTime = "100";
|
||||||
String deltaCommitTime = "101";
|
String deltaCommitTime = "101";
|
||||||
TestUtil.createMORTable(commitTime, deltaCommitTime, 5);
|
TestUtil.createMORTable(instantTime, deltaCommitTime, 5);
|
||||||
|
|
||||||
String roTableName = TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_READ_OPTIMIZED_TABLE;
|
String roTableName = TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_READ_OPTIMIZED_TABLE;
|
||||||
HoodieHiveClient hiveClient = new HoodieHiveClient(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
|
HoodieHiveClient hiveClient = new HoodieHiveClient(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
|
||||||
@@ -291,10 +291,10 @@ public class TestHiveSyncTool {
|
|||||||
@Test
|
@Test
|
||||||
public void testSyncMergeOnReadRT() throws Exception {
|
public void testSyncMergeOnReadRT() throws Exception {
|
||||||
TestUtil.hiveSyncConfig.useJdbc = this.useJdbc;
|
TestUtil.hiveSyncConfig.useJdbc = this.useJdbc;
|
||||||
String commitTime = "100";
|
String instantTime = "100";
|
||||||
String deltaCommitTime = "101";
|
String deltaCommitTime = "101";
|
||||||
String snapshotTableName = TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE;
|
String snapshotTableName = TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE;
|
||||||
TestUtil.createMORTable(commitTime, deltaCommitTime, 5);
|
TestUtil.createMORTable(instantTime, deltaCommitTime, 5);
|
||||||
HoodieHiveClient hiveClientRT =
|
HoodieHiveClient hiveClientRT =
|
||||||
new HoodieHiveClient(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
|
new HoodieHiveClient(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
|
||||||
|
|
||||||
@@ -338,8 +338,8 @@ public class TestHiveSyncTool {
|
|||||||
@Test
|
@Test
|
||||||
public void testMultiPartitionKeySync() throws Exception {
|
public void testMultiPartitionKeySync() throws Exception {
|
||||||
TestUtil.hiveSyncConfig.useJdbc = this.useJdbc;
|
TestUtil.hiveSyncConfig.useJdbc = this.useJdbc;
|
||||||
String commitTime = "100";
|
String instantTime = "100";
|
||||||
TestUtil.createCOWTable(commitTime, 5);
|
TestUtil.createCOWTable(instantTime, 5);
|
||||||
|
|
||||||
HiveSyncConfig hiveSyncConfig = HiveSyncConfig.copy(TestUtil.hiveSyncConfig);
|
HiveSyncConfig hiveSyncConfig = HiveSyncConfig.copy(TestUtil.hiveSyncConfig);
|
||||||
hiveSyncConfig.partitionValueExtractorClass = MultiPartKeysValueExtractor.class.getCanonicalName();
|
hiveSyncConfig.partitionValueExtractorClass = MultiPartKeysValueExtractor.class.getCanonicalName();
|
||||||
@@ -360,7 +360,7 @@ public class TestHiveSyncTool {
|
|||||||
hiveClient.getDataSchema().getColumns().size() + 3);
|
hiveClient.getDataSchema().getColumns().size() + 3);
|
||||||
assertEquals("Table partitions should match the number of partitions we wrote", 5,
|
assertEquals("Table partitions should match the number of partitions we wrote", 5,
|
||||||
hiveClient.scanTablePartitions(hiveSyncConfig.tableName).size());
|
hiveClient.scanTablePartitions(hiveSyncConfig.tableName).size());
|
||||||
assertEquals("The last commit that was sycned should be updated in the TBLPROPERTIES", commitTime,
|
assertEquals("The last commit that was sycned should be updated in the TBLPROPERTIES", instantTime,
|
||||||
hiveClient.getLastCommitTimeSynced(hiveSyncConfig.tableName).get());
|
hiveClient.getLastCommitTimeSynced(hiveSyncConfig.tableName).get());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -153,7 +153,7 @@ public class TestUtil {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void createCOWTable(String commitTime, int numberOfPartitions)
|
static void createCOWTable(String instantTime, int numberOfPartitions)
|
||||||
throws IOException, InitializationError, URISyntaxException {
|
throws IOException, InitializationError, URISyntaxException {
|
||||||
Path path = new Path(hiveSyncConfig.basePath);
|
Path path = new Path(hiveSyncConfig.basePath);
|
||||||
FileIOUtils.deleteDirectory(new File(hiveSyncConfig.basePath));
|
FileIOUtils.deleteDirectory(new File(hiveSyncConfig.basePath));
|
||||||
@@ -162,12 +162,12 @@ public class TestUtil {
|
|||||||
boolean result = fileSystem.mkdirs(path);
|
boolean result = fileSystem.mkdirs(path);
|
||||||
checkResult(result);
|
checkResult(result);
|
||||||
DateTime dateTime = DateTime.now();
|
DateTime dateTime = DateTime.now();
|
||||||
HoodieCommitMetadata commitMetadata = createPartitions(numberOfPartitions, true, dateTime, commitTime);
|
HoodieCommitMetadata commitMetadata = createPartitions(numberOfPartitions, true, dateTime, instantTime);
|
||||||
createdTablesSet.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName);
|
createdTablesSet.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName);
|
||||||
createCommitFile(commitMetadata, commitTime);
|
createCommitFile(commitMetadata, instantTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void createMORTable(String commitTime, String deltaCommitTime, int numberOfPartitions)
|
static void createMORTable(String instantTime, String deltaCommitTime, int numberOfPartitions)
|
||||||
throws IOException, InitializationError, URISyntaxException, InterruptedException {
|
throws IOException, InitializationError, URISyntaxException, InterruptedException {
|
||||||
Path path = new Path(hiveSyncConfig.basePath);
|
Path path = new Path(hiveSyncConfig.basePath);
|
||||||
FileIOUtils.deleteDirectory(new File(hiveSyncConfig.basePath));
|
FileIOUtils.deleteDirectory(new File(hiveSyncConfig.basePath));
|
||||||
@@ -177,38 +177,38 @@ public class TestUtil {
|
|||||||
boolean result = fileSystem.mkdirs(path);
|
boolean result = fileSystem.mkdirs(path);
|
||||||
checkResult(result);
|
checkResult(result);
|
||||||
DateTime dateTime = DateTime.now();
|
DateTime dateTime = DateTime.now();
|
||||||
HoodieCommitMetadata commitMetadata = createPartitions(numberOfPartitions, true, dateTime, commitTime);
|
HoodieCommitMetadata commitMetadata = createPartitions(numberOfPartitions, true, dateTime, instantTime);
|
||||||
createdTablesSet.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName);
|
createdTablesSet.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName);
|
||||||
createdTablesSet
|
createdTablesSet
|
||||||
.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE);
|
.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE);
|
||||||
HoodieCommitMetadata compactionMetadata = new HoodieCommitMetadata();
|
HoodieCommitMetadata compactionMetadata = new HoodieCommitMetadata();
|
||||||
commitMetadata.getPartitionToWriteStats()
|
commitMetadata.getPartitionToWriteStats()
|
||||||
.forEach((key, value) -> value.forEach(l -> compactionMetadata.addWriteStat(key, l)));
|
.forEach((key, value) -> value.forEach(l -> compactionMetadata.addWriteStat(key, l)));
|
||||||
createCompactionCommitFile(compactionMetadata, commitTime);
|
createCompactionCommitFile(compactionMetadata, instantTime);
|
||||||
// Write a delta commit
|
// Write a delta commit
|
||||||
HoodieCommitMetadata deltaMetadata = createLogFiles(commitMetadata.getPartitionToWriteStats(), true);
|
HoodieCommitMetadata deltaMetadata = createLogFiles(commitMetadata.getPartitionToWriteStats(), true);
|
||||||
createDeltaCommitFile(deltaMetadata, deltaCommitTime);
|
createDeltaCommitFile(deltaMetadata, deltaCommitTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void addCOWPartitions(int numberOfPartitions, boolean isParquetSchemaSimple, DateTime startFrom,
|
static void addCOWPartitions(int numberOfPartitions, boolean isParquetSchemaSimple, DateTime startFrom,
|
||||||
String commitTime) throws IOException, URISyntaxException {
|
String instantTime) throws IOException, URISyntaxException {
|
||||||
HoodieCommitMetadata commitMetadata =
|
HoodieCommitMetadata commitMetadata =
|
||||||
createPartitions(numberOfPartitions, isParquetSchemaSimple, startFrom, commitTime);
|
createPartitions(numberOfPartitions, isParquetSchemaSimple, startFrom, instantTime);
|
||||||
createdTablesSet.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName);
|
createdTablesSet.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName);
|
||||||
createCommitFile(commitMetadata, commitTime);
|
createCommitFile(commitMetadata, instantTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void addMORPartitions(int numberOfPartitions, boolean isParquetSchemaSimple, boolean isLogSchemaSimple,
|
static void addMORPartitions(int numberOfPartitions, boolean isParquetSchemaSimple, boolean isLogSchemaSimple,
|
||||||
DateTime startFrom, String commitTime, String deltaCommitTime)
|
DateTime startFrom, String instantTime, String deltaCommitTime)
|
||||||
throws IOException, URISyntaxException, InterruptedException {
|
throws IOException, URISyntaxException, InterruptedException {
|
||||||
HoodieCommitMetadata commitMetadata =
|
HoodieCommitMetadata commitMetadata =
|
||||||
createPartitions(numberOfPartitions, isParquetSchemaSimple, startFrom, commitTime);
|
createPartitions(numberOfPartitions, isParquetSchemaSimple, startFrom, instantTime);
|
||||||
createdTablesSet.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_READ_OPTIMIZED_TABLE);
|
createdTablesSet.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_READ_OPTIMIZED_TABLE);
|
||||||
createdTablesSet.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE);
|
createdTablesSet.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE);
|
||||||
HoodieCommitMetadata compactionMetadata = new HoodieCommitMetadata();
|
HoodieCommitMetadata compactionMetadata = new HoodieCommitMetadata();
|
||||||
commitMetadata.getPartitionToWriteStats()
|
commitMetadata.getPartitionToWriteStats()
|
||||||
.forEach((key, value) -> value.forEach(l -> compactionMetadata.addWriteStat(key, l)));
|
.forEach((key, value) -> value.forEach(l -> compactionMetadata.addWriteStat(key, l)));
|
||||||
createCompactionCommitFile(compactionMetadata, commitTime);
|
createCompactionCommitFile(compactionMetadata, instantTime);
|
||||||
HoodieCommitMetadata deltaMetadata = createLogFiles(commitMetadata.getPartitionToWriteStats(), isLogSchemaSimple);
|
HoodieCommitMetadata deltaMetadata = createLogFiles(commitMetadata.getPartitionToWriteStats(), isLogSchemaSimple);
|
||||||
createDeltaCommitFile(deltaMetadata, deltaCommitTime);
|
createDeltaCommitFile(deltaMetadata, deltaCommitTime);
|
||||||
}
|
}
|
||||||
@@ -232,7 +232,7 @@ public class TestUtil {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private static HoodieCommitMetadata createPartitions(int numberOfPartitions, boolean isParquetSchemaSimple,
|
private static HoodieCommitMetadata createPartitions(int numberOfPartitions, boolean isParquetSchemaSimple,
|
||||||
DateTime startFrom, String commitTime) throws IOException, URISyntaxException {
|
DateTime startFrom, String instantTime) throws IOException, URISyntaxException {
|
||||||
startFrom = startFrom.withTimeAtStartOfDay();
|
startFrom = startFrom.withTimeAtStartOfDay();
|
||||||
|
|
||||||
HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata();
|
HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata();
|
||||||
@@ -241,20 +241,20 @@ public class TestUtil {
|
|||||||
Path partPath = new Path(hiveSyncConfig.basePath + "/" + partitionPath);
|
Path partPath = new Path(hiveSyncConfig.basePath + "/" + partitionPath);
|
||||||
fileSystem.makeQualified(partPath);
|
fileSystem.makeQualified(partPath);
|
||||||
fileSystem.mkdirs(partPath);
|
fileSystem.mkdirs(partPath);
|
||||||
List<HoodieWriteStat> writeStats = createTestData(partPath, isParquetSchemaSimple, commitTime);
|
List<HoodieWriteStat> writeStats = createTestData(partPath, isParquetSchemaSimple, instantTime);
|
||||||
startFrom = startFrom.minusDays(1);
|
startFrom = startFrom.minusDays(1);
|
||||||
writeStats.forEach(s -> commitMetadata.addWriteStat(partitionPath, s));
|
writeStats.forEach(s -> commitMetadata.addWriteStat(partitionPath, s));
|
||||||
}
|
}
|
||||||
return commitMetadata;
|
return commitMetadata;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static List<HoodieWriteStat> createTestData(Path partPath, boolean isParquetSchemaSimple, String commitTime)
|
private static List<HoodieWriteStat> createTestData(Path partPath, boolean isParquetSchemaSimple, String instantTime)
|
||||||
throws IOException, URISyntaxException {
|
throws IOException, URISyntaxException {
|
||||||
List<HoodieWriteStat> writeStats = new ArrayList<>();
|
List<HoodieWriteStat> writeStats = new ArrayList<>();
|
||||||
for (int i = 0; i < 5; i++) {
|
for (int i = 0; i < 5; i++) {
|
||||||
// Create 5 files
|
// Create 5 files
|
||||||
String fileId = UUID.randomUUID().toString();
|
String fileId = UUID.randomUUID().toString();
|
||||||
Path filePath = new Path(partPath.toString() + "/" + FSUtils.makeDataFileName(commitTime, "1-0-1", fileId));
|
Path filePath = new Path(partPath.toString() + "/" + FSUtils.makeDataFileName(instantTime, "1-0-1", fileId));
|
||||||
generateParquetData(filePath, isParquetSchemaSimple);
|
generateParquetData(filePath, isParquetSchemaSimple);
|
||||||
HoodieWriteStat writeStat = new HoodieWriteStat();
|
HoodieWriteStat writeStat = new HoodieWriteStat();
|
||||||
writeStat.setFileId(fileId);
|
writeStat.setFileId(fileId);
|
||||||
@@ -313,20 +313,20 @@ public class TestUtil {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void createCommitFile(HoodieCommitMetadata commitMetadata, String commitTime) throws IOException {
|
private static void createCommitFile(HoodieCommitMetadata commitMetadata, String instantTime) throws IOException {
|
||||||
byte[] bytes = commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8);
|
byte[] bytes = commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8);
|
||||||
Path fullPath = new Path(hiveSyncConfig.basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
|
Path fullPath = new Path(hiveSyncConfig.basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
|
||||||
+ HoodieTimeline.makeCommitFileName(commitTime));
|
+ HoodieTimeline.makeCommitFileName(instantTime));
|
||||||
FSDataOutputStream fsout = fileSystem.create(fullPath, true);
|
FSDataOutputStream fsout = fileSystem.create(fullPath, true);
|
||||||
fsout.write(bytes);
|
fsout.write(bytes);
|
||||||
fsout.close();
|
fsout.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void createCompactionCommitFile(HoodieCommitMetadata commitMetadata, String commitTime)
|
private static void createCompactionCommitFile(HoodieCommitMetadata commitMetadata, String instantTime)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
byte[] bytes = commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8);
|
byte[] bytes = commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8);
|
||||||
Path fullPath = new Path(hiveSyncConfig.basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
|
Path fullPath = new Path(hiveSyncConfig.basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
|
||||||
+ HoodieTimeline.makeCommitFileName(commitTime));
|
+ HoodieTimeline.makeCommitFileName(instantTime));
|
||||||
FSDataOutputStream fsout = fileSystem.create(fullPath, true);
|
FSDataOutputStream fsout = fileSystem.create(fullPath, true);
|
||||||
fsout.write(bytes);
|
fsout.write(bytes);
|
||||||
fsout.close();
|
fsout.close();
|
||||||
|
|||||||
@@ -196,20 +196,20 @@ public class DataSourceUtils {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public static JavaRDD<WriteStatus> doWriteOperation(HoodieWriteClient client, JavaRDD<HoodieRecord> hoodieRecords,
|
public static JavaRDD<WriteStatus> doWriteOperation(HoodieWriteClient client, JavaRDD<HoodieRecord> hoodieRecords,
|
||||||
String commitTime, String operation) {
|
String instantTime, String operation) {
|
||||||
if (operation.equals(DataSourceWriteOptions.BULK_INSERT_OPERATION_OPT_VAL())) {
|
if (operation.equals(DataSourceWriteOptions.BULK_INSERT_OPERATION_OPT_VAL())) {
|
||||||
return client.bulkInsert(hoodieRecords, commitTime);
|
return client.bulkInsert(hoodieRecords, instantTime);
|
||||||
} else if (operation.equals(DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL())) {
|
} else if (operation.equals(DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL())) {
|
||||||
return client.insert(hoodieRecords, commitTime);
|
return client.insert(hoodieRecords, instantTime);
|
||||||
} else {
|
} else {
|
||||||
// default is upsert
|
// default is upsert
|
||||||
return client.upsert(hoodieRecords, commitTime);
|
return client.upsert(hoodieRecords, instantTime);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static JavaRDD<WriteStatus> doDeleteOperation(HoodieWriteClient client, JavaRDD<HoodieKey> hoodieKeys,
|
public static JavaRDD<WriteStatus> doDeleteOperation(HoodieWriteClient client, JavaRDD<HoodieKey> hoodieKeys,
|
||||||
String commitTime) {
|
String instantTime) {
|
||||||
return client.delete(hoodieKeys, commitTime);
|
return client.delete(hoodieKeys, instantTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static HoodieRecord createHoodieRecord(GenericRecord gr, Comparable orderingVal, HoodieKey hKey,
|
public static HoodieRecord createHoodieRecord(GenericRecord gr, Comparable orderingVal, HoodieKey hKey,
|
||||||
|
|||||||
@@ -78,7 +78,7 @@ private[hudi] object HoodieSparkSqlWriter {
|
|||||||
|
|
||||||
val jsc = new JavaSparkContext(sparkContext)
|
val jsc = new JavaSparkContext(sparkContext)
|
||||||
val basePath = new Path(parameters("path"))
|
val basePath = new Path(parameters("path"))
|
||||||
val commitTime = HoodieActiveTimeline.createNewInstantTime()
|
val instantTime = HoodieActiveTimeline.createNewInstantTime()
|
||||||
val fs = basePath.getFileSystem(sparkContext.hadoopConfiguration)
|
val fs = basePath.getFileSystem(sparkContext.hadoopConfiguration)
|
||||||
var exists = fs.exists(new Path(basePath, HoodieTableMetaClient.METAFOLDER_NAME))
|
var exists = fs.exists(new Path(basePath, HoodieTableMetaClient.METAFOLDER_NAME))
|
||||||
|
|
||||||
@@ -140,8 +140,8 @@ private[hudi] object HoodieSparkSqlWriter {
|
|||||||
log.info("new batch has no new records, skipping...")
|
log.info("new batch has no new records, skipping...")
|
||||||
(true, common.util.Option.empty())
|
(true, common.util.Option.empty())
|
||||||
}
|
}
|
||||||
client.startCommitWithTime(commitTime)
|
client.startCommitWithTime(instantTime)
|
||||||
val writeStatuses = DataSourceUtils.doWriteOperation(client, hoodieRecords, commitTime, operation)
|
val writeStatuses = DataSourceUtils.doWriteOperation(client, hoodieRecords, instantTime, operation)
|
||||||
(writeStatuses, client)
|
(writeStatuses, client)
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
@@ -172,14 +172,14 @@ private[hudi] object HoodieSparkSqlWriter {
|
|||||||
)
|
)
|
||||||
|
|
||||||
// Issue deletes
|
// Issue deletes
|
||||||
client.startCommitWithTime(commitTime)
|
client.startCommitWithTime(instantTime)
|
||||||
val writeStatuses = DataSourceUtils.doDeleteOperation(client, hoodieKeysToDelete, commitTime)
|
val writeStatuses = DataSourceUtils.doDeleteOperation(client, hoodieKeysToDelete, instantTime)
|
||||||
(writeStatuses, client)
|
(writeStatuses, client)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check for errors and commit the write.
|
// Check for errors and commit the write.
|
||||||
val writeSuccessful = checkWriteStatus(writeStatuses, parameters, writeClient, commitTime, basePath, operation, jsc)
|
val writeSuccessful = checkWriteStatus(writeStatuses, parameters, writeClient, instantTime, basePath, operation, jsc)
|
||||||
(writeSuccessful, common.util.Option.ofNullable(commitTime))
|
(writeSuccessful, common.util.Option.ofNullable(instantTime))
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -246,7 +246,7 @@ private[hudi] object HoodieSparkSqlWriter {
|
|||||||
private def checkWriteStatus(writeStatuses: JavaRDD[WriteStatus],
|
private def checkWriteStatus(writeStatuses: JavaRDD[WriteStatus],
|
||||||
parameters: Map[String, String],
|
parameters: Map[String, String],
|
||||||
client: HoodieWriteClient[_],
|
client: HoodieWriteClient[_],
|
||||||
commitTime: String,
|
instantTime: String,
|
||||||
basePath: Path,
|
basePath: Path,
|
||||||
operation: String,
|
operation: String,
|
||||||
jsc: JavaSparkContext): Boolean = {
|
jsc: JavaSparkContext): Boolean = {
|
||||||
@@ -256,17 +256,17 @@ private[hudi] object HoodieSparkSqlWriter {
|
|||||||
val metaMap = parameters.filter(kv =>
|
val metaMap = parameters.filter(kv =>
|
||||||
kv._1.startsWith(parameters(COMMIT_METADATA_KEYPREFIX_OPT_KEY)))
|
kv._1.startsWith(parameters(COMMIT_METADATA_KEYPREFIX_OPT_KEY)))
|
||||||
val commitSuccess = if (metaMap.isEmpty) {
|
val commitSuccess = if (metaMap.isEmpty) {
|
||||||
client.commit(commitTime, writeStatuses)
|
client.commit(instantTime, writeStatuses)
|
||||||
} else {
|
} else {
|
||||||
client.commit(commitTime, writeStatuses,
|
client.commit(instantTime, writeStatuses,
|
||||||
common.util.Option.of(new util.HashMap[String, String](mapAsJavaMap(metaMap))))
|
common.util.Option.of(new util.HashMap[String, String](mapAsJavaMap(metaMap))))
|
||||||
}
|
}
|
||||||
|
|
||||||
if (commitSuccess) {
|
if (commitSuccess) {
|
||||||
log.info("Commit " + commitTime + " successful!")
|
log.info("Commit " + instantTime + " successful!")
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
log.info("Commit " + commitTime + " failed!")
|
log.info("Commit " + instantTime + " failed!")
|
||||||
}
|
}
|
||||||
|
|
||||||
val hiveSyncEnabled = parameters.get(HIVE_SYNC_ENABLED_OPT_KEY).exists(r => r.toBoolean)
|
val hiveSyncEnabled = parameters.get(HIVE_SYNC_ENABLED_OPT_KEY).exists(r => r.toBoolean)
|
||||||
|
|||||||
@@ -132,8 +132,8 @@ public class HoodieSnapshotCopier implements Serializable {
|
|||||||
if (commitFilePath.getName().equals(HoodieTableConfig.HOODIE_PROPERTIES_FILE)) {
|
if (commitFilePath.getName().equals(HoodieTableConfig.HOODIE_PROPERTIES_FILE)) {
|
||||||
return true;
|
return true;
|
||||||
} else {
|
} else {
|
||||||
String commitTime = FSUtils.getCommitFromCommitFile(commitFilePath.getName());
|
String instantTime = FSUtils.getCommitFromCommitFile(commitFilePath.getName());
|
||||||
return HoodieTimeline.compareTimestamps(commitTime, latestCommitTimestamp,
|
return HoodieTimeline.compareTimestamps(instantTime, latestCommitTimestamp,
|
||||||
HoodieTimeline.LESSER_OR_EQUAL);
|
HoodieTimeline.LESSER_OR_EQUAL);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -215,8 +215,8 @@ public class HoodieSnapshotExporter {
|
|||||||
if (commitFilePath.getName().equals(HoodieTableConfig.HOODIE_PROPERTIES_FILE)) {
|
if (commitFilePath.getName().equals(HoodieTableConfig.HOODIE_PROPERTIES_FILE)) {
|
||||||
return true;
|
return true;
|
||||||
} else {
|
} else {
|
||||||
String commitTime = FSUtils.getCommitFromCommitFile(commitFilePath.getName());
|
String instantTime = FSUtils.getCommitFromCommitFile(commitFilePath.getName());
|
||||||
return HoodieTimeline.compareTimestamps(commitTime, latestCommitTimestamp,
|
return HoodieTimeline.compareTimestamps(instantTime, latestCommitTimestamp,
|
||||||
HoodieTimeline.LESSER_OR_EQUAL);
|
HoodieTimeline.LESSER_OR_EQUAL);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -361,16 +361,16 @@ public class DeltaSync implements Serializable {
|
|||||||
|
|
||||||
boolean isEmpty = records.isEmpty();
|
boolean isEmpty = records.isEmpty();
|
||||||
|
|
||||||
String commitTime = startCommit();
|
String instantTime = startCommit();
|
||||||
LOG.info("Starting commit : " + commitTime);
|
LOG.info("Starting commit : " + instantTime);
|
||||||
|
|
||||||
JavaRDD<WriteStatus> writeStatusRDD;
|
JavaRDD<WriteStatus> writeStatusRDD;
|
||||||
if (cfg.operation == Operation.INSERT) {
|
if (cfg.operation == Operation.INSERT) {
|
||||||
writeStatusRDD = writeClient.insert(records, commitTime);
|
writeStatusRDD = writeClient.insert(records, instantTime);
|
||||||
} else if (cfg.operation == Operation.UPSERT) {
|
} else if (cfg.operation == Operation.UPSERT) {
|
||||||
writeStatusRDD = writeClient.upsert(records, commitTime);
|
writeStatusRDD = writeClient.upsert(records, instantTime);
|
||||||
} else if (cfg.operation == Operation.BULK_INSERT) {
|
} else if (cfg.operation == Operation.BULK_INSERT) {
|
||||||
writeStatusRDD = writeClient.bulkInsert(records, commitTime);
|
writeStatusRDD = writeClient.bulkInsert(records, instantTime);
|
||||||
} else {
|
} else {
|
||||||
throw new HoodieDeltaStreamerException("Unknown operation :" + cfg.operation);
|
throw new HoodieDeltaStreamerException("Unknown operation :" + cfg.operation);
|
||||||
}
|
}
|
||||||
@@ -391,9 +391,9 @@ public class DeltaSync implements Serializable {
|
|||||||
+ totalErrorRecords + "/" + totalRecords);
|
+ totalErrorRecords + "/" + totalRecords);
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean success = writeClient.commit(commitTime, writeStatusRDD, Option.of(checkpointCommitMetadata));
|
boolean success = writeClient.commit(instantTime, writeStatusRDD, Option.of(checkpointCommitMetadata));
|
||||||
if (success) {
|
if (success) {
|
||||||
LOG.info("Commit " + commitTime + " successful!");
|
LOG.info("Commit " + instantTime + " successful!");
|
||||||
|
|
||||||
// Schedule compaction if needed
|
// Schedule compaction if needed
|
||||||
if (cfg.isAsyncCompactionEnabled()) {
|
if (cfg.isAsyncCompactionEnabled()) {
|
||||||
@@ -407,8 +407,8 @@ public class DeltaSync implements Serializable {
|
|||||||
hiveSyncTimeMs = hiveSyncContext != null ? hiveSyncContext.stop() : 0;
|
hiveSyncTimeMs = hiveSyncContext != null ? hiveSyncContext.stop() : 0;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
LOG.info("Commit " + commitTime + " failed!");
|
LOG.info("Commit " + instantTime + " failed!");
|
||||||
throw new HoodieException("Commit " + commitTime + " failed!");
|
throw new HoodieException("Commit " + instantTime + " failed!");
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
LOG.error("Delta Sync found errors when writing. Errors/Total=" + totalErrorRecords + "/" + totalRecords);
|
LOG.error("Delta Sync found errors when writing. Errors/Total=" + totalErrorRecords + "/" + totalRecords);
|
||||||
@@ -420,8 +420,8 @@ public class DeltaSync implements Serializable {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
// Rolling back instant
|
// Rolling back instant
|
||||||
writeClient.rollback(commitTime);
|
writeClient.rollback(instantTime);
|
||||||
throw new HoodieException("Commit " + commitTime + " failed and rolled-back !");
|
throw new HoodieException("Commit " + instantTime + " failed and rolled-back !");
|
||||||
}
|
}
|
||||||
long overallTimeMs = overallTimerContext != null ? overallTimerContext.stop() : 0;
|
long overallTimeMs = overallTimerContext != null ? overallTimerContext.stop() : 0;
|
||||||
|
|
||||||
|
|||||||
@@ -102,10 +102,10 @@ public class HiveIncrPullSource extends AvroSource {
|
|||||||
return Option.of(commitTimes.get(0));
|
return Option.of(commitTimes.get(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
for (String commitTime : commitTimes) {
|
for (String instantTime : commitTimes) {
|
||||||
// TODO(vc): Add an option to delete consumed commits
|
// TODO(vc): Add an option to delete consumed commits
|
||||||
if (commitTime.compareTo(latestTargetCommit.get()) > 0) {
|
if (instantTime.compareTo(latestTargetCommit.get()) > 0) {
|
||||||
return Option.of(commitTime);
|
return Option.of(instantTime);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return Option.empty();
|
return Option.empty();
|
||||||
|
|||||||
@@ -80,7 +80,7 @@ public abstract class AbstractBaseTestSource extends AvroSource {
|
|||||||
super(props, sparkContext, sparkSession, schemaProvider);
|
super(props, sparkContext, sparkSession, schemaProvider);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static Stream<GenericRecord> fetchNextBatch(TypedProperties props, int sourceLimit, String commitTime,
|
protected static Stream<GenericRecord> fetchNextBatch(TypedProperties props, int sourceLimit, String instantTime,
|
||||||
int partition) {
|
int partition) {
|
||||||
int maxUniqueKeys =
|
int maxUniqueKeys =
|
||||||
props.getInteger(TestSourceConfig.MAX_UNIQUE_RECORDS_PROP, TestSourceConfig.DEFAULT_MAX_UNIQUE_RECORDS);
|
props.getInteger(TestSourceConfig.MAX_UNIQUE_RECORDS_PROP, TestSourceConfig.DEFAULT_MAX_UNIQUE_RECORDS);
|
||||||
@@ -116,14 +116,14 @@ public abstract class AbstractBaseTestSource extends AvroSource {
|
|||||||
LOG.info("After adjustments => NumInserts=" + numInserts + ", NumUpdates=" + (numUpdates - 50) + ", NumDeletes=50, maxUniqueRecords="
|
LOG.info("After adjustments => NumInserts=" + numInserts + ", NumUpdates=" + (numUpdates - 50) + ", NumDeletes=50, maxUniqueRecords="
|
||||||
+ maxUniqueKeys);
|
+ maxUniqueKeys);
|
||||||
// if we generate update followed by deletes -> some keys in update batch might be picked up for deletes. Hence generating delete batch followed by updates
|
// if we generate update followed by deletes -> some keys in update batch might be picked up for deletes. Hence generating delete batch followed by updates
|
||||||
deleteStream = dataGenerator.generateUniqueDeleteRecordStream(commitTime, 50).map(hr -> AbstractBaseTestSource.toGenericRecord(hr, dataGenerator));
|
deleteStream = dataGenerator.generateUniqueDeleteRecordStream(instantTime, 50).map(hr -> AbstractBaseTestSource.toGenericRecord(hr, dataGenerator));
|
||||||
updateStream = dataGenerator.generateUniqueUpdatesStream(commitTime, numUpdates - 50).map(hr -> AbstractBaseTestSource.toGenericRecord(hr, dataGenerator));
|
updateStream = dataGenerator.generateUniqueUpdatesStream(instantTime, numUpdates - 50).map(hr -> AbstractBaseTestSource.toGenericRecord(hr, dataGenerator));
|
||||||
} else {
|
} else {
|
||||||
LOG.info("After adjustments => NumInserts=" + numInserts + ", NumUpdates=" + numUpdates + ", maxUniqueRecords=" + maxUniqueKeys);
|
LOG.info("After adjustments => NumInserts=" + numInserts + ", NumUpdates=" + numUpdates + ", maxUniqueRecords=" + maxUniqueKeys);
|
||||||
updateStream = dataGenerator.generateUniqueUpdatesStream(commitTime, numUpdates)
|
updateStream = dataGenerator.generateUniqueUpdatesStream(instantTime, numUpdates)
|
||||||
.map(hr -> AbstractBaseTestSource.toGenericRecord(hr, dataGenerator));
|
.map(hr -> AbstractBaseTestSource.toGenericRecord(hr, dataGenerator));
|
||||||
}
|
}
|
||||||
Stream<GenericRecord> insertStream = dataGenerator.generateInsertsStream(commitTime, numInserts, false)
|
Stream<GenericRecord> insertStream = dataGenerator.generateInsertsStream(instantTime, numInserts, false)
|
||||||
.map(hr -> AbstractBaseTestSource.toGenericRecord(hr, dataGenerator));
|
.map(hr -> AbstractBaseTestSource.toGenericRecord(hr, dataGenerator));
|
||||||
return Stream.concat(deleteStream, Stream.concat(updateStream, insertStream));
|
return Stream.concat(deleteStream, Stream.concat(updateStream, insertStream));
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -99,14 +99,14 @@ public abstract class AbstractDFSSourceTestBase extends UtilitiesTestBase {
|
|||||||
* Generates a batch of test data and writes the data to a file.
|
* Generates a batch of test data and writes the data to a file.
|
||||||
*
|
*
|
||||||
* @param filename The name of the file.
|
* @param filename The name of the file.
|
||||||
* @param commitTime The commit time.
|
* @param instantTime The commit time.
|
||||||
* @param n The number of records to generate.
|
* @param n The number of records to generate.
|
||||||
* @return The file path.
|
* @return The file path.
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
Path generateOneFile(String filename, String commitTime, int n) throws IOException {
|
Path generateOneFile(String filename, String instantTime, int n) throws IOException {
|
||||||
Path path = new Path(dfsRoot, filename + fileSuffix);
|
Path path = new Path(dfsRoot, filename + fileSuffix);
|
||||||
writeNewDataToFile(dataGenerator.generateInserts(commitTime, n, useFlattenedSchema), path);
|
writeNewDataToFile(dataGenerator.generateInserts(instantTime, n, useFlattenedSchema), path);
|
||||||
return path;
|
return path;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -52,12 +52,12 @@ public class DistributedTestDataSource extends AbstractBaseTestSource {
|
|||||||
@Override
|
@Override
|
||||||
protected InputBatch<JavaRDD<GenericRecord>> fetchNewData(Option<String> lastCkptStr, long sourceLimit) {
|
protected InputBatch<JavaRDD<GenericRecord>> fetchNewData(Option<String> lastCkptStr, long sourceLimit) {
|
||||||
int nextCommitNum = lastCkptStr.map(s -> Integer.parseInt(s) + 1).orElse(0);
|
int nextCommitNum = lastCkptStr.map(s -> Integer.parseInt(s) + 1).orElse(0);
|
||||||
String commitTime = String.format("%05d", nextCommitNum);
|
String instantTime = String.format("%05d", nextCommitNum);
|
||||||
LOG.info("Source Limit is set to " + sourceLimit);
|
LOG.info("Source Limit is set to " + sourceLimit);
|
||||||
|
|
||||||
// No new data.
|
// No new data.
|
||||||
if (sourceLimit <= 0) {
|
if (sourceLimit <= 0) {
|
||||||
return new InputBatch<>(Option.empty(), commitTime);
|
return new InputBatch<>(Option.empty(), instantTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
TypedProperties newProps = new TypedProperties();
|
TypedProperties newProps = new TypedProperties();
|
||||||
@@ -76,8 +76,8 @@ public class DistributedTestDataSource extends AbstractBaseTestSource {
|
|||||||
if (!dataGeneratorMap.containsKey(p)) {
|
if (!dataGeneratorMap.containsKey(p)) {
|
||||||
initDataGen(newProps, p);
|
initDataGen(newProps, p);
|
||||||
}
|
}
|
||||||
return fetchNextBatch(newProps, perPartitionSourceLimit, commitTime, p).iterator();
|
return fetchNextBatch(newProps, perPartitionSourceLimit, instantTime, p).iterator();
|
||||||
}, true);
|
}, true);
|
||||||
return new InputBatch<>(Option.of(avroRDD), commitTime);
|
return new InputBatch<>(Option.of(avroRDD), instantTime);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -49,7 +49,7 @@ public class TestDataSource extends AbstractBaseTestSource {
|
|||||||
protected InputBatch<JavaRDD<GenericRecord>> fetchNewData(Option<String> lastCheckpointStr, long sourceLimit) {
|
protected InputBatch<JavaRDD<GenericRecord>> fetchNewData(Option<String> lastCheckpointStr, long sourceLimit) {
|
||||||
|
|
||||||
int nextCommitNum = lastCheckpointStr.map(s -> Integer.parseInt(s) + 1).orElse(0);
|
int nextCommitNum = lastCheckpointStr.map(s -> Integer.parseInt(s) + 1).orElse(0);
|
||||||
String commitTime = String.format("%05d", nextCommitNum);
|
String instantTime = String.format("%05d", nextCommitNum);
|
||||||
LOG.info("Source Limit is set to " + sourceLimit);
|
LOG.info("Source Limit is set to " + sourceLimit);
|
||||||
|
|
||||||
// No new data.
|
// No new data.
|
||||||
@@ -58,8 +58,8 @@ public class TestDataSource extends AbstractBaseTestSource {
|
|||||||
}
|
}
|
||||||
|
|
||||||
List<GenericRecord> records =
|
List<GenericRecord> records =
|
||||||
fetchNextBatch(props, (int) sourceLimit, commitTime, DEFAULT_PARTITION_NUM).collect(Collectors.toList());
|
fetchNextBatch(props, (int) sourceLimit, instantTime, DEFAULT_PARTITION_NUM).collect(Collectors.toList());
|
||||||
JavaRDD<GenericRecord> avroRDD = sparkContext.<GenericRecord>parallelize(records, 4);
|
JavaRDD<GenericRecord> avroRDD = sparkContext.<GenericRecord>parallelize(records, 4);
|
||||||
return new InputBatch<>(Option.of(avroRDD), commitTime);
|
return new InputBatch<>(Option.of(avroRDD), instantTime);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user