1
0

[HUDI-65] Rename commitTime to instantTime (#1431)

This commit is contained in:
Zhiyuan Zhao
2020-03-23 09:06:00 +08:00
committed by GitHub
parent 38c3ccc51a
commit 0241b21f77
68 changed files with 673 additions and 673 deletions

View File

@@ -86,7 +86,7 @@ public class CleansCommand implements CommandMarker {
}
@CliCommand(value = "clean showpartitions", help = "Show partition level details of a clean")
public String showCleanPartitions(@CliOption(key = {"clean"}, help = "clean to show") final String commitTime,
public String showCleanPartitions(@CliOption(key = {"clean"}, help = "clean to show") final String instantTime,
@CliOption(key = {"limit"}, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
@CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
@CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
@@ -96,10 +96,10 @@ public class CleansCommand implements CommandMarker {
HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
HoodieTimeline timeline = activeTimeline.getCleanerTimeline().filterCompletedInstants();
HoodieInstant cleanInstant = new HoodieInstant(false, HoodieTimeline.CLEAN_ACTION, commitTime);
HoodieInstant cleanInstant = new HoodieInstant(false, HoodieTimeline.CLEAN_ACTION, instantTime);
if (!timeline.containsInstant(cleanInstant)) {
return "Clean " + commitTime + " not found in metadata " + timeline;
return "Clean " + instantTime + " not found in metadata " + timeline;
}
HoodieCleanMetadata cleanMetadata =

View File

@@ -220,18 +220,18 @@ public class CommitsCommand implements CommandMarker {
}
@CliCommand(value = "commit rollback", help = "Rollback a commit")
public String rollbackCommit(@CliOption(key = {"commit"}, help = "Commit to rollback") final String commitTime,
public String rollbackCommit(@CliOption(key = {"commit"}, help = "Commit to rollback") final String instantTime,
@CliOption(key = {"sparkProperties"}, help = "Spark Properties File Path") final String sparkPropertiesPath)
throws Exception {
HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
HoodieTimeline completedTimeline = activeTimeline.getCommitsTimeline().filterCompletedInstants();
HoodieTimeline filteredTimeline = completedTimeline.filter(instant -> instant.getTimestamp().equals(commitTime));
HoodieTimeline filteredTimeline = completedTimeline.filter(instant -> instant.getTimestamp().equals(instantTime));
if (filteredTimeline.empty()) {
return "Commit " + commitTime + " not found in Commits " + completedTimeline;
return "Commit " + instantTime + " not found in Commits " + completedTimeline;
}
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
sparkLauncher.addAppArgs(SparkMain.SparkCommand.ROLLBACK.toString(), commitTime,
sparkLauncher.addAppArgs(SparkMain.SparkCommand.ROLLBACK.toString(), instantTime,
HoodieCLI.getTableMetaClient().getBasePath());
Process process = sparkLauncher.launch();
InputStreamConsumer.captureOutput(process);
@@ -239,16 +239,16 @@ public class CommitsCommand implements CommandMarker {
// Refresh the current
refreshCommits();
if (exitCode != 0) {
return "Commit " + commitTime + " failed to roll back";
return "Commit " + instantTime + " failed to roll back";
}
return "Commit " + commitTime + " rolled back";
return "Commit " + instantTime + " rolled back";
}
@CliCommand(value = "commit showpartitions", help = "Show partition level details of a commit")
public String showCommitPartitions(
@CliOption(key = {"createView"}, mandatory = false, help = "view name to store output table",
unspecifiedDefaultValue = "") final String exportTableName,
@CliOption(key = {"commit"}, help = "Commit to show") final String commitTime,
@CliOption(key = {"commit"}, help = "Commit to show") final String instantTime,
@CliOption(key = {"limit"}, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
@CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
@CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
@@ -258,10 +258,10 @@ public class CommitsCommand implements CommandMarker {
HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
HoodieTimeline timeline = activeTimeline.getCommitsTimeline().filterCompletedInstants();
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime);
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, instantTime);
if (!timeline.containsInstant(commitInstant)) {
return "Commit " + commitTime + " not found in Commits " + timeline;
return "Commit " + instantTime + " not found in Commits " + timeline;
}
HoodieCommitMetadata meta = HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(commitInstant).get(),
HoodieCommitMetadata.class);
@@ -306,7 +306,7 @@ public class CommitsCommand implements CommandMarker {
public String showCommitFiles(
@CliOption(key = {"createView"}, mandatory = false, help = "view name to store output table",
unspecifiedDefaultValue = "") final String exportTableName,
@CliOption(key = {"commit"}, help = "Commit to show") final String commitTime,
@CliOption(key = {"commit"}, help = "Commit to show") final String instantTime,
@CliOption(key = {"limit"}, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
@CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
@CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
@@ -316,10 +316,10 @@ public class CommitsCommand implements CommandMarker {
HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
HoodieTimeline timeline = activeTimeline.getCommitsTimeline().filterCompletedInstants();
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime);
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, instantTime);
if (!timeline.containsInstant(commitInstant)) {
return "Commit " + commitTime + " not found in Commits " + timeline;
return "Commit " + instantTime + " not found in Commits " + timeline;
}
HoodieCommitMetadata meta = HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(commitInstant).get(),
HoodieCommitMetadata.class);

View File

@@ -92,7 +92,7 @@ public class SavepointsCommand implements CommandMarker {
@CliCommand(value = "savepoint rollback", help = "Savepoint a commit")
public String rollbackToSavepoint(
@CliOption(key = {"savepoint"}, help = "Savepoint to rollback") final String commitTime,
@CliOption(key = {"savepoint"}, help = "Savepoint to rollback") final String instantTime,
@CliOption(key = {"sparkProperties"}, help = "Spark Properties File Path") final String sparkPropertiesPath)
throws Exception {
HoodieTableMetaClient metaClient = HoodieCLI.getTableMetaClient();
@@ -101,14 +101,14 @@ public class SavepointsCommand implements CommandMarker {
}
HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline();
HoodieTimeline timeline = activeTimeline.getCommitTimeline().filterCompletedInstants();
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime);
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, instantTime);
if (!timeline.containsInstant(commitInstant)) {
return "Commit " + commitTime + " not found in Commits " + timeline;
return "Commit " + instantTime + " not found in Commits " + timeline;
}
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
sparkLauncher.addAppArgs(SparkMain.SparkCommand.ROLLBACK_TO_SAVEPOINT.toString(), commitTime,
sparkLauncher.addAppArgs(SparkMain.SparkCommand.ROLLBACK_TO_SAVEPOINT.toString(), instantTime,
metaClient.getBasePath());
Process process = sparkLauncher.launch();
InputStreamConsumer.captureOutput(process);
@@ -116,9 +116,9 @@ public class SavepointsCommand implements CommandMarker {
// Refresh the current
refreshMetaClient();
if (exitCode != 0) {
return "Savepoint " + commitTime + " failed to roll back";
return "Savepoint " + instantTime + " failed to roll back";
}
return "Savepoint " + commitTime + " rolled back";
return "Savepoint " + instantTime + " rolled back";
}
@CliCommand(value = "savepoints refresh", help = "Refresh the savepoints")
@@ -128,24 +128,24 @@ public class SavepointsCommand implements CommandMarker {
}
@CliCommand(value = "savepoint delete", help = "Delete the savepoint")
public String deleteSavepoint(@CliOption(key = {"commit"}, help = "Delete a savepoint") final String commitTime) throws Exception {
public String deleteSavepoint(@CliOption(key = {"commit"}, help = "Delete a savepoint") final String instantTime) throws Exception {
HoodieTableMetaClient metaClient = HoodieCLI.getTableMetaClient();
HoodieTimeline completedInstants = metaClient.getActiveTimeline().getSavePointTimeline().filterCompletedInstants();
if (completedInstants.empty()) {
throw new HoodieException("There are no completed savepoint to run delete");
}
HoodieInstant savePoint = new HoodieInstant(false, HoodieTimeline.SAVEPOINT_ACTION, commitTime);
HoodieInstant savePoint = new HoodieInstant(false, HoodieTimeline.SAVEPOINT_ACTION, instantTime);
if (!completedInstants.containsInstant(savePoint)) {
return "Commit " + commitTime + " not found in Commits " + completedInstants;
return "Commit " + instantTime + " not found in Commits " + completedInstants;
}
try (JavaSparkContext jsc = SparkUtil.initJavaSparkConf("Delete Savepoint")) {
HoodieWriteClient client = createHoodieClient(jsc, metaClient.getBasePath());
client.deleteSavepoint(commitTime);
client.deleteSavepoint(instantTime);
refreshMetaClient();
}
return "Savepoint " + commitTime + " deleted";
return "Savepoint " + instantTime + " deleted";
}
private static HoodieWriteClient createHoodieClient(JavaSparkContext jsc, String basePath) throws Exception {

View File

@@ -282,13 +282,13 @@ public class SparkMain {
return 0;
}
private static int rollback(JavaSparkContext jsc, String commitTime, String basePath) throws Exception {
private static int rollback(JavaSparkContext jsc, String instantTime, String basePath) throws Exception {
HoodieWriteClient client = createHoodieClient(jsc, basePath);
if (client.rollback(commitTime)) {
LOG.info(String.format("The commit \"%s\" rolled back.", commitTime));
if (client.rollback(instantTime)) {
LOG.info(String.format("The commit \"%s\" rolled back.", instantTime));
return 0;
} else {
LOG.info(String.format("The commit \"%s\" failed to roll back.", commitTime));
LOG.info(String.format("The commit \"%s\" failed to roll back.", instantTime));
return -1;
}
}

View File

@@ -74,14 +74,14 @@ public class StatsCommand implements CommandMarker {
List<Comparable[]> rows = new ArrayList<>();
DecimalFormat df = new DecimalFormat("#.00");
for (HoodieInstant commitTime : timeline.getInstants().collect(Collectors.toList())) {
for (HoodieInstant instantTime : timeline.getInstants().collect(Collectors.toList())) {
String waf = "0";
HoodieCommitMetadata commit = HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(commitTime).get(),
HoodieCommitMetadata commit = HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(instantTime).get(),
HoodieCommitMetadata.class);
if (commit.fetchTotalUpdateRecordsWritten() > 0) {
waf = df.format((float) commit.fetchTotalRecordsWritten() / commit.fetchTotalUpdateRecordsWritten());
}
rows.add(new Comparable[] {commitTime.getTimestamp(), commit.fetchTotalUpdateRecordsWritten(),
rows.add(new Comparable[] {instantTime.getTimestamp(), commit.fetchTotalUpdateRecordsWritten(),
commit.fetchTotalRecordsWritten(), waf});
totalRecordsUpserted += commit.fetchTotalUpdateRecordsWritten();
totalRecordsWritten += commit.fetchTotalRecordsWritten();
@@ -97,8 +97,8 @@ public class StatsCommand implements CommandMarker {
return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows);
}
private Comparable[] printFileSizeHistogram(String commitTime, Snapshot s) {
return new Comparable[] {commitTime, s.getMin(), s.getValue(0.1), s.getMedian(), s.getMean(), s.get95thPercentile(),
private Comparable[] printFileSizeHistogram(String instantTime, Snapshot s) {
return new Comparable[] {instantTime, s.getMin(), s.getValue(0.1), s.getMedian(), s.getMean(), s.get95thPercentile(),
s.getMax(), s.size(), s.getStdDev()};
}
@@ -121,19 +121,19 @@ public class StatsCommand implements CommandMarker {
Histogram globalHistogram = new Histogram(new UniformReservoir(MAX_FILES));
HashMap<String, Histogram> commitHistoMap = new HashMap<>();
for (FileStatus fileStatus : statuses) {
String commitTime = FSUtils.getCommitTime(fileStatus.getPath().getName());
String instantTime = FSUtils.getCommitTime(fileStatus.getPath().getName());
long sz = fileStatus.getLen();
if (!commitHistoMap.containsKey(commitTime)) {
commitHistoMap.put(commitTime, new Histogram(new UniformReservoir(MAX_FILES)));
if (!commitHistoMap.containsKey(instantTime)) {
commitHistoMap.put(instantTime, new Histogram(new UniformReservoir(MAX_FILES)));
}
commitHistoMap.get(commitTime).update(sz);
commitHistoMap.get(instantTime).update(sz);
globalHistogram.update(sz);
}
List<Comparable[]> rows = new ArrayList<>();
for (String commitTime : commitHistoMap.keySet()) {
Snapshot s = commitHistoMap.get(commitTime).getSnapshot();
rows.add(printFileSizeHistogram(commitTime, s));
for (String instantTime : commitHistoMap.keySet()) {
Snapshot s = commitHistoMap.get(instantTime).getSnapshot();
rows.add(printFileSizeHistogram(instantTime, s));
}
Snapshot s = globalHistogram.getSnapshot();
rows.add(printFileSizeHistogram("ALL", s));

View File

@@ -147,11 +147,11 @@ class DedupeSparkJob(basePath: String,
// 2. Remove duplicates from the bad files
dupeFixPlan.foreach { case (fileName, keysToSkip) =>
val commitTime = FSUtils.getCommitTime(fileNameToPathMap(fileName).getName)
val instantTime = FSUtils.getCommitTime(fileNameToPathMap(fileName).getName)
val badFilePath = new Path(s"$repairOutputPath/${fileNameToPathMap(fileName).getName}.bad")
val newFilePath = new Path(s"$repairOutputPath/${fileNameToPathMap(fileName).getName}")
LOG.info(" Skipping and writing new file for : " + fileName)
SparkHelpers.skipKeysAndWriteNewFile(commitTime, fs, badFilePath, newFilePath, dupeFixPlan(fileName))
SparkHelpers.skipKeysAndWriteNewFile(instantTime, fs, badFilePath, newFilePath, dupeFixPlan(fileName))
fs.delete(badFilePath, false)
}

View File

@@ -38,14 +38,14 @@ import scala.collection.mutable._
object SparkHelpers {
@throws[Exception]
def skipKeysAndWriteNewFile(commitTime: String, fs: FileSystem, sourceFile: Path, destinationFile: Path, keysToSkip: Set[String]) {
def skipKeysAndWriteNewFile(instantTime: String, fs: FileSystem, sourceFile: Path, destinationFile: Path, keysToSkip: Set[String]) {
val sourceRecords = ParquetUtils.readAvroRecords(fs.getConf, sourceFile)
val schema: Schema = sourceRecords.get(0).getSchema
val filter: BloomFilter = BloomFilterFactory.createBloomFilter(HoodieIndexConfig.DEFAULT_BLOOM_FILTER_NUM_ENTRIES.toInt, HoodieIndexConfig.DEFAULT_BLOOM_FILTER_FPP.toDouble,
HoodieIndexConfig.DEFAULT_HOODIE_BLOOM_INDEX_FILTER_DYNAMIC_MAX_ENTRIES.toInt, HoodieIndexConfig.DEFAULT_BLOOM_INDEX_FILTER_TYPE);
val writeSupport: HoodieAvroWriteSupport = new HoodieAvroWriteSupport(new AvroSchemaConverter().convert(schema), schema, filter)
val parquetConfig: HoodieParquetConfig = new HoodieParquetConfig(writeSupport, CompressionCodecName.GZIP, HoodieStorageConfig.DEFAULT_PARQUET_BLOCK_SIZE_BYTES.toInt, HoodieStorageConfig.DEFAULT_PARQUET_PAGE_SIZE_BYTES.toInt, HoodieStorageConfig.DEFAULT_PARQUET_FILE_MAX_BYTES.toInt, fs.getConf, HoodieStorageConfig.DEFAULT_STREAM_COMPRESSION_RATIO.toDouble)
val writer = new HoodieParquetWriter[HoodieJsonPayload, IndexedRecord](commitTime, destinationFile, parquetConfig, schema)
val writer = new HoodieParquetWriter[HoodieJsonPayload, IndexedRecord](instantTime, destinationFile, parquetConfig, schema)
for (rec <- sourceRecords) {
val key: String = rec.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString
if (!keysToSkip.contains(key)) {