diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CleansCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CleansCommand.java
index 7c304980e..facbcede1 100644
--- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CleansCommand.java
+++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CleansCommand.java
@@ -86,7 +86,7 @@ public class CleansCommand implements CommandMarker {
   }
 
   @CliCommand(value = "clean showpartitions", help = "Show partition level details of a clean")
-  public String showCleanPartitions(@CliOption(key = {"clean"}, help = "clean to show") final String commitTime,
+  public String showCleanPartitions(@CliOption(key = {"clean"}, help = "clean to show") final String instantTime,
       @CliOption(key = {"limit"}, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
       @CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
       @CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
@@ -96,10 +96,10 @@ public class CleansCommand implements CommandMarker {
 
     HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
     HoodieTimeline timeline = activeTimeline.getCleanerTimeline().filterCompletedInstants();
-    HoodieInstant cleanInstant = new HoodieInstant(false, HoodieTimeline.CLEAN_ACTION, commitTime);
+    HoodieInstant cleanInstant = new HoodieInstant(false, HoodieTimeline.CLEAN_ACTION, instantTime);
 
     if (!timeline.containsInstant(cleanInstant)) {
-      return "Clean " + commitTime + " not found in metadata " + timeline;
+      return "Clean " + instantTime + " not found in metadata " + timeline;
     }
     HoodieCleanMetadata cleanMetadata =
diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CommitsCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CommitsCommand.java
index 030dcdb1c..45c84257c 100644
--- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CommitsCommand.java
+++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CommitsCommand.java
@@ -220,18 +220,18 @@ public class CommitsCommand implements CommandMarker {
   }
 
   @CliCommand(value = "commit rollback", help = "Rollback a commit")
-  public String rollbackCommit(@CliOption(key = {"commit"}, help = "Commit to rollback") final String commitTime,
+  public String rollbackCommit(@CliOption(key = {"commit"}, help = "Commit to rollback") final String instantTime,
       @CliOption(key = {"sparkProperties"}, help = "Spark Properties File Path") final String sparkPropertiesPath)
       throws Exception {
     HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
     HoodieTimeline completedTimeline = activeTimeline.getCommitsTimeline().filterCompletedInstants();
-    HoodieTimeline filteredTimeline = completedTimeline.filter(instant -> instant.getTimestamp().equals(commitTime));
+    HoodieTimeline filteredTimeline = completedTimeline.filter(instant -> instant.getTimestamp().equals(instantTime));
     if (filteredTimeline.empty()) {
-      return "Commit " + commitTime + " not found in Commits " + completedTimeline;
+      return "Commit " + instantTime + " not found in Commits " + completedTimeline;
     }
 
     SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
-    sparkLauncher.addAppArgs(SparkMain.SparkCommand.ROLLBACK.toString(), commitTime,
+    sparkLauncher.addAppArgs(SparkMain.SparkCommand.ROLLBACK.toString(), instantTime,
         HoodieCLI.getTableMetaClient().getBasePath());
     Process process = sparkLauncher.launch();
     InputStreamConsumer.captureOutput(process);
@@ -239,16 +239,16 @@ public class CommitsCommand implements CommandMarker {
     // Refresh the current
     refreshCommits();
     if (exitCode != 0) {
-      return "Commit " + commitTime + " failed to roll back";
+      return "Commit " + instantTime + " failed to roll back";
     }
-    return "Commit " + commitTime + " rolled back";
+    return "Commit " + instantTime + " rolled back";
   }
 
   @CliCommand(value = "commit showpartitions", help = "Show partition level details of a commit")
   public String showCommitPartitions(
       @CliOption(key = {"createView"}, mandatory = false, help = "view name to store output table",
           unspecifiedDefaultValue = "") final String exportTableName,
-      @CliOption(key = {"commit"}, help = "Commit to show") final String commitTime,
+      @CliOption(key = {"commit"}, help = "Commit to show") final String instantTime,
       @CliOption(key = {"limit"}, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
       @CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
       @CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
@@ -258,10 +258,10 @@ public class CommitsCommand implements CommandMarker {
 
     HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
     HoodieTimeline timeline = activeTimeline.getCommitsTimeline().filterCompletedInstants();
-    HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime);
+    HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, instantTime);
 
     if (!timeline.containsInstant(commitInstant)) {
-      return "Commit " + commitTime + " not found in Commits " + timeline;
+      return "Commit " + instantTime + " not found in Commits " + timeline;
     }
     HoodieCommitMetadata meta = HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(commitInstant).get(),
         HoodieCommitMetadata.class);
@@ -306,7 +306,7 @@ public class CommitsCommand implements CommandMarker {
   public String showCommitFiles(
       @CliOption(key = {"createView"}, mandatory = false, help = "view name to store output table",
           unspecifiedDefaultValue = "") final String exportTableName,
-      @CliOption(key = {"commit"}, help = "Commit to show") final String commitTime,
+      @CliOption(key = {"commit"}, help = "Commit to show") final String instantTime,
       @CliOption(key = {"limit"}, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
       @CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
       @CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
@@ -316,10 +316,10 @@ public class CommitsCommand implements CommandMarker {
 
     HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().getActiveTimeline();
     HoodieTimeline timeline = activeTimeline.getCommitsTimeline().filterCompletedInstants();
-    HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime);
+    HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, instantTime);
 
     if (!timeline.containsInstant(commitInstant)) {
-      return "Commit " + commitTime + " not found in Commits " + timeline;
+      return "Commit " + instantTime + " not found in Commits " + timeline;
     }
     HoodieCommitMetadata meta = HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(commitInstant).get(),
         HoodieCommitMetadata.class);
diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SavepointsCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SavepointsCommand.java
index 9ef15ac80..82c3edf20 100644
--- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SavepointsCommand.java
+++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SavepointsCommand.java
@@ -92,7 +92,7 @@ public class SavepointsCommand implements CommandMarker {
 
   @CliCommand(value = "savepoint rollback", help = "Savepoint a commit")
   public String rollbackToSavepoint(
-      @CliOption(key = {"savepoint"}, help = "Savepoint to rollback") final String commitTime,
+      @CliOption(key = {"savepoint"}, help = "Savepoint to rollback") final String instantTime,
       @CliOption(key = {"sparkProperties"}, help = "Spark Properties File Path") final String sparkPropertiesPath)
       throws Exception {
     HoodieTableMetaClient metaClient = HoodieCLI.getTableMetaClient();
@@ -101,14 +101,14 @@ public class SavepointsCommand implements CommandMarker {
     }
     HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline();
     HoodieTimeline timeline = activeTimeline.getCommitTimeline().filterCompletedInstants();
-    HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime);
+    HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, instantTime);
 
     if (!timeline.containsInstant(commitInstant)) {
-      return "Commit " + commitTime + " not found in Commits " + timeline;
+      return "Commit " + instantTime + " not found in Commits " + timeline;
     }
 
     SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
-    sparkLauncher.addAppArgs(SparkMain.SparkCommand.ROLLBACK_TO_SAVEPOINT.toString(), commitTime,
+    sparkLauncher.addAppArgs(SparkMain.SparkCommand.ROLLBACK_TO_SAVEPOINT.toString(), instantTime,
         metaClient.getBasePath());
     Process process = sparkLauncher.launch();
     InputStreamConsumer.captureOutput(process);
@@ -116,9 +116,9 @@ public class SavepointsCommand implements CommandMarker {
     // Refresh the current
     refreshMetaClient();
     if (exitCode != 0) {
-      return "Savepoint " + commitTime + " failed to roll back";
+      return "Savepoint " + instantTime + " failed to roll back";
     }
-    return "Savepoint " + commitTime + " rolled back";
+    return "Savepoint " + instantTime + " rolled back";
   }
 
   @CliCommand(value = "savepoints refresh", help = "Refresh the savepoints")
@@ -128,24 +128,24 @@ public class SavepointsCommand implements CommandMarker {
   }
 
   @CliCommand(value = "savepoint delete", help = "Delete the savepoint")
-  public String deleteSavepoint(@CliOption(key = {"commit"}, help = "Delete a savepoint") final String commitTime) throws Exception {
+  public String deleteSavepoint(@CliOption(key = {"commit"}, help = "Delete a savepoint") final String instantTime) throws Exception {
     HoodieTableMetaClient metaClient = HoodieCLI.getTableMetaClient();
     HoodieTimeline completedInstants = metaClient.getActiveTimeline().getSavePointTimeline().filterCompletedInstants();
     if (completedInstants.empty()) {
       throw new HoodieException("There are no completed savepoint to run delete");
     }
-    HoodieInstant savePoint = new HoodieInstant(false, HoodieTimeline.SAVEPOINT_ACTION, commitTime);
+    HoodieInstant savePoint = new HoodieInstant(false, HoodieTimeline.SAVEPOINT_ACTION, instantTime);
     if (!completedInstants.containsInstant(savePoint)) {
-      return "Commit " + commitTime + " not found in Commits " + completedInstants;
+      return "Commit " + instantTime + " not found in Commits " + completedInstants;
     }
 
     try (JavaSparkContext jsc = SparkUtil.initJavaSparkConf("Delete Savepoint")) {
       HoodieWriteClient client = createHoodieClient(jsc, metaClient.getBasePath());
-      client.deleteSavepoint(commitTime);
+      client.deleteSavepoint(instantTime);
       refreshMetaClient();
     }
-    return "Savepoint " + commitTime + " deleted";
+    return "Savepoint " + instantTime + " deleted";
   }
 
   private static HoodieWriteClient createHoodieClient(JavaSparkContext jsc, String basePath) throws Exception {
diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java
index 6e5b5f697..b67181e2f 100644
--- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java
+++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java
@@ -282,13 +282,13 @@ public class SparkMain {
     return 0;
   }
 
-  private static int rollback(JavaSparkContext jsc, String commitTime, String basePath) throws Exception {
+  private static int rollback(JavaSparkContext jsc, String instantTime, String basePath) throws Exception {
     HoodieWriteClient client = createHoodieClient(jsc, basePath);
-    if (client.rollback(commitTime)) {
-      LOG.info(String.format("The commit \"%s\" rolled back.", commitTime));
+    if (client.rollback(instantTime)) {
+      LOG.info(String.format("The commit \"%s\" rolled back.", instantTime));
       return 0;
     } else {
-      LOG.info(String.format("The commit \"%s\" failed to roll back.", commitTime));
+      LOG.info(String.format("The commit \"%s\" failed to roll back.", instantTime));
       return -1;
     }
   }
diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/StatsCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/StatsCommand.java
index b05aee27b..eba9e9cb6 100644
--- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/StatsCommand.java
+++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/StatsCommand.java
@@ -74,14 +74,14 @@ public class StatsCommand implements CommandMarker {
     List<Comparable[]> rows = new ArrayList<>();
     DecimalFormat df = new DecimalFormat("#.00");
-    for (HoodieInstant commitTime : timeline.getInstants().collect(Collectors.toList())) {
+    for (HoodieInstant instantTime : timeline.getInstants().collect(Collectors.toList())) {
       String waf = "0";
-      HoodieCommitMetadata commit = HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(commitTime).get(),
+      HoodieCommitMetadata commit = HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(instantTime).get(),
           HoodieCommitMetadata.class);
       if (commit.fetchTotalUpdateRecordsWritten() > 0) {
         waf = df.format((float) commit.fetchTotalRecordsWritten() / commit.fetchTotalUpdateRecordsWritten());
       }
-      rows.add(new Comparable[] {commitTime.getTimestamp(), commit.fetchTotalUpdateRecordsWritten(),
+      rows.add(new Comparable[] {instantTime.getTimestamp(), commit.fetchTotalUpdateRecordsWritten(),
           commit.fetchTotalRecordsWritten(), waf});
       totalRecordsUpserted += commit.fetchTotalUpdateRecordsWritten();
       totalRecordsWritten += commit.fetchTotalRecordsWritten();
@@ -97,8 +97,8 @@ public class StatsCommand implements CommandMarker {
     return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows);
   }
 
-  private Comparable[] printFileSizeHistogram(String commitTime, Snapshot s) {
-    return new Comparable[] {commitTime, s.getMin(), s.getValue(0.1), s.getMedian(), s.getMean(), s.get95thPercentile(),
+  private Comparable[] printFileSizeHistogram(String instantTime, Snapshot s) {
+    return new Comparable[] {instantTime, s.getMin(), s.getValue(0.1), s.getMedian(), s.getMean(), s.get95thPercentile(),
         s.getMax(), s.size(), s.getStdDev()};
   }
 
@@ -121,19 +121,19 @@ public class StatsCommand implements CommandMarker {
     Histogram globalHistogram = new Histogram(new UniformReservoir(MAX_FILES));
     HashMap<String, Histogram> commitHistoMap = new HashMap<>();
     for (FileStatus fileStatus : statuses) {
-      String commitTime = FSUtils.getCommitTime(fileStatus.getPath().getName());
+      String instantTime = FSUtils.getCommitTime(fileStatus.getPath().getName());
       long sz = fileStatus.getLen();
-      if (!commitHistoMap.containsKey(commitTime)) {
-        commitHistoMap.put(commitTime, new Histogram(new UniformReservoir(MAX_FILES)));
+      if (!commitHistoMap.containsKey(instantTime)) {
+        commitHistoMap.put(instantTime, new Histogram(new UniformReservoir(MAX_FILES)));
       }
-      commitHistoMap.get(commitTime).update(sz);
+      commitHistoMap.get(instantTime).update(sz);
       globalHistogram.update(sz);
     }
     List<Comparable[]> rows = new ArrayList<>();
-    for (String commitTime : commitHistoMap.keySet()) {
-      Snapshot s = commitHistoMap.get(commitTime).getSnapshot();
-      rows.add(printFileSizeHistogram(commitTime, s));
+    for (String instantTime : commitHistoMap.keySet()) {
+      Snapshot s = commitHistoMap.get(instantTime).getSnapshot();
+      rows.add(printFileSizeHistogram(instantTime, s));
     }
     Snapshot s = globalHistogram.getSnapshot();
     rows.add(printFileSizeHistogram("ALL", s));
diff --git a/hudi-cli/src/main/scala/org/apache/hudi/cli/DedupeSparkJob.scala b/hudi-cli/src/main/scala/org/apache/hudi/cli/DedupeSparkJob.scala
index d7b196d8c..71a7988ff 100644
--- a/hudi-cli/src/main/scala/org/apache/hudi/cli/DedupeSparkJob.scala
+++ b/hudi-cli/src/main/scala/org/apache/hudi/cli/DedupeSparkJob.scala
@@ -147,11 +147,11 @@ class DedupeSparkJob(basePath: String,
 
     // 2. Remove duplicates from the bad files
     dupeFixPlan.foreach { case (fileName, keysToSkip) =>
-      val commitTime = FSUtils.getCommitTime(fileNameToPathMap(fileName).getName)
+      val instantTime = FSUtils.getCommitTime(fileNameToPathMap(fileName).getName)
       val badFilePath = new Path(s"$repairOutputPath/${fileNameToPathMap(fileName).getName}.bad")
       val newFilePath = new Path(s"$repairOutputPath/${fileNameToPathMap(fileName).getName}")
       LOG.info(" Skipping and writing new file for : " + fileName)
-      SparkHelpers.skipKeysAndWriteNewFile(commitTime, fs, badFilePath, newFilePath, dupeFixPlan(fileName))
+      SparkHelpers.skipKeysAndWriteNewFile(instantTime, fs, badFilePath, newFilePath, dupeFixPlan(fileName))
       fs.delete(badFilePath, false)
     }
diff --git a/hudi-cli/src/main/scala/org/apache/hudi/cli/SparkHelpers.scala b/hudi-cli/src/main/scala/org/apache/hudi/cli/SparkHelpers.scala
index f6daea299..4c8e4c1b0 100644
--- a/hudi-cli/src/main/scala/org/apache/hudi/cli/SparkHelpers.scala
+++ b/hudi-cli/src/main/scala/org/apache/hudi/cli/SparkHelpers.scala
@@ -38,14 +38,14 @@ import scala.collection.mutable._
 
 object SparkHelpers {
   @throws[Exception]
-  def skipKeysAndWriteNewFile(commitTime: String, fs: FileSystem, sourceFile: Path, destinationFile: Path, keysToSkip: Set[String]) {
+  def skipKeysAndWriteNewFile(instantTime: String, fs: FileSystem, sourceFile: Path, destinationFile: Path, keysToSkip: Set[String]) {
     val sourceRecords = ParquetUtils.readAvroRecords(fs.getConf, sourceFile)
     val schema: Schema = sourceRecords.get(0).getSchema
     val filter: BloomFilter = BloomFilterFactory.createBloomFilter(HoodieIndexConfig.DEFAULT_BLOOM_FILTER_NUM_ENTRIES.toInt, HoodieIndexConfig.DEFAULT_BLOOM_FILTER_FPP.toDouble,
       HoodieIndexConfig.DEFAULT_HOODIE_BLOOM_INDEX_FILTER_DYNAMIC_MAX_ENTRIES.toInt, HoodieIndexConfig.DEFAULT_BLOOM_INDEX_FILTER_TYPE);
     val writeSupport: HoodieAvroWriteSupport = new HoodieAvroWriteSupport(new AvroSchemaConverter().convert(schema), schema, filter)
     val parquetConfig: HoodieParquetConfig = new HoodieParquetConfig(writeSupport, CompressionCodecName.GZIP, HoodieStorageConfig.DEFAULT_PARQUET_BLOCK_SIZE_BYTES.toInt, HoodieStorageConfig.DEFAULT_PARQUET_PAGE_SIZE_BYTES.toInt, HoodieStorageConfig.DEFAULT_PARQUET_FILE_MAX_BYTES.toInt, fs.getConf, HoodieStorageConfig.DEFAULT_STREAM_COMPRESSION_RATIO.toDouble)
-    val writer = new HoodieParquetWriter[HoodieJsonPayload, IndexedRecord](commitTime, destinationFile, parquetConfig, schema)
+    val writer = new HoodieParquetWriter[HoodieJsonPayload, IndexedRecord](instantTime, destinationFile, parquetConfig, schema)
     for (rec <- sourceRecords) {
       val key: String = rec.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString
       if (!keysToSkip.contains(key)) {
diff --git a/hudi-client/src/main/java/org/apache/hudi/client/AbstractHoodieWriteClient.java b/hudi-client/src/main/java/org/apache/hudi/client/AbstractHoodieWriteClient.java
index ae75bb80f..275e5d9bb 100644
--- a/hudi-client/src/main/java/org/apache/hudi/client/AbstractHoodieWriteClient.java
+++ b/hudi-client/src/main/java/org/apache/hudi/client/AbstractHoodieWriteClient.java
@@ -96,23 +96,23 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload> e
   }
 
   /**
-   * Commit changes performed at the given commitTime marker.
+   * Commit changes performed at the given instantTime marker.
    */
-  public boolean commit(String commitTime, JavaRDD<WriteStatus> writeStatuses) {
-    return commit(commitTime, writeStatuses, Option.empty());
+  public boolean commit(String instantTime, JavaRDD<WriteStatus> writeStatuses) {
+    return commit(instantTime, writeStatuses, Option.empty());
   }
 
   /**
-   * Commit changes performed at the given commitTime marker.
+   * Commit changes performed at the given instantTime marker.
    */
-  public boolean commit(String commitTime, JavaRDD<WriteStatus> writeStatuses,
+  public boolean commit(String instantTime, JavaRDD<WriteStatus> writeStatuses,
       Option<Map<String, String>> extraMetadata) {
     HoodieTableMetaClient metaClient = createMetaClient(false);
-    return commit(commitTime, writeStatuses, extraMetadata, metaClient.getCommitActionType());
+    return commit(instantTime, writeStatuses, extraMetadata, metaClient.getCommitActionType());
   }
 
   protected JavaRDD<WriteStatus> updateIndexAndCommitIfNeeded(JavaRDD<WriteStatus> writeStatusRDD, HoodieTable<T> table,
-      String commitTime) {
+      String instantTime) {
     // cache writeStatusRDD before updating index, so that all actions before this are not triggered again for future
     // RDD actions that are performed after updating the index.
     writeStatusRDD = writeStatusRDD.persist(config.getWriteStatusStorageLevel());
@@ -121,26 +121,26 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload> e
     JavaRDD<WriteStatus> statuses = index.updateLocation(writeStatusRDD, jsc, table);
     metrics.updateIndexMetrics(UPDATE_STR, metrics.getDurationInMs(indexTimer == null ? 0L : indexTimer.stop()));
     // Trigger the insert and collect statuses
-    commitOnAutoCommit(commitTime, statuses, table.getMetaClient().getCommitActionType());
+    commitOnAutoCommit(instantTime, statuses, table.getMetaClient().getCommitActionType());
     return statuses;
   }
 
-  protected void commitOnAutoCommit(String commitTime, JavaRDD<WriteStatus> resultRDD, String actionType) {
+  protected void commitOnAutoCommit(String instantTime, JavaRDD<WriteStatus> resultRDD, String actionType) {
     if (config.shouldAutoCommit()) {
-      LOG.info("Auto commit enabled: Committing " + commitTime);
-      boolean commitResult = commit(commitTime, resultRDD, Option.empty(), actionType);
+      LOG.info("Auto commit enabled: Committing " + instantTime);
+      boolean commitResult = commit(instantTime, resultRDD, Option.empty(), actionType);
       if (!commitResult) {
-        throw new HoodieCommitException("Failed to commit " + commitTime);
+        throw new HoodieCommitException("Failed to commit " + instantTime);
       }
     } else {
-      LOG.info("Auto commit disabled for " + commitTime);
+      LOG.info("Auto commit disabled for " + instantTime);
     }
   }
 
-  private boolean commit(String commitTime, JavaRDD<WriteStatus> writeStatuses,
+  private boolean commit(String instantTime, JavaRDD<WriteStatus> writeStatuses,
       Option<Map<String, String>> extraMetadata, String actionType) {
-    LOG.info("Commiting " + commitTime);
+    LOG.info("Committing " + instantTime);
     // Create a Hoodie table which encapsulated the commits and files visible
     HoodieTable<T> table = HoodieTable.getHoodieTable(createMetaClient(true), config, jsc);
@@ -152,7 +152,7 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload> e
     updateMetadataAndRollingStats(actionType, metadata, stats);
 
     // Finalize write
-    finalizeWrite(table, commitTime, stats);
+    finalizeWrite(table, instantTime, stats);
 
     // add in extra metadata
     if (extraMetadata.isPresent()) {
@@ -162,23 +162,23 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload> e
     metadata.setOperationType(operationType);
 
     try {
-      activeTimeline.saveAsComplete(new HoodieInstant(true, actionType, commitTime),
+      activeTimeline.saveAsComplete(new HoodieInstant(true, actionType, instantTime),
           Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
-      postCommit(metadata, commitTime, extraMetadata);
+      postCommit(metadata, instantTime, extraMetadata);
       if (writeContext != null) {
         long durationInMs = metrics.getDurationInMs(writeContext.stop());
-        metrics.updateCommitMetrics(HoodieActiveTimeline.COMMIT_FORMATTER.parse(commitTime).getTime(), durationInMs,
+        metrics.updateCommitMetrics(HoodieActiveTimeline.COMMIT_FORMATTER.parse(instantTime).getTime(), durationInMs,
             metadata, actionType);
         writeContext = null;
       }
-      LOG.info("Committed " + commitTime);
+      LOG.info("Committed " + instantTime);
     } catch (IOException e) {
-      throw new HoodieCommitException("Failed to complete commit " + config.getBasePath() + " at time " + commitTime,
+      throw new HoodieCommitException("Failed to complete commit " + config.getBasePath() + " at time " + instantTime,
           e);
     } catch (ParseException e) {
-      throw new HoodieCommitException("Failed to complete commit " + config.getBasePath() + " at time " + commitTime
+      throw new HoodieCommitException("Failed to complete commit " + config.getBasePath() + " at time " + instantTime
           + "Instant time is not of valid format", e);
     }
     return true;
diff --git a/hudi-client/src/main/java/org/apache/hudi/client/HoodieWriteClient.java b/hudi-client/src/main/java/org/apache/hudi/client/HoodieWriteClient.java
index 4e17b3732..7ccb2a47b 100644
--- a/hudi-client/src/main/java/org/apache/hudi/client/HoodieWriteClient.java
+++ b/hudi-client/src/main/java/org/apache/hudi/client/HoodieWriteClient.java
@@ -167,13 +167,13 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
   }
 
   /**
-   * Upsert a batch of new records into Hoodie table at the supplied commitTime.
+   * Upsert a batch of new records into Hoodie table at the supplied instantTime.
    *
    * @param records JavaRDD of hoodieRecords to upsert
-   * @param commitTime Instant time of the commit
+   * @param instantTime Instant time of the commit
    * @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and counts
    */
-  public JavaRDD<WriteStatus> upsert(JavaRDD<HoodieRecord<T>> records, final String commitTime) {
+  public JavaRDD<WriteStatus> upsert(JavaRDD<HoodieRecord<T>> records, final String instantTime) {
     HoodieTable<T> table = getTableAndInitCtx(WriteOperationType.UPSERT);
     setOperationType(WriteOperationType.UPSERT);
     try {
@@ -185,34 +185,34 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
       // perform index loop up to get existing location of records
       JavaRDD<HoodieRecord<T>> taggedRecords = getIndex().tagLocation(dedupedRecords, jsc, table);
       metrics.updateIndexMetrics(LOOKUP_STR, metrics.getDurationInMs(indexTimer == null ? 0L : indexTimer.stop()));
-      return upsertRecordsInternal(taggedRecords, commitTime, table, true);
+      return upsertRecordsInternal(taggedRecords, instantTime, table, true);
     } catch (Throwable e) {
       if (e instanceof HoodieUpsertException) {
         throw (HoodieUpsertException) e;
       }
-      throw new HoodieUpsertException("Failed to upsert for commit time " + commitTime, e);
+      throw new HoodieUpsertException("Failed to upsert for commit time " + instantTime, e);
     }
   }
 
   /**
-   * Upserts the given prepared records into the Hoodie table, at the supplied commitTime.
+   * Upserts the given prepared records into the Hoodie table, at the supplied instantTime.
    * <p>
    * This implementation requires that the input records are already tagged, and de-duped if needed.
    *
    * @param preppedRecords Prepared HoodieRecords to upsert
-   * @param commitTime Instant time of the commit
+   * @param instantTime Instant time of the commit
    * @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and counts
    */
-  public JavaRDD<WriteStatus> upsertPreppedRecords(JavaRDD<HoodieRecord<T>> preppedRecords, final String commitTime) {
+  public JavaRDD<WriteStatus> upsertPreppedRecords(JavaRDD<HoodieRecord<T>> preppedRecords, final String instantTime) {
     HoodieTable<T> table = getTableAndInitCtx(WriteOperationType.UPSERT_PREPPED);
     setOperationType(WriteOperationType.UPSERT_PREPPED);
     try {
-      return upsertRecordsInternal(preppedRecords, commitTime, table, true);
+      return upsertRecordsInternal(preppedRecords, instantTime, table, true);
     } catch (Throwable e) {
       if (e instanceof HoodieUpsertException) {
         throw (HoodieUpsertException) e;
       }
-      throw new HoodieUpsertException("Failed to upsert prepared records for commit time " + commitTime, e);
+      throw new HoodieUpsertException("Failed to upsert prepared records for commit time " + instantTime, e);
     }
   }
 
@@ -223,10 +223,10 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
    * alignment, as with upsert(), by profiling the workload
    *
    * @param records HoodieRecords to insert
-   * @param commitTime Instant time of the commit
+   * @param instantTime Instant time of the commit
    * @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and counts
    */
-  public JavaRDD<WriteStatus> insert(JavaRDD<HoodieRecord<T>> records, final String commitTime) {
+  public JavaRDD<WriteStatus> insert(JavaRDD<HoodieRecord<T>> records, final String instantTime) {
     HoodieTable<T> table = getTableAndInitCtx(WriteOperationType.INSERT);
     setOperationType(WriteOperationType.INSERT);
     try {
@@ -234,36 +234,36 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
       JavaRDD<HoodieRecord<T>> dedupedRecords =
           combineOnCondition(config.shouldCombineBeforeInsert(), records, config.getInsertShuffleParallelism());
 
-      return upsertRecordsInternal(dedupedRecords, commitTime, table, false);
+      return upsertRecordsInternal(dedupedRecords, instantTime, table, false);
     } catch (Throwable e) {
       if (e instanceof HoodieInsertException) {
         throw e;
       }
-      throw new HoodieInsertException("Failed to insert for commit time " + commitTime, e);
+      throw new HoodieInsertException("Failed to insert for commit time " + instantTime, e);
     }
   }
 
   /**
-   * Inserts the given prepared records into the Hoodie table, at the supplied commitTime.
+   * Inserts the given prepared records into the Hoodie table, at the supplied instantTime.
    * <p>
    * This implementation skips the index check, skips de-duping and is able to leverage benefits such as small file
   * handling/blocking alignment, as with insert(), by profiling the workload. The prepared HoodieRecords should be
    * de-duped if needed.
    *
    * @param preppedRecords HoodieRecords to insert
-   * @param commitTime Instant time of the commit
+   * @param instantTime Instant time of the commit
    * @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and counts
    */
-  public JavaRDD<WriteStatus> insertPreppedRecords(JavaRDD<HoodieRecord<T>> preppedRecords, final String commitTime) {
+  public JavaRDD<WriteStatus> insertPreppedRecords(JavaRDD<HoodieRecord<T>> preppedRecords, final String instantTime) {
     HoodieTable<T> table = getTableAndInitCtx(WriteOperationType.INSERT_PREPPED);
     setOperationType(WriteOperationType.INSERT_PREPPED);
     try {
-      return upsertRecordsInternal(preppedRecords, commitTime, table, false);
+      return upsertRecordsInternal(preppedRecords, instantTime, table, false);
     } catch (Throwable e) {
       if (e instanceof HoodieInsertException) {
         throw e;
       }
-      throw new HoodieInsertException("Failed to insert prepared records for commit time " + commitTime, e);
+      throw new HoodieInsertException("Failed to insert prepared records for commit time " + instantTime, e);
     }
   }
 
@@ -275,11 +275,11 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
    * the numbers of files with less memory compared to the {@link HoodieWriteClient#insert(JavaRDD, String)}
    *
    * @param records HoodieRecords to insert
-   * @param commitTime Instant time of the commit
+   * @param instantTime Instant time of the commit
    * @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and counts
    */
-  public JavaRDD<WriteStatus> bulkInsert(JavaRDD<HoodieRecord<T>> records, final String commitTime) {
-    return bulkInsert(records, commitTime, Option.empty());
+  public JavaRDD<WriteStatus> bulkInsert(JavaRDD<HoodieRecord<T>> records, final String instantTime) {
+    return bulkInsert(records, instantTime, Option.empty());
   }
 
   /**
@@ -292,12 +292,12 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
    * {@link UserDefinedBulkInsertPartitioner}.
    *
    * @param records HoodieRecords to insert
-   * @param commitTime Instant time of the commit
+   * @param instantTime Instant time of the commit
    * @param bulkInsertPartitioner If specified then it will be used to partition input records before they are inserted
    *        into hoodie.
    * @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and counts
    */
-  public JavaRDD<WriteStatus> bulkInsert(JavaRDD<HoodieRecord<T>> records, final String commitTime,
+  public JavaRDD<WriteStatus> bulkInsert(JavaRDD<HoodieRecord<T>> records, final String instantTime,
       Option<UserDefinedBulkInsertPartitioner> bulkInsertPartitioner) {
     HoodieTable<T> table = getTableAndInitCtx(WriteOperationType.BULK_INSERT);
     setOperationType(WriteOperationType.BULK_INSERT);
     try {
@@ -306,12 +306,12 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
       JavaRDD<HoodieRecord<T>> dedupedRecords =
           combineOnCondition(config.shouldCombineBeforeInsert(), records, config.getInsertShuffleParallelism());
 
-      return bulkInsertInternal(dedupedRecords, commitTime, table, bulkInsertPartitioner);
+      return bulkInsertInternal(dedupedRecords, instantTime, table, bulkInsertPartitioner);
     } catch (Throwable e) {
       if (e instanceof HoodieInsertException) {
         throw e;
       }
-      throw new HoodieInsertException("Failed to bulk insert for commit time " + commitTime, e);
+      throw new HoodieInsertException("Failed to bulk insert for commit time " + instantTime, e);
     }
   }
 
@@ -326,34 +326,34 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
    * {@link UserDefinedBulkInsertPartitioner}.
    *
    * @param preppedRecords HoodieRecords to insert
-   * @param commitTime Instant time of the commit
+   * @param instantTime Instant time of the commit
    * @param bulkInsertPartitioner If specified then it will be used to partition input records before they are inserted
    *        into hoodie.
    * @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and counts
    */
-  public JavaRDD<WriteStatus> bulkInsertPreppedRecords(JavaRDD<HoodieRecord<T>> preppedRecords, final String commitTime,
+  public JavaRDD<WriteStatus> bulkInsertPreppedRecords(JavaRDD<HoodieRecord<T>> preppedRecords, final String instantTime,
       Option<UserDefinedBulkInsertPartitioner> bulkInsertPartitioner) {
     HoodieTable<T> table = getTableAndInitCtx(WriteOperationType.BULK_INSERT_PREPPED);
     setOperationType(WriteOperationType.BULK_INSERT_PREPPED);
     try {
-      return bulkInsertInternal(preppedRecords, commitTime, table, bulkInsertPartitioner);
+      return bulkInsertInternal(preppedRecords, instantTime, table, bulkInsertPartitioner);
     } catch (Throwable e) {
       if (e instanceof HoodieInsertException) {
         throw e;
       }
-      throw new HoodieInsertException("Failed to bulk insert prepared records for commit time " + commitTime, e);
+      throw new HoodieInsertException("Failed to bulk insert prepared records for commit time " + instantTime, e);
     }
   }
 
   /**
-   * Deletes a list of {@link HoodieKey}s from the Hoodie table, at the supplied commitTime {@link HoodieKey}s will be
+   * Deletes a list of {@link HoodieKey}s from the Hoodie table, at the supplied instantTime {@link HoodieKey}s will be
    * de-duped and non existent keys will be removed before deleting.
    *
    * @param keys {@link List} of {@link HoodieKey}s to be deleted
-   * @param commitTime Commit time handle
+   * @param instantTime Commit time handle
    * @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and counts
    */
-  public JavaRDD<WriteStatus> delete(JavaRDD<HoodieKey> keys, final String commitTime) {
+  public JavaRDD<WriteStatus> delete(JavaRDD<HoodieKey> keys, final String instantTime) {
     HoodieTable<T> table = getTableAndInitCtx(WriteOperationType.DELETE);
     setOperationType(WriteOperationType.DELETE);
     try {
@@ -370,23 +370,23 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
       JavaRDD<HoodieRecord<T>> taggedValidRecords = taggedRecords.filter(HoodieRecord::isCurrentLocationKnown);
       if (!taggedValidRecords.isEmpty()) {
         metrics.updateIndexMetrics(LOOKUP_STR, metrics.getDurationInMs(indexTimer == null ? 0L : indexTimer.stop()));
-        return upsertRecordsInternal(taggedValidRecords, commitTime, table, true);
+        return upsertRecordsInternal(taggedValidRecords, instantTime, table, true);
       } else {
         // if entire set of keys are non existent
-        saveWorkloadProfileMetadataToInflight(new WorkloadProfile(jsc.emptyRDD()), table, commitTime);
+        saveWorkloadProfileMetadataToInflight(new WorkloadProfile(jsc.emptyRDD()), table, instantTime);
         JavaRDD<WriteStatus> writeStatusRDD = jsc.emptyRDD();
-        commitOnAutoCommit(commitTime, writeStatusRDD, table.getMetaClient().getCommitActionType());
+        commitOnAutoCommit(instantTime, writeStatusRDD, table.getMetaClient().getCommitActionType());
         return writeStatusRDD;
       }
     } catch (Throwable e) {
       if (e instanceof HoodieUpsertException) {
         throw (HoodieUpsertException) e;
       }
-      throw new HoodieUpsertException("Failed to delete for commit time " + commitTime, e);
+      throw new HoodieUpsertException("Failed to delete for commit time " + instantTime, e);
     }
   }
 
-  private JavaRDD<WriteStatus> bulkInsertInternal(JavaRDD<HoodieRecord<T>> dedupedRecords, String commitTime,
+  private JavaRDD<WriteStatus> bulkInsertInternal(JavaRDD<HoodieRecord<T>> dedupedRecords, String instantTime,
       HoodieTable<T> table, Option<UserDefinedBulkInsertPartitioner> bulkInsertPartitioner) {
     final JavaRDD<HoodieRecord<T>> repartitionedRecords;
     final int parallelism = config.getBulkInsertShuffleParallelism();
@@ -407,13 +407,13 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
         IntStream.range(0, parallelism).mapToObj(i -> FSUtils.createNewFileIdPfx()).collect(Collectors.toList());
 
     table.getActiveTimeline().transitionRequestedToInflight(new HoodieInstant(State.REQUESTED,
-        table.getMetaClient().getCommitActionType(), commitTime), Option.empty());
+        table.getMetaClient().getCommitActionType(), instantTime), Option.empty());
 
     JavaRDD<WriteStatus> writeStatusRDD = repartitionedRecords
-        .mapPartitionsWithIndex(new BulkInsertMapFunction(commitTime, config, table, fileIDPrefixes), true)
+        .mapPartitionsWithIndex(new BulkInsertMapFunction(instantTime, config, table, fileIDPrefixes), true)
         .flatMap(List::iterator);
 
-    return updateIndexAndCommitIfNeeded(writeStatusRDD, table, commitTime);
+    return updateIndexAndCommitIfNeeded(writeStatusRDD, table, instantTime);
   }
 
   private JavaRDD<HoodieRecord<T>> combineOnCondition(boolean condition, JavaRDD<HoodieRecord<T>> records,
@@ -427,7 +427,7 @@
    * are unknown across batches Inserts (which are new parquet files) are rolled back based on commit time. // TODO :
    * Create a new WorkloadProfile metadata file instead of using HoodieCommitMetadata
    */
-  private void saveWorkloadProfileMetadataToInflight(WorkloadProfile profile, HoodieTable<T> table, String commitTime)
+  private void saveWorkloadProfileMetadataToInflight(WorkloadProfile profile, HoodieTable<T> table, String instantTime)
       throws HoodieCommitException {
     try {
       HoodieCommitMetadata metadata = new HoodieCommitMetadata();
@@ -446,15 +446,15 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
 
       HoodieActiveTimeline activeTimeline = table.getActiveTimeline();
       String commitActionType = table.getMetaClient().getCommitActionType();
-      HoodieInstant requested = new HoodieInstant(State.REQUESTED, commitActionType, commitTime);
+      HoodieInstant requested = new HoodieInstant(State.REQUESTED, commitActionType, instantTime);
       activeTimeline.transitionRequestedToInflight(requested,
           Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
     } catch (IOException io) {
-      throw new HoodieCommitException("Failed to commit " + commitTime + " unable to save inflight metadata ", io);
+      throw new HoodieCommitException("Failed to commit " + instantTime + " unable to save inflight metadata ", io);
     }
   }
 
-  private JavaRDD<WriteStatus> upsertRecordsInternal(JavaRDD<HoodieRecord<T>> preppedRecords, String commitTime,
+  private JavaRDD<WriteStatus> upsertRecordsInternal(JavaRDD<HoodieRecord<T>> preppedRecords, String instantTime,
       HoodieTable<T> hoodieTable, final boolean isUpsert) {
 
     // Cache the tagged records, so we don't end up computing both
@@ -469,7 +469,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
     if (hoodieTable.isWorkloadProfileNeeded()) {
       profile = new WorkloadProfile(preppedRecords);
       LOG.info("Workload profile :" + profile);
-      saveWorkloadProfileMetadataToInflight(profile, hoodieTable, commitTime);
+      saveWorkloadProfileMetadataToInflight(profile, hoodieTable, instantTime);
     }
 
     // partition using the insert partitioner
@@ -477,13 +477,13 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
     JavaRDD<HoodieRecord<T>> partitionedRecords = partition(preppedRecords, partitioner);
     JavaRDD<WriteStatus> writeStatusRDD = partitionedRecords.mapPartitionsWithIndex((partition, recordItr) -> {
       if (isUpsert) {
-        return hoodieTable.handleUpsertPartition(commitTime, partition, recordItr, partitioner);
+        return hoodieTable.handleUpsertPartition(instantTime, partition, recordItr, partitioner);
       } else {
-        return hoodieTable.handleInsertPartition(commitTime, partition, recordItr, partitioner);
+        return hoodieTable.handleInsertPartition(instantTime, partition, recordItr, partitioner);
       }
     }, true).flatMap(List::iterator);
 
-    return updateIndexAndCommitIfNeeded(writeStatusRDD, hoodieTable, commitTime);
+    return updateIndexAndCommitIfNeeded(writeStatusRDD, hoodieTable, instantTime);
   }
 
   private Partitioner getPartitioner(HoodieTable<T> table, boolean isUpsert, WorkloadProfile profile) {
@@ -551,7 +551,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
   }
 
   /**
-   * Savepoint a specific commit. Latest version of data files as of the passed in commitTime will be referenced in the
+   * Savepoint a specific commit. Latest version of data files as of the passed in instantTime will be referenced in the
    * savepoint and will never be cleaned. The savepointed commit will never be rolledback or archived.
    * <p>
    * This gives an option to rollback the state to the savepoint anytime. Savepoint needs to be manually created and
@@ -559,19 +559,19 @@
    * <p>
    * Savepoint should be on a commit that could not have been cleaned.
    *
-   * @param commitTime - commit that should be savepointed
+   * @param instantTime - commit that should be savepointed
    * @param user - User creating the savepoint
    * @param comment - Comment for the savepoint
    * @return true if the savepoint was created successfully
    */
-  public boolean savepoint(String commitTime, String user, String comment) {
+  public boolean savepoint(String instantTime, String user, String comment) {
     HoodieTable<T> table = HoodieTable.getHoodieTable(createMetaClient(true), config, jsc);
     if (table.getMetaClient().getTableType() == HoodieTableType.MERGE_ON_READ) {
       throw new UnsupportedOperationException("Savepointing is not supported or MergeOnRead table types");
     }
 
     Option<HoodieInstant> cleanInstant = table.getCompletedCleanTimeline().lastInstant();
-    HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime);
+    HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, instantTime);
     if (!table.getCompletedCommitsTimeline().containsInstant(commitInstant)) {
       throw new HoodieSavepointException("Could not savepoint non-existing commit " + commitInstant);
     }
@@ -589,8 +589,8 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
 
       // Cannot allow savepoint time on a commit that could have been cleaned
       ValidationUtils.checkArgument(
-          HoodieTimeline.compareTimestamps(commitTime, lastCommitRetained, HoodieTimeline.GREATER_OR_EQUAL),
-          "Could not savepoint commit " + commitTime + " as this is beyond the lookup window " + lastCommitRetained);
+          HoodieTimeline.compareTimestamps(instantTime, lastCommitRetained, HoodieTimeline.GREATER_OR_EQUAL),
+          "Could not savepoint commit " + instantTime + " as this is beyond the lookup window " + lastCommitRetained);
 
       Map<String, List<String>> latestFilesMap = jsc
           .parallelize(FSUtils.getAllPartitionPaths(fs, table.getMetaClient().getBasePath(),
@@ -599,7 +599,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
             // Scan all partitions files with this commit time
             LOG.info("Collecting latest files in partition path " + partitionPath);
             BaseFileOnlyView view = table.getBaseFileOnlyView();
-            List<String> latestFiles = view.getLatestBaseFilesBeforeOrOn(partitionPath, commitTime)
+            List<String> latestFiles = view.getLatestBaseFilesBeforeOrOn(partitionPath, instantTime)
                 .map(HoodieBaseFile::getFileName).collect(Collectors.toList());
             return new Tuple2<>(partitionPath, latestFiles);
           }).collectAsMap();
@@ -607,14 +607,14 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
       HoodieSavepointMetadata metadata = AvroUtils.convertSavepointMetadata(user, comment, latestFilesMap);
       // Nothing to save in the savepoint
       table.getActiveTimeline().createNewInstant(
-          new HoodieInstant(true, HoodieTimeline.SAVEPOINT_ACTION, commitTime));
+          new HoodieInstant(true, HoodieTimeline.SAVEPOINT_ACTION, instantTime));
       table.getActiveTimeline()
-          .saveAsComplete(new HoodieInstant(true, HoodieTimeline.SAVEPOINT_ACTION, commitTime),
+          .saveAsComplete(new HoodieInstant(true, HoodieTimeline.SAVEPOINT_ACTION, instantTime),
               AvroUtils.serializeSavepointMetadata(metadata));
-      LOG.info("Savepoint " + commitTime + " created");
+      LOG.info("Savepoint " + instantTime + " created");
       return true;
     } catch (IOException e) {
-      throw new HoodieSavepointException("Failed to savepoint " + commitTime, e);
+      throw new HoodieSavepointException("Failed to savepoint " + instantTime, e);
     }
   }
 
@@ -687,15 +687,15 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
     // to be running. Rollback to savepoint also removes any pending compaction actions that are generated after
     // savepoint time. Allowing pending compaction to be retained is not safe as those workload could be referencing
     // file-slices that will be rolled-back as part of this operation
-    HoodieTimeline commitTimeline = table.getMetaClient().getCommitsAndCompactionTimeline();
+    HoodieTimeline instantTimeline = table.getMetaClient().getCommitsAndCompactionTimeline();
 
     HoodieInstant savePoint = new HoodieInstant(false, HoodieTimeline.SAVEPOINT_ACTION, savepointTime);
     boolean isSavepointPresent = table.getCompletedSavepointTimeline().containsInstant(savePoint);
     if (!isSavepointPresent) {
-      throw new HoodieRollbackException("No savepoint for commitTime " + savepointTime);
+      throw new HoodieRollbackException("No savepoint for instantTime " + savepointTime);
     }
 
-    List<String> commitsToRollback = commitTimeline.findInstantsAfter(savepointTime, Integer.MAX_VALUE).getInstants()
+    List<String> commitsToRollback = instantTimeline.findInstantsAfter(savepointTime, Integer.MAX_VALUE).getInstants()
         .map(HoodieInstant::getTimestamp).collect(Collectors.toList());
     LOG.info("Rolling back commits " + commitsToRollback);
 
@@ -716,11 +716,11 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
    * this commit (2) clean indexing data, (3) clean new generated parquet files. (4) Finally delete .commit or .inflight
    * file.
    *
-   * @param commitTime Instant time of the commit
+   * @param instantTime Instant time of the commit
    * @return {@code true} If rollback the record changes successfully. {@code false} otherwise
    */
-  public boolean rollback(final String commitTime) throws HoodieRollbackException {
-    rollbackInternal(commitTime);
+  public boolean rollback(final String instantTime) throws HoodieRollbackException {
+    rollbackInternal(instantTime);
     return true;
   }
 
@@ -852,9 +852,9 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
       // Only rollback pending commit/delta-commits. Do not touch compaction commits
       rollbackPendingCommits();
     }
-    String commitTime = HoodieActiveTimeline.createNewInstantTime();
-    startCommit(commitTime);
-    return commitTime;
+    String instantTime = HoodieActiveTimeline.createNewInstantTime();
+    startCommit(instantTime);
+    return instantTime;
   }
 
   /**
diff --git a/hudi-client/src/main/java/org/apache/hudi/execution/BulkInsertMapFunction.java b/hudi-client/src/main/java/org/apache/hudi/execution/BulkInsertMapFunction.java
index 200ac4eb0..249ff3d04 100644
--- a/hudi-client/src/main/java/org/apache/hudi/execution/BulkInsertMapFunction.java
+++ b/hudi-client/src/main/java/org/apache/hudi/execution/BulkInsertMapFunction.java
@@ -35,14 +35,14 @@
 import java.util.List;
 
 public class BulkInsertMapFunction<T extends HoodieRecordPayload>
     implements Function2<Integer, Iterator<HoodieRecord<T>>, Iterator<List<WriteStatus>>> {
 
-  private String commitTime;
+  private String instantTime;
   private HoodieWriteConfig config;
   private HoodieTable<T> hoodieTable;
   private List<String> fileIDPrefixes;
 
-  public BulkInsertMapFunction(String commitTime, HoodieWriteConfig config, HoodieTable<T> hoodieTable,
-      List<String> fileIDPrefixes) {
-    this.commitTime = commitTime;
+  public BulkInsertMapFunction(String instantTime, HoodieWriteConfig config, HoodieTable<T> hoodieTable,
+      List<String> fileIDPrefixes) {
+    this.instantTime = instantTime;
     this.config = config;
     this.hoodieTable = hoodieTable;
     this.fileIDPrefixes = fileIDPrefixes;
@@ -50,7 +50,7 @@ public class BulkInsertMapFunction
 
   @Override
   public Iterator<List<WriteStatus>> call(Integer partition, Iterator<HoodieRecord<T>> sortedRecordItr) {
-    return new CopyOnWriteLazyInsertIterable<>(sortedRecordItr, config, commitTime, hoodieTable,
+    return new CopyOnWriteLazyInsertIterable<>(sortedRecordItr, config, instantTime, hoodieTable,
         fileIDPrefixes.get(partition));
   }
 }
diff --git a/hudi-client/src/main/java/org/apache/hudi/execution/CopyOnWriteLazyInsertIterable.java b/hudi-client/src/main/java/org/apache/hudi/execution/CopyOnWriteLazyInsertIterable.java
index 47e3fd978..bdcea618d 100644
--- a/hudi-client/src/main/java/org/apache/hudi/execution/CopyOnWriteLazyInsertIterable.java
+++ b/hudi-client/src/main/java/org/apache/hudi/execution/CopyOnWriteLazyInsertIterable.java
@@ -46,16 +46,16 @@ public class CopyOnWriteLazyInsertIterable<T extends HoodieRecordPayload>
     extends LazyIterableIterator<HoodieRecord<T>, List<WriteStatus>> {
 
   protected final HoodieWriteConfig hoodieConfig;
-  protected final String commitTime;
+  protected final String instantTime;
   protected final HoodieTable<T> hoodieTable;
   protected final String idPrefix;
   protected int numFilesWritten;
 
   public CopyOnWriteLazyInsertIterable(Iterator<HoodieRecord<T>> sortedRecordItr, HoodieWriteConfig config,
-      String commitTime, HoodieTable<T> hoodieTable, String idPrefix) {
+      String instantTime, HoodieTable<T> hoodieTable, String idPrefix) {
     super(sortedRecordItr);
     this.hoodieConfig = config;
-    this.commitTime = commitTime;
+    this.instantTime = instantTime;
     this.hoodieTable = hoodieTable;
     this.idPrefix = idPrefix;
     this.numFilesWritten = 0;
@@ -136,7 +136,7 @@ public class CopyOnWriteLazyInsertIterable<T extends HoodieRecordPayload>
       final HoodieRecord insertPayload = payload.record;
       // lazily initialize the handle, for the first time
       if (handle == null) {
-        handle = new HoodieCreateHandle(hoodieConfig, commitTime, hoodieTable, insertPayload.getPartitionPath(),
+        handle = new HoodieCreateHandle(hoodieConfig, instantTime, hoodieTable, insertPayload.getPartitionPath(),
            getNextFileId(idPrefix));
       }
 
@@ -147,7 +147,7 @@ public class CopyOnWriteLazyInsertIterable<T extends HoodieRecordPayload>
         // handle is full.
         statuses.add(handle.close());
         // Need to handle the rejected payload & open new handle
-        handle = new HoodieCreateHandle(hoodieConfig, commitTime, hoodieTable, insertPayload.getPartitionPath(),
+        handle = new HoodieCreateHandle(hoodieConfig, instantTime, hoodieTable, insertPayload.getPartitionPath(),
            getNextFileId(idPrefix));
         handle.write(insertPayload, payload.insertValue, payload.exception); // we should be able to write 1 payload.
       }
     }
diff --git a/hudi-client/src/main/java/org/apache/hudi/execution/MergeOnReadLazyInsertIterable.java b/hudi-client/src/main/java/org/apache/hudi/execution/MergeOnReadLazyInsertIterable.java
index a6007ffbe..11c00350e 100644
--- a/hudi-client/src/main/java/org/apache/hudi/execution/MergeOnReadLazyInsertIterable.java
+++ b/hudi-client/src/main/java/org/apache/hudi/execution/MergeOnReadLazyInsertIterable.java
@@ -35,8 +35,8 @@ public class MergeOnReadLazyInsertIterable<T extends HoodieRecordPayload> extends CopyOnWriteLazyInsertIterable<T> {
 
   public MergeOnReadLazyInsertIterable(Iterator<HoodieRecord<T>> sortedRecordItr, HoodieWriteConfig config,
-      String commitTime, HoodieTable<T> hoodieTable, String idPfx) {
-    super(sortedRecordItr, config, commitTime, hoodieTable, idPfx);
+      String instantTime, HoodieTable<T> hoodieTable, String idPfx) {
+    super(sortedRecordItr, config, instantTime, hoodieTable, idPfx);
   }
 
   @Override
@@ -52,7 +52,7 @@ public class MergeOnReadLazyInsertIterable extend
       List<WriteStatus> statuses = new ArrayList<>();
       // lazily initialize the handle, for the first time
       if (handle == null) {
-        handle = new HoodieAppendHandle(hoodieConfig, commitTime, hoodieTable,
+        handle = new HoodieAppendHandle(hoodieConfig, instantTime, hoodieTable,
             insertPayload.getPartitionPath(), getNextFileId(idPrefix));
       }
       if (handle.canWrite(insertPayload)) {
@@ -63,7 +63,7 @@ public class MergeOnReadLazyInsertIterable extend
         handle.close();
         statuses.add(handle.getWriteStatus());
         // Need to handle the rejected payload & open new handle
-        handle = new HoodieAppendHandle(hoodieConfig, commitTime, hoodieTable,
+        handle = new HoodieAppendHandle(hoodieConfig, instantTime, hoodieTable,
             insertPayload.getPartitionPath(), getNextFileId(idPrefix));
         handle.write(insertPayload, payload.insertValue, payload.exception); // we should be able to write 1 payload.
       }
diff --git a/hudi-client/src/main/java/org/apache/hudi/index/HoodieIndex.java b/hudi-client/src/main/java/org/apache/hudi/index/HoodieIndex.java
index 8305a7b40..645184ea0 100644
--- a/hudi-client/src/main/java/org/apache/hudi/index/HoodieIndex.java
+++ b/hudi-client/src/main/java/org/apache/hudi/index/HoodieIndex.java
@@ -98,9 +98,9 @@ public abstract class HoodieIndex<T extends HoodieRecordPayload> implements Seri
       HoodieTable<T> hoodieTable) throws HoodieIndexException;
 
   /**
-   * Rollback the efffects of the commit made at commitTime.
+   * Rollback the effects of the commit made at instantTime.
    */
-  public abstract boolean rollbackCommit(String commitTime);
+  public abstract boolean rollbackCommit(String instantTime);
 
   /**
    * An index is `global` if {@link HoodieKey} to fileID mapping, does not depend on the `partitionPath`. Such an
diff --git a/hudi-client/src/main/java/org/apache/hudi/index/InMemoryHashIndex.java b/hudi-client/src/main/java/org/apache/hudi/index/InMemoryHashIndex.java
index b688e7af4..49512ab86 100644
--- a/hudi-client/src/main/java/org/apache/hudi/index/InMemoryHashIndex.java
+++ b/hudi-client/src/main/java/org/apache/hudi/index/InMemoryHashIndex.java
@@ -94,7 +94,7 @@ public class InMemoryHashIndex<T extends HoodieRecordPayload> extends HoodieInde
   }
 
   @Override
-  public boolean rollbackCommit(String commitTime) {
+  public boolean rollbackCommit(String instantTime) {
     return true;
   }
 
diff --git a/hudi-client/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndex.java b/hudi-client/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndex.java
index 5a4151958..b2166d0d1 100644
--- a/hudi-client/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndex.java
+++ b/hudi-client/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndex.java
@@ -223,7 +223,7 @@ public class HoodieBloomIndex<T extends HoodieRecordPayload> extends HoodieIndex
   }
 
   @Override
-  public boolean rollbackCommit(String commitTime) {
+  public boolean rollbackCommit(String instantTime) {
     // Nope, don't need to do anything.
     return true;
   }
diff --git a/hudi-client/src/main/java/org/apache/hudi/index/hbase/HBaseIndex.java b/hudi-client/src/main/java/org/apache/hudi/index/hbase/HBaseIndex.java
index 38fd4e1f3..bfd40ba95 100644
--- a/hudi-client/src/main/java/org/apache/hudi/index/hbase/HBaseIndex.java
+++ b/hudi-client/src/main/java/org/apache/hudi/index/hbase/HBaseIndex.java
@@ -465,7 +465,7 @@ public class HBaseIndex<T extends HoodieRecordPayload> extends HoodieIndex {
   }
 
   @Override
-  public boolean rollbackCommit(String commitTime) {
+  public boolean rollbackCommit(String instantTime) {
     // Rollback in HbaseIndex is managed via method {@link #checkIfValidCommit()}
     return true;
   }
diff --git a/hudi-client/src/main/java/org/apache/hudi/io/HoodieAppendHandle.java b/hudi-client/src/main/java/org/apache/hudi/io/HoodieAppendHandle.java
index 39601b4cb..c5be9daa5 100644
--- a/hudi-client/src/main/java/org/apache/hudi/io/HoodieAppendHandle.java
+++ b/hudi-client/src/main/java/org/apache/hudi/io/HoodieAppendHandle.java
@@ -100,17 +100,17 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieWri
   // Total number of new records inserted into the delta file
   private long insertRecordsWritten = 0;
 
-  public HoodieAppendHandle(HoodieWriteConfig config, String commitTime, HoodieTable<T> hoodieTable,
+  public HoodieAppendHandle(HoodieWriteConfig config, String instantTime, HoodieTable<T> hoodieTable,
       String partitionPath, String fileId, Iterator<HoodieRecord<T>> recordItr) {
-    super(config, commitTime, partitionPath, fileId, hoodieTable);
+    super(config, instantTime, partitionPath, fileId, hoodieTable);
     writeStatus.setStat(new HoodieDeltaWriteStat());
     this.fileId = fileId;
     this.recordItr = recordItr;
   }
 
-  public HoodieAppendHandle(HoodieWriteConfig config, String commitTime, HoodieTable<T> hoodieTable,
+  public HoodieAppendHandle(HoodieWriteConfig config, String instantTime, HoodieTable<T> hoodieTable,
       String partitionPath, String fileId) {
-    this(config, commitTime, hoodieTable, partitionPath, fileId, null);
+    this(config, instantTime, hoodieTable, partitionPath, fileId, null);
   }
 
   private void init(HoodieRecord record) {
@@ -118,7 +118,7 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieWri
     // extract some information from the first record
     SliceView rtView = hoodieTable.getSliceView();
     Option<FileSlice> fileSlice = rtView.getLatestFileSlice(partitionPath, fileId);
-    // Set the base commit time as the current commitTime for new inserts into log files
+    // Set the base commit time as the current instantTime for new inserts into log files
the current instantTime for new inserts into log files String baseInstantTime = instantTime; if (fileSlice.isPresent()) { baseInstantTime = fileSlice.get().getBaseInstantTime(); diff --git a/hudi-client/src/main/java/org/apache/hudi/io/HoodieCreateHandle.java b/hudi-client/src/main/java/org/apache/hudi/io/HoodieCreateHandle.java index 87c83cf65..1ab22e0f0 100644 --- a/hudi-client/src/main/java/org/apache/hudi/io/HoodieCreateHandle.java +++ b/hudi-client/src/main/java/org/apache/hudi/io/HoodieCreateHandle.java @@ -55,21 +55,21 @@ public class HoodieCreateHandle extends HoodieWri private Iterator> recordIterator; private boolean useWriterSchema = false; - public HoodieCreateHandle(HoodieWriteConfig config, String commitTime, HoodieTable hoodieTable, + public HoodieCreateHandle(HoodieWriteConfig config, String instantTime, HoodieTable hoodieTable, String partitionPath, String fileId) { - super(config, commitTime, partitionPath, fileId, hoodieTable); + super(config, instantTime, partitionPath, fileId, hoodieTable); writeStatus.setFileId(fileId); writeStatus.setPartitionPath(partitionPath); this.path = makeNewPath(partitionPath); try { - HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(fs, commitTime, + HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(fs, instantTime, new Path(config.getBasePath()), FSUtils.getPartitionPath(config.getBasePath(), partitionPath)); partitionMetadata.trySave(TaskContext.getPartitionId()); createMarkerFile(partitionPath); this.storageWriter = - HoodieStorageWriterFactory.getStorageWriter(commitTime, path, hoodieTable, config, writerSchema); + HoodieStorageWriterFactory.getStorageWriter(instantTime, path, hoodieTable, config, writerSchema); } catch (IOException e) { throw new HoodieInsertException("Failed to initialize HoodieStorageWriter for path " + path, e); } @@ -79,9 +79,9 @@ public class HoodieCreateHandle extends HoodieWri /** * Called by the compactor code path. */ - public HoodieCreateHandle(HoodieWriteConfig config, String commitTime, HoodieTable hoodieTable, + public HoodieCreateHandle(HoodieWriteConfig config, String instantTime, HoodieTable hoodieTable, String partitionPath, String fileId, Iterator> recordIterator) { - this(config, commitTime, hoodieTable, partitionPath, fileId); + this(config, instantTime, hoodieTable, partitionPath, fileId); this.recordIterator = recordIterator; this.useWriterSchema = true; } diff --git a/hudi-client/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java b/hudi-client/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java index f26163262..2affeca42 100644 --- a/hudi-client/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java +++ b/hudi-client/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java @@ -69,9 +69,9 @@ public class HoodieMergeHandle extends HoodieWrit private long insertRecordsWritten = 0; private boolean useWriterSchema; - public HoodieMergeHandle(HoodieWriteConfig config, String commitTime, HoodieTable hoodieTable, + public HoodieMergeHandle(HoodieWriteConfig config, String instantTime, HoodieTable hoodieTable, Iterator> recordItr, String partitionPath, String fileId) { - super(config, commitTime, partitionPath, fileId, hoodieTable); + super(config, instantTime, partitionPath, fileId, hoodieTable); init(fileId, recordItr); init(fileId, partitionPath, hoodieTable.getBaseFileOnlyView().getLatestBaseFile(partitionPath, fileId).get()); } @@ -79,10 +79,10 @@ public class HoodieMergeHandle extends HoodieWrit /** * Called by compactor code path. 
*/ - public HoodieMergeHandle(HoodieWriteConfig config, String commitTime, HoodieTable hoodieTable, + public HoodieMergeHandle(HoodieWriteConfig config, String instantTime, HoodieTable hoodieTable, Map> keyToNewRecords, String partitionPath, String fileId, HoodieBaseFile dataFileToBeMerged) { - super(config, commitTime, partitionPath, fileId, hoodieTable); + super(config, instantTime, partitionPath, fileId, hoodieTable); this.keyToNewRecords = keyToNewRecords; this.useWriterSchema = true; init(fileId, this.partitionPath, dataFileToBeMerged); diff --git a/hudi-client/src/main/java/org/apache/hudi/io/storage/HoodieParquetWriter.java b/hudi-client/src/main/java/org/apache/hudi/io/storage/HoodieParquetWriter.java index 5f1fb8fc5..ad8987a54 100644 --- a/hudi-client/src/main/java/org/apache/hudi/io/storage/HoodieParquetWriter.java +++ b/hudi-client/src/main/java/org/apache/hudi/io/storage/HoodieParquetWriter.java @@ -50,10 +50,10 @@ public class HoodieParquetWriter HoodieStorageWriter getStorageWriter( - String commitTime, Path path, HoodieTable hoodieTable, HoodieWriteConfig config, Schema schema) + String instantTime, Path path, HoodieTable hoodieTable, HoodieWriteConfig config, Schema schema) throws IOException { final String name = path.getName(); final String extension = FSUtils.isLogFile(path) ? HOODIE_LOG.getFileExtension() : FSUtils.getFileExtension(name); if (PARQUET.getFileExtension().equals(extension)) { - return newParquetStorageWriter(commitTime, path, config, schema, hoodieTable); + return newParquetStorageWriter(instantTime, path, config, schema, hoodieTable); } throw new UnsupportedOperationException(extension + " format not supported yet."); } private static HoodieStorageWriter newParquetStorageWriter( - String commitTime, Path path, HoodieWriteConfig config, Schema schema, HoodieTable hoodieTable) + String instantTime, Path path, HoodieWriteConfig config, Schema schema, HoodieTable hoodieTable) throws IOException { BloomFilter filter = BloomFilterFactory .createBloomFilter(config.getBloomFilterNumEntries(), config.getBloomFilterFPP(), @@ -63,6 +63,6 @@ public class HoodieStorageWriterFactory { config.getParquetBlockSize(), config.getParquetPageSize(), config.getParquetMaxFileSize(), hoodieTable.getHadoopConf(), config.getParquetCompressionRatio()); - return new HoodieParquetWriter<>(commitTime, path, parquetConfig, schema); + return new HoodieParquetWriter<>(instantTime, path, parquetConfig, schema); } } diff --git a/hudi-client/src/main/java/org/apache/hudi/table/CleanHelper.java b/hudi-client/src/main/java/org/apache/hudi/table/CleanHelper.java index 3c73c7eb3..b9f586d8e 100644 --- a/hudi-client/src/main/java/org/apache/hudi/table/CleanHelper.java +++ b/hudi-client/src/main/java/org/apache/hudi/table/CleanHelper.java @@ -245,18 +245,18 @@ public class CleanHelper> implements Serializab } /** - * Gets the latest version < commitTime. This version file could still be used by queries. + * Gets the latest version < instantTime. This version file could still be used by queries. 
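Reviewer note: the HoodieStorageWriterFactory hunk above dispatches purely on file extension and rejects anything that is not parquet. A simplified sketch of that dispatch (illustrative only; the real factory also consults FSUtils.isLogFile and the HOODIE_LOG extension):

import org.apache.hadoop.fs.Path;

class StorageWriterDispatchSketch {
  // Only parquet is wired up today; any other extension (e.g. log files)
  // is rejected until a writer exists for it, exactly as in the hunk above.
  static void requireSupported(Path path) {
    String name = path.getName();
    String extension = name.contains(".") ? name.substring(name.lastIndexOf('.')) : "";
    if (!".parquet".equals(extension)) {
      throw new UnsupportedOperationException(extension + " format not supported yet.");
    }
  }
}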
*/ - private String getLatestVersionBeforeCommit(List fileSliceList, HoodieInstant commitTime) { + private String getLatestVersionBeforeCommit(List fileSliceList, HoodieInstant instantTime) { for (FileSlice file : fileSliceList) { String fileCommitTime = file.getBaseInstantTime(); - if (HoodieTimeline.compareTimestamps(commitTime.getTimestamp(), fileCommitTime, HoodieTimeline.GREATER)) { - // fileList is sorted on the reverse, so the first commit we find <= commitTime is the + if (HoodieTimeline.compareTimestamps(instantTime.getTimestamp(), fileCommitTime, HoodieTimeline.GREATER)) { + // fileList is sorted on the reverse, so the first commit we find <= instantTime is the // one we want return fileCommitTime; } } - // There is no version of this file which is <= commitTime + // There is no version of this file which is <= instantTime return null; } diff --git a/hudi-client/src/main/java/org/apache/hudi/table/HoodieCopyOnWriteTable.java b/hudi-client/src/main/java/org/apache/hudi/table/HoodieCopyOnWriteTable.java index 2791bc765..94c520cc4 100644 --- a/hudi-client/src/main/java/org/apache/hudi/table/HoodieCopyOnWriteTable.java +++ b/hudi-client/src/main/java/org/apache/hudi/table/HoodieCopyOnWriteTable.java @@ -160,7 +160,7 @@ public class HoodieCopyOnWriteTable extends Hoodi } @Override - public HoodieCompactionPlan scheduleCompaction(JavaSparkContext jsc, String commitTime) { + public HoodieCompactionPlan scheduleCompaction(JavaSparkContext jsc, String instantTime) { throw new HoodieNotSupportedException("Compaction is not supported from a CopyOnWrite table"); } @@ -170,7 +170,7 @@ public class HoodieCopyOnWriteTable extends Hoodi throw new HoodieNotSupportedException("Compaction is not supported from a CopyOnWrite table"); } - public Iterator> handleUpdate(String commitTime, String partitionPath, String fileId, + public Iterator> handleUpdate(String instantTime, String partitionPath, String fileId, Iterator> recordItr) throws IOException { // This is needed since sometimes some buckets are never picked in getPartition() and end up with 0 records @@ -179,22 +179,22 @@ public class HoodieCopyOnWriteTable extends Hoodi return Collections.singletonList((List) Collections.EMPTY_LIST).iterator(); } // these are updates - HoodieMergeHandle upsertHandle = getUpdateHandle(commitTime, partitionPath, fileId, recordItr); - return handleUpdateInternal(upsertHandle, commitTime, fileId); + HoodieMergeHandle upsertHandle = getUpdateHandle(instantTime, partitionPath, fileId, recordItr); + return handleUpdateInternal(upsertHandle, instantTime, fileId); } - public Iterator> handleUpdate(String commitTime, String partitionPath, String fileId, + public Iterator> handleUpdate(String instantTime, String partitionPath, String fileId, Map> keyToNewRecords, HoodieBaseFile oldDataFile) throws IOException { // these are updates - HoodieMergeHandle upsertHandle = getUpdateHandle(commitTime, partitionPath, fileId, keyToNewRecords, oldDataFile); - return handleUpdateInternal(upsertHandle, commitTime, fileId); + HoodieMergeHandle upsertHandle = getUpdateHandle(instantTime, partitionPath, fileId, keyToNewRecords, oldDataFile); + return handleUpdateInternal(upsertHandle, instantTime, fileId); } - protected Iterator> handleUpdateInternal(HoodieMergeHandle upsertHandle, String commitTime, + protected Iterator> handleUpdateInternal(HoodieMergeHandle upsertHandle, String instantTime, String fileId) throws IOException { if (upsertHandle.getOldFilePath() == null) { throw new HoodieUpsertException( - "Error in finding the old 
file path at commit " + commitTime + " for fileId: " + fileId); + "Error in finding the old file path at commit " + instantTime + " for fileId: " + fileId); } else { AvroReadSupport.setAvroReadSchema(getHadoopConf(), upsertHandle.getWriterSchema()); BoundedInMemoryExecutor wrapper = null; @@ -221,46 +221,46 @@ public class HoodieCopyOnWriteTable extends Hoodi return Collections.singletonList(Collections.singletonList(upsertHandle.getWriteStatus())).iterator(); } - protected HoodieMergeHandle getUpdateHandle(String commitTime, String partitionPath, String fileId, Iterator> recordItr) { - return new HoodieMergeHandle<>(config, commitTime, this, recordItr, partitionPath, fileId); + protected HoodieMergeHandle getUpdateHandle(String instantTime, String partitionPath, String fileId, Iterator> recordItr) { + return new HoodieMergeHandle<>(config, instantTime, this, recordItr, partitionPath, fileId); } - protected HoodieMergeHandle getUpdateHandle(String commitTime, String partitionPath, String fileId, + protected HoodieMergeHandle getUpdateHandle(String instantTime, String partitionPath, String fileId, Map> keyToNewRecords, HoodieBaseFile dataFileToBeMerged) { - return new HoodieMergeHandle<>(config, commitTime, this, keyToNewRecords, + return new HoodieMergeHandle<>(config, instantTime, this, keyToNewRecords, partitionPath, fileId, dataFileToBeMerged); } - public Iterator> handleInsert(String commitTime, String idPfx, Iterator> recordItr) + public Iterator> handleInsert(String instantTime, String idPfx, Iterator> recordItr) throws Exception { // This is needed since sometimes some buckets are never picked in getPartition() and end up with 0 records if (!recordItr.hasNext()) { LOG.info("Empty partition"); return Collections.singletonList((List) Collections.EMPTY_LIST).iterator(); } - return new CopyOnWriteLazyInsertIterable<>(recordItr, config, commitTime, this, idPfx); + return new CopyOnWriteLazyInsertIterable<>(recordItr, config, instantTime, this, idPfx); } - public Iterator> handleInsert(String commitTime, String partitionPath, String fileId, + public Iterator> handleInsert(String instantTime, String partitionPath, String fileId, Iterator> recordItr) { HoodieCreateHandle createHandle = - new HoodieCreateHandle(config, commitTime, this, partitionPath, fileId, recordItr); + new HoodieCreateHandle(config, instantTime, this, partitionPath, fileId, recordItr); createHandle.write(); return Collections.singletonList(Collections.singletonList(createHandle.close())).iterator(); } @SuppressWarnings("unchecked") @Override - public Iterator> handleUpsertPartition(String commitTime, Integer partition, Iterator recordItr, - Partitioner partitioner) { + public Iterator> handleUpsertPartition(String instantTime, Integer partition, Iterator recordItr, + Partitioner partitioner) { UpsertPartitioner upsertPartitioner = (UpsertPartitioner) partitioner; BucketInfo binfo = upsertPartitioner.getBucketInfo(partition); BucketType btype = binfo.bucketType; try { if (btype.equals(BucketType.INSERT)) { - return handleInsert(commitTime, binfo.fileIdPrefix, recordItr); + return handleInsert(instantTime, binfo.fileIdPrefix, recordItr); } else if (btype.equals(BucketType.UPDATE)) { - return handleUpdate(commitTime, binfo.partitionPath, binfo.fileIdPrefix, recordItr); + return handleUpdate(instantTime, binfo.partitionPath, binfo.fileIdPrefix, recordItr); } else { throw new HoodieUpsertException("Unknown bucketType " + btype + " for partition :" + partition); } @@ -272,9 +272,9 @@ public class HoodieCopyOnWriteTable extends 
Hoodi } @Override - public Iterator> handleInsertPartition(String commitTime, Integer partition, Iterator recordItr, - Partitioner partitioner) { - return handleUpsertPartition(commitTime, partition, recordItr, partitioner); + public Iterator> handleInsertPartition(String instantTime, Integer partition, Iterator recordItr, + Partitioner partitioner) { + return handleUpsertPartition(instantTime, partition, recordItr, partitioner); } /** diff --git a/hudi-client/src/main/java/org/apache/hudi/table/HoodieMergeOnReadTable.java b/hudi-client/src/main/java/org/apache/hudi/table/HoodieMergeOnReadTable.java index df7a0c137..a7c5a6856 100644 --- a/hudi-client/src/main/java/org/apache/hudi/table/HoodieMergeOnReadTable.java +++ b/hudi-client/src/main/java/org/apache/hudi/table/HoodieMergeOnReadTable.java @@ -98,16 +98,16 @@ public class HoodieMergeOnReadTable extends Hoodi } @Override - public Iterator> handleUpdate(String commitTime, String partitionPath, + public Iterator> handleUpdate(String instantTime, String partitionPath, String fileId, Iterator> recordItr) throws IOException { - LOG.info("Merging updates for commit " + commitTime + " for file " + fileId); + LOG.info("Merging updates for commit " + instantTime + " for file " + fileId); if (!index.canIndexLogFiles() && mergeOnReadUpsertPartitioner.getSmallFileIds().contains(fileId)) { - LOG.info("Small file corrections for updates for commit " + commitTime + " for file " + fileId); - return super.handleUpdate(commitTime, partitionPath, fileId, recordItr); + LOG.info("Small file corrections for updates for commit " + instantTime + " for file " + fileId); + return super.handleUpdate(instantTime, partitionPath, fileId, recordItr); } else { - HoodieAppendHandle appendHandle = new HoodieAppendHandle<>(config, commitTime, this, + HoodieAppendHandle appendHandle = new HoodieAppendHandle<>(config, instantTime, this, partitionPath, fileId, recordItr); appendHandle.doAppend(); appendHandle.close(); @@ -116,13 +116,13 @@ public class HoodieMergeOnReadTable extends Hoodi } @Override - public Iterator> handleInsert(String commitTime, String idPfx, Iterator> recordItr) + public Iterator> handleInsert(String instantTime, String idPfx, Iterator> recordItr) throws Exception { // If canIndexLogFiles, write inserts to log files else write inserts to parquet files if (index.canIndexLogFiles()) { - return new MergeOnReadLazyInsertIterable<>(recordItr, config, commitTime, this, idPfx); + return new MergeOnReadLazyInsertIterable<>(recordItr, config, instantTime, this, idPfx); } else { - return super.handleInsert(commitTime, idPfx, recordItr); + return super.handleInsert(instantTime, idPfx, recordItr); } } diff --git a/hudi-client/src/main/java/org/apache/hudi/table/HoodieTable.java b/hudi-client/src/main/java/org/apache/hudi/table/HoodieTable.java index 2e73ef0cf..e38510ffd 100644 --- a/hudi-client/src/main/java/org/apache/hudi/table/HoodieTable.java +++ b/hudi-client/src/main/java/org/apache/hudi/table/HoodieTable.java @@ -248,13 +248,13 @@ public abstract class HoodieTable implements Seri /** * Perform the ultimate IO for a given upserted (RDD) partition. */ - public abstract Iterator> handleUpsertPartition(String commitTime, Integer partition, + public abstract Iterator> handleUpsertPartition(String instantTime, Integer partition, Iterator> recordIterator, Partitioner partitioner); /** * Perform the ultimate IO for a given inserted (RDD) partition. 
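Reviewer note: the HoodieMergeOnReadTable hunks above encode two routing rules that the instantTime rename threads through. A sketch of just the decisions, with the index capability and the partitioner's small-file set passed in as plain values (the real collaborators are HoodieIndex and the upsert partitioner):

import java.util.Set;

class MorRoutingSketch {
  // handleUpdate: small files tracked by the partitioner are rewritten via the
  // copy-on-write merge path when the index cannot locate records in log files;
  // all other updates are appended to the file group's log.
  static boolean routeUpdateToMergePath(boolean canIndexLogFiles, Set<String> smallFileIds, String fileId) {
    return !canIndexLogFiles && smallFileIds.contains(fileId);
  }

  // handleInsert: inserts may land in log files only if the index can later
  // find them there; otherwise they go to new parquet base files.
  static boolean routeInsertToLogFiles(boolean canIndexLogFiles) {
    return canIndexLogFiles;
  }
}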
*/ - public abstract Iterator> handleInsertPartition(String commitTime, Integer partition, + public abstract Iterator> handleInsertPartition(String instantTime, Integer partition, Iterator> recordIterator, Partitioner partitioner); /** diff --git a/hudi-client/src/main/java/org/apache/hudi/table/compact/HoodieMergeOnReadTableCompactor.java b/hudi-client/src/main/java/org/apache/hudi/table/compact/HoodieMergeOnReadTableCompactor.java index 22c90453c..8f46b3abc 100644 --- a/hudi-client/src/main/java/org/apache/hudi/table/compact/HoodieMergeOnReadTableCompactor.java +++ b/hudi-client/src/main/java/org/apache/hudi/table/compact/HoodieMergeOnReadTableCompactor.java @@ -99,12 +99,12 @@ public class HoodieMergeOnReadTableCompactor implements HoodieCompactor { } private List compact(HoodieCopyOnWriteTable hoodieCopyOnWriteTable, HoodieTableMetaClient metaClient, - HoodieWriteConfig config, CompactionOperation operation, String commitTime) throws IOException { + HoodieWriteConfig config, CompactionOperation operation, String instantTime) throws IOException { FileSystem fs = metaClient.getFs(); Schema readerSchema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(config.getSchema())); LOG.info("Compacting base " + operation.getDataFileName() + " with delta files " + operation.getDeltaFileNames() - + " for commit " + commitTime); + + " for commit " + instantTime); // TODO - FIX THIS // Reads the entire avro file. Always only specific blocks should be read from the avro file // (failure recover). @@ -136,11 +136,11 @@ public class HoodieMergeOnReadTableCompactor implements HoodieCompactor { // If the dataFile is present, there is a base parquet file present, perform updates else perform inserts into a // new base parquet file. if (oldDataFileOpt.isPresent()) { - result = hoodieCopyOnWriteTable.handleUpdate(commitTime, operation.getPartitionPath(), + result = hoodieCopyOnWriteTable.handleUpdate(instantTime, operation.getPartitionPath(), operation.getFileId(), scanner.getRecords(), oldDataFileOpt.get()); } else { - result = hoodieCopyOnWriteTable.handleInsert(commitTime, operation.getPartitionPath(), operation.getFileId(), + result = hoodieCopyOnWriteTable.handleInsert(instantTime, operation.getPartitionPath(), operation.getFileId(), scanner.iterator()); } Iterable> resultIterable = () -> result; diff --git a/hudi-client/src/test/java/org/apache/hudi/client/TestHoodieClientBase.java b/hudi-client/src/test/java/org/apache/hudi/client/TestHoodieClientBase.java index 5f47bf535..b67435386 100644 --- a/hudi-client/src/test/java/org/apache/hudi/client/TestHoodieClientBase.java +++ b/hudi-client/src/test/java/org/apache/hudi/client/TestHoodieClientBase.java @@ -212,13 +212,13 @@ public class TestHoodieClientBase extends HoodieClientTestHarness { * Ensure records have location field set. 
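Reviewer note: the compactor hunk above branches on whether the file group already has a base parquet file. A sketch of that choice under the same types the hunk uses (Option and HoodieBaseFile; import paths from memory):

import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.util.Option;

class CompactionTargetSketch {
  // When a base file exists, compaction merges the scanned log records into it
  // (handleUpdate); when none exists, it writes a fresh base file (handleInsert).
  static boolean mergesIntoExistingBaseFile(Option<HoodieBaseFile> oldDataFileOpt) {
    return oldDataFileOpt.isPresent();
  }
}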
* * @param taggedRecords Tagged Records - * @param commitTime Commit Timestamp + * @param instantTime Commit Timestamp */ - protected void checkTaggedRecords(List taggedRecords, String commitTime) { + protected void checkTaggedRecords(List taggedRecords, String instantTime) { for (HoodieRecord rec : taggedRecords) { assertTrue("Record " + rec + " found with no location.", rec.isCurrentLocationKnown()); - assertEquals("All records should have commit time " + commitTime + ", since updates were made", - rec.getCurrentLocation().getInstantTime(), commitTime); + assertEquals("All records should have commit time " + instantTime + ", since updates were made", + rec.getCurrentLocation().getInstantTime(), instantTime); } } diff --git a/hudi-client/src/test/java/org/apache/hudi/client/TestHoodieClientOnCopyOnWriteStorage.java b/hudi-client/src/test/java/org/apache/hudi/client/TestHoodieClientOnCopyOnWriteStorage.java index c7da7a7e5..e29107737 100644 --- a/hudi-client/src/test/java/org/apache/hudi/client/TestHoodieClientOnCopyOnWriteStorage.java +++ b/hudi-client/src/test/java/org/apache/hudi/client/TestHoodieClientOnCopyOnWriteStorage.java @@ -129,7 +129,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase { */ @Test public void testAutoCommitOnBulkInsertPrepped() throws Exception { - testAutoCommit((writeClient, recordRDD, commitTime) -> writeClient.bulkInsertPreppedRecords(recordRDD, commitTime, + testAutoCommit((writeClient, recordRDD, instantTime) -> writeClient.bulkInsertPreppedRecords(recordRDD, instantTime, Option.empty()), true); } @@ -357,9 +357,9 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase { final List recordsInFirstBatch = new ArrayList<>(); Function2, String, Integer> recordGenFunction = - (String commitTime, Integer numRecordsInThisCommit) -> { - List fewRecordsForInsert = dataGen.generateInserts(commitTime, 200); - List fewRecordsForDelete = dataGen.generateDeletes(commitTime, 100); + (String instantTime, Integer numRecordsInThisCommit) -> { + List fewRecordsForInsert = dataGen.generateInserts(instantTime, 200); + List fewRecordsForDelete = dataGen.generateDeletes(instantTime, 100); recordsInFirstBatch.addAll(fewRecordsForInsert); recordsInFirstBatch.addAll(fewRecordsForDelete); @@ -376,7 +376,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase { newCommitTime = "004"; final List recordsInSecondBatch = new ArrayList<>(); - recordGenFunction = (String commitTime, Integer numRecordsInThisCommit) -> { + recordGenFunction = (String instantTime, Integer numRecordsInThisCommit) -> { List fewRecordsForDelete = recordsInFirstBatch.subList(0, 50); List fewRecordsForUpdate = recordsInFirstBatch.subList(50, 100); recordsInSecondBatch.addAll(dataGen.generateDeletesFromExistingRecords(fewRecordsForDelete)); @@ -704,18 +704,18 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase { testDeletes(client, updateBatch3.getRight(), 10, file1, "007", 140, keysSoFar); } - private Pair, List> testUpdates(String commitTime, HoodieWriteClient client, + private Pair, List> testUpdates(String instantTime, HoodieWriteClient client, int sizeToInsertAndUpdate, int expectedTotalRecords) throws IOException { - client.startCommitWithTime(commitTime); - List inserts = dataGen.generateInserts(commitTime, sizeToInsertAndUpdate); + client.startCommitWithTime(instantTime); + List inserts = dataGen.generateInserts(instantTime, sizeToInsertAndUpdate); Set keys = 
HoodieClientTestUtils.getRecordKeys(inserts); List insertsAndUpdates = new ArrayList<>(); insertsAndUpdates.addAll(inserts); - insertsAndUpdates.addAll(dataGen.generateUpdates(commitTime, inserts)); + insertsAndUpdates.addAll(dataGen.generateUpdates(instantTime, inserts)); JavaRDD insertAndUpdatesRDD = jsc.parallelize(insertsAndUpdates, 1); - List statuses = client.upsert(insertAndUpdatesRDD, commitTime).collect(); + List statuses = client.upsert(insertAndUpdatesRDD, instantTime).collect(); assertNoWriteErrors(statuses); // Check the entire dataset has all records still @@ -729,13 +729,13 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase { } private void testDeletes(HoodieWriteClient client, List previousRecords, int sizeToDelete, - String existingFile, String commitTime, int exepctedRecords, List keys) { - client.startCommitWithTime(commitTime); + String existingFile, String instantTime, int exepctedRecords, List keys) { + client.startCommitWithTime(instantTime); List hoodieKeysToDelete = HoodieClientTestUtils .getKeysToDelete(HoodieClientTestUtils.getHoodieKeys(previousRecords), sizeToDelete); JavaRDD deleteKeys = jsc.parallelize(hoodieKeysToDelete, 1); - List statuses = client.delete(deleteKeys, commitTime).collect(); + List statuses = client.delete(deleteKeys, instantTime).collect(); assertNoWriteErrors(statuses); @@ -757,7 +757,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase { List records = ParquetUtils.readAvroRecords(jsc.hadoopConfiguration(), newFile); for (GenericRecord record : records) { String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(); - assertTrue("key expected to be part of " + commitTime, keys.contains(recordKey)); + assertTrue("key expected to be part of " + instantTime, keys.contains(recordKey)); assertFalse("Key deleted", hoodieKeysToDelete.contains(recordKey)); } } @@ -802,21 +802,21 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase { HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath); HoodieTable table = HoodieTable.getHoodieTable(metaClient, cfg, jsc); - String commitTime = "000"; - client.startCommitWithTime(commitTime); + String instantTime = "000"; + client.startCommitWithTime(instantTime); - List records = dataGen.generateInserts(commitTime, 200); + List records = dataGen.generateInserts(instantTime, 200); JavaRDD writeRecords = jsc.parallelize(records, 1); - JavaRDD result = client.bulkInsert(writeRecords, commitTime); + JavaRDD result = client.bulkInsert(writeRecords, instantTime); - assertTrue("Commit should succeed", client.commit(commitTime, result)); + assertTrue("Commit should succeed", client.commit(instantTime, result)); assertTrue("After explicit commit, commit file should be created", - HoodieTestUtils.doesCommitExist(basePath, commitTime)); + HoodieTestUtils.doesCommitExist(basePath, instantTime)); // Get parquet file paths from commit metadata String actionType = metaClient.getCommitActionType(); - HoodieInstant commitInstant = new HoodieInstant(false, actionType, commitTime); + HoodieInstant commitInstant = new HoodieInstant(false, actionType, instantTime); HoodieTimeline commitTimeline = metaClient.getCommitTimeline().filterCompletedInstants(); HoodieCommitMetadata commitMetadata = HoodieCommitMetadata .fromBytes(commitTimeline.getInstantDetails(commitInstant).get(), HoodieCommitMetadata.class); @@ -824,7 +824,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends 
TestHoodieClientBase { Collection commitPathNames = commitMetadata.getFileIdAndFullPaths(basePath).values(); // Read from commit file - String filename = HoodieTestUtils.getCommitFilePath(basePath, commitTime); + String filename = HoodieTestUtils.getCommitFilePath(basePath, instantTime); FileInputStream inputStream = new FileInputStream(filename); String everything = FileIOUtils.readAsUTFString(inputStream); HoodieCommitMetadata metadata = HoodieCommitMetadata.fromJsonString(everything, HoodieCommitMetadata.class); @@ -848,20 +848,20 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase { HoodieWriteClient client = getHoodieWriteClient(cfg); HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath); - String commitTime = "000"; - client.startCommitWithTime(commitTime); + String instantTime = "000"; + client.startCommitWithTime(instantTime); - List records = dataGen.generateInserts(commitTime, 200); + List records = dataGen.generateInserts(instantTime, 200); JavaRDD writeRecords = jsc.parallelize(records, 1); - JavaRDD result = client.bulkInsert(writeRecords, commitTime); + JavaRDD result = client.bulkInsert(writeRecords, instantTime); - assertTrue("Commit should succeed", client.commit(commitTime, result)); + assertTrue("Commit should succeed", client.commit(instantTime, result)); assertTrue("After explicit commit, commit file should be created", - HoodieTestUtils.doesCommitExist(basePath, commitTime)); + HoodieTestUtils.doesCommitExist(basePath, instantTime)); // Read from commit file - String filename = HoodieTestUtils.getCommitFilePath(basePath, commitTime); + String filename = HoodieTestUtils.getCommitFilePath(basePath, instantTime); FileInputStream inputStream = new FileInputStream(filename); String everything = FileIOUtils.readAsUTFString(inputStream); HoodieCommitMetadata metadata = @@ -879,19 +879,19 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase { Assert.assertEquals(inserts, 200); // Update + Inserts such that they just expand file1 - commitTime = "001"; - client.startCommitWithTime(commitTime); + instantTime = "001"; + client.startCommitWithTime(instantTime); - records = dataGen.generateUpdates(commitTime, records); + records = dataGen.generateUpdates(instantTime, records); writeRecords = jsc.parallelize(records, 1); - result = client.upsert(writeRecords, commitTime); + result = client.upsert(writeRecords, instantTime); - assertTrue("Commit should succeed", client.commit(commitTime, result)); + assertTrue("Commit should succeed", client.commit(instantTime, result)); assertTrue("After explicit commit, commit file should be created", - HoodieTestUtils.doesCommitExist(basePath, commitTime)); + HoodieTestUtils.doesCommitExist(basePath, instantTime)); // Read from commit file - filename = HoodieTestUtils.getCommitFilePath(basePath, commitTime); + filename = HoodieTestUtils.getCommitFilePath(basePath, instantTime); inputStream = new FileInputStream(filename); everything = FileIOUtils.readAsUTFString(inputStream); metadata = HoodieCommitMetadata.fromJsonString(everything.toString(), HoodieCommitMetadata.class); @@ -918,37 +918,37 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase { @Test public void testConsistencyCheckDuringFinalize() throws Exception { HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath); - String commitTime = "000"; + String instantTime = "000"; HoodieWriteConfig cfg = 
getConfigBuilder().withAutoCommit(false).build(); HoodieWriteClient client = getHoodieWriteClient(cfg); - Pair> result = testConsistencyCheck(metaClient, commitTime); + Pair> result = testConsistencyCheck(metaClient, instantTime); // Delete orphan marker and commit should succeed metaClient.getFs().delete(result.getKey(), false); - assertTrue("Commit should succeed", client.commit(commitTime, result.getRight())); + assertTrue("Commit should succeed", client.commit(instantTime, result.getRight())); assertTrue("After explicit commit, commit file should be created", - HoodieTestUtils.doesCommitExist(basePath, commitTime)); + HoodieTestUtils.doesCommitExist(basePath, instantTime)); // Marker directory must be removed - assertFalse(metaClient.getFs().exists(new Path(metaClient.getMarkerFolderPath(commitTime)))); + assertFalse(metaClient.getFs().exists(new Path(metaClient.getMarkerFolderPath(instantTime)))); } @Test public void testRollbackAfterConsistencyCheckFailure() throws Exception { - String commitTime = "000"; + String instantTime = "000"; HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath); HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).build(); HoodieWriteClient client = getHoodieWriteClient(cfg); - testConsistencyCheck(metaClient, commitTime); + testConsistencyCheck(metaClient, instantTime); // Rollback of this commit should succeed - client.rollback(commitTime); + client.rollback(instantTime); assertFalse("After explicit rollback, commit file should not be present", - HoodieTestUtils.doesCommitExist(basePath, commitTime)); + HoodieTestUtils.doesCommitExist(basePath, instantTime)); // Marker directory must be removed after rollback - assertFalse(metaClient.getFs().exists(new Path(metaClient.getMarkerFolderPath(commitTime)))); + assertFalse(metaClient.getFs().exists(new Path(metaClient.getMarkerFolderPath(instantTime)))); } - private Pair> testConsistencyCheck(HoodieTableMetaClient metaClient, String commitTime) + private Pair> testConsistencyCheck(HoodieTableMetaClient metaClient, String instantTime) throws Exception { HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false) .withConsistencyGuardConfig(ConsistencyGuardConfig.newBuilder().withConsistencyCheckEnabled(true) @@ -956,24 +956,24 @@ public class TestHoodieClientOnCopyOnWriteStorage extends TestHoodieClientBase { .build(); HoodieWriteClient client = getHoodieWriteClient(cfg); - client.startCommitWithTime(commitTime); - JavaRDD writeRecords = jsc.parallelize(dataGen.generateInserts(commitTime, 200), 1); - JavaRDD result = client.bulkInsert(writeRecords, commitTime); + client.startCommitWithTime(instantTime); + JavaRDD writeRecords = jsc.parallelize(dataGen.generateInserts(instantTime, 200), 1); + JavaRDD result = client.bulkInsert(writeRecords, instantTime); result.collect(); // Create a dummy marker file to simulate the case that a marker file was created without data file. 
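Reviewer note: both consistency-check tests above build their client the same way. A minimal sketch of that configuration outside the test harness, assuming basePath points at an existing table (builder calls are taken from the hunk; import paths may differ by Hudi version):

import org.apache.hudi.config.ConsistencyGuardConfig;
import org.apache.hudi.config.HoodieWriteConfig;

class ConsistencyCheckConfigSketch {
  static HoodieWriteConfig buildConfig(String basePath) {
    return HoodieWriteConfig.newBuilder()
        .withPath(basePath)
        .withAutoCommit(false) // commit explicitly so the marker check runs before finalize
        .withConsistencyGuardConfig(ConsistencyGuardConfig.newBuilder()
            .withConsistencyCheckEnabled(true)
            .build())
        .build();
  }
}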
// This should fail the commit String partitionPath = Arrays - .stream(fs.globStatus(new Path(String.format("%s/*/*/*/*", metaClient.getMarkerFolderPath(commitTime))), + .stream(fs.globStatus(new Path(String.format("%s/*/*/*/*", metaClient.getMarkerFolderPath(instantTime))), path -> path.toString().endsWith(HoodieTableMetaClient.MARKER_EXTN))) .limit(1).map(status -> status.getPath().getParent().toString()).collect(Collectors.toList()).get(0); Path markerFilePath = new Path(String.format("%s/%s", partitionPath, - FSUtils.makeMarkerFile(commitTime, "1-0-1", UUID.randomUUID().toString()))); + FSUtils.makeMarkerFile(instantTime, "1-0-1", UUID.randomUUID().toString()))); metaClient.getFs().create(markerFilePath); LOG.info("Created a dummy marker path=" + markerFilePath); try { - client.commit(commitTime, result); + client.commit(instantTime, result); fail("Commit should fail due to consistency check"); } catch (HoodieCommitException cme) { assertTrue(cme.getCause() instanceof HoodieIOException); diff --git a/hudi-client/src/test/java/org/apache/hudi/client/TestHoodieReadClient.java b/hudi-client/src/test/java/org/apache/hudi/client/TestHoodieReadClient.java index 6329e089f..cda4d8a70 100644 --- a/hudi-client/src/test/java/org/apache/hudi/client/TestHoodieReadClient.java +++ b/hudi-client/src/test/java/org/apache/hudi/client/TestHoodieReadClient.java @@ -74,8 +74,8 @@ public class TestHoodieReadClient extends TestHoodieClientBase { @Test public void testReadFilterExistAfterBulkInsertPrepped() throws Exception { testReadFilterExist(getConfigBuilder().withBulkInsertParallelism(1).build(), - (writeClient, recordRDD, commitTime) -> { - return writeClient.bulkInsertPreppedRecords(recordRDD, commitTime, Option.empty()); + (writeClient, recordRDD, instantTime) -> { + return writeClient.bulkInsertPreppedRecords(recordRDD, instantTime, Option.empty()); }); } @@ -178,8 +178,8 @@ public class TestHoodieReadClient extends TestHoodieClientBase { @Test public void testTagLocationAfterBulkInsertPrepped() throws Exception { testTagLocation( - getConfigBuilder().withBulkInsertParallelism(1).build(), (writeClient, recordRDD, commitTime) -> writeClient - .bulkInsertPreppedRecords(recordRDD, commitTime, Option.empty()), + getConfigBuilder().withBulkInsertParallelism(1).build(), (writeClient, recordRDD, instantTime) -> writeClient + .bulkInsertPreppedRecords(recordRDD, instantTime, Option.empty()), HoodieWriteClient::upsertPreppedRecords, true); } diff --git a/hudi-client/src/test/java/org/apache/hudi/common/HoodieClientTestUtils.java b/hudi-client/src/test/java/org/apache/hudi/common/HoodieClientTestUtils.java index 9003d7162..b85de2cbb 100644 --- a/hudi-client/src/test/java/org/apache/hudi/common/HoodieClientTestUtils.java +++ b/hudi-client/src/test/java/org/apache/hudi/common/HoodieClientTestUtils.java @@ -115,30 +115,30 @@ public class HoodieClientTestUtils { return toReturn; } - private static void fakeMetaFile(String basePath, String commitTime, String suffix) throws IOException { + private static void fakeMetaFile(String basePath, String instantTime, String suffix) throws IOException { String parentPath = basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME; new File(parentPath).mkdirs(); - new File(parentPath + "/" + commitTime + suffix).createNewFile(); + new File(parentPath + "/" + instantTime + suffix).createNewFile(); } - public static void fakeCommitFile(String basePath, String commitTime) throws IOException { - fakeMetaFile(basePath, commitTime, HoodieTimeline.COMMIT_EXTENSION); + public static void 
fakeCommitFile(String basePath, String instantTime) throws IOException { + fakeMetaFile(basePath, instantTime, HoodieTimeline.COMMIT_EXTENSION); } - public static void fakeInFlightFile(String basePath, String commitTime) throws IOException { - fakeMetaFile(basePath, commitTime, HoodieTimeline.INFLIGHT_EXTENSION); + public static void fakeInFlightFile(String basePath, String instantTime) throws IOException { + fakeMetaFile(basePath, instantTime, HoodieTimeline.INFLIGHT_EXTENSION); } - public static void fakeDataFile(String basePath, String partitionPath, String commitTime, String fileId) + public static void fakeDataFile(String basePath, String partitionPath, String instantTime, String fileId) throws Exception { - fakeDataFile(basePath, partitionPath, commitTime, fileId, 0); + fakeDataFile(basePath, partitionPath, instantTime, fileId, 0); } - public static void fakeDataFile(String basePath, String partitionPath, String commitTime, String fileId, long length) + public static void fakeDataFile(String basePath, String partitionPath, String instantTime, String fileId, long length) throws Exception { String parentPath = String.format("%s/%s", basePath, partitionPath); new File(parentPath).mkdirs(); - String path = String.format("%s/%s", parentPath, FSUtils.makeDataFileName(commitTime, "1-0-1", fileId)); + String path = String.format("%s/%s", parentPath, FSUtils.makeDataFileName(instantTime, "1-0-1", fileId)); new File(path).createNewFile(); new RandomAccessFile(path, "rw").setLength(length); } @@ -161,19 +161,19 @@ public class HoodieClientTestUtils { } public static Dataset readCommit(String basePath, SQLContext sqlContext, HoodieTimeline commitTimeline, - String commitTime) { - HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime); + String instantTime) { + HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, instantTime); if (!commitTimeline.containsInstant(commitInstant)) { - throw new HoodieException("No commit exists at " + commitTime); + throw new HoodieException("No commit exists at " + instantTime); } try { HashMap paths = getLatestFileIDsToFullPath(basePath, commitTimeline, Arrays.asList(commitInstant)); LOG.info("Path :" + paths.values()); return sqlContext.read().parquet(paths.values().toArray(new String[paths.size()])) - .filter(String.format("%s ='%s'", HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitTime)); + .filter(String.format("%s ='%s'", HoodieRecord.COMMIT_TIME_METADATA_FIELD, instantTime)); } catch (Exception e) { - throw new HoodieException("Error reading commit " + commitTime, e); + throw new HoodieException("Error reading commit " + instantTime, e); } } @@ -225,16 +225,16 @@ public class HoodieClientTestUtils { } HoodieAvroWriteSupport writeSupport = new HoodieAvroWriteSupport(new AvroSchemaConverter().convert(schema), schema, filter); - String commitTime = FSUtils.getCommitTime(filename); + String instantTime = FSUtils.getCommitTime(filename); HoodieParquetConfig config = new HoodieParquetConfig(writeSupport, CompressionCodecName.GZIP, ParquetWriter.DEFAULT_BLOCK_SIZE, ParquetWriter.DEFAULT_PAGE_SIZE, 120 * 1024 * 1024, HoodieTestUtils.getDefaultHadoopConf(), Double.valueOf(HoodieStorageConfig.DEFAULT_STREAM_COMPRESSION_RATIO)); HoodieParquetWriter writer = - new HoodieParquetWriter(commitTime, new Path(basePath + "/" + partitionPath + "/" + filename), config, schema); + new HoodieParquetWriter(instantTime, new Path(basePath + "/" + partitionPath + "/" + filename), config, schema); int seqId = 1; 
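Reviewer note: readCommit, earlier in this hunk, shows the standard trick for isolating one instant's rows: read the parquet files the commit touched, then filter on the commit-time metadata column. A self-contained sketch of the same idea (SQLContext mirrors the test utility's API; a SparkSession-based variant would look the same):

import org.apache.hudi.common.model.HoodieRecord;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;

class ReadByInstantSketch {
  // Keeps only rows stamped with the given instant time (_hoodie_commit_time).
  static Dataset<Row> rowsWrittenAt(SQLContext sqlContext, String[] paths, String instantTime) {
    return sqlContext.read().parquet(paths)
        .filter(String.format("%s = '%s'", HoodieRecord.COMMIT_TIME_METADATA_FIELD, instantTime));
  }
}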
for (HoodieRecord record : records) { GenericRecord avroRecord = (GenericRecord) record.getData().getInsertValue(schema).get(); - HoodieAvroUtils.addCommitMetadataToRecord(avroRecord, commitTime, "" + seqId++); + HoodieAvroUtils.addCommitMetadataToRecord(avroRecord, instantTime, "" + seqId++); HoodieAvroUtils.addHoodieKeyToRecord(avroRecord, record.getRecordKey(), record.getPartitionPath(), filename); writer.writeAvro(record.getRecordKey(), avroRecord); filter.add(record.getRecordKey()); @@ -243,7 +243,7 @@ public class HoodieClientTestUtils { if (createCommitTime) { HoodieTestUtils.createMetadataFolder(basePath); - HoodieTestUtils.createCommitFiles(basePath, commitTime); + HoodieTestUtils.createCommitFiles(basePath, instantTime); } return filename; } @@ -251,10 +251,10 @@ public class HoodieClientTestUtils { public static String writeParquetFile(String basePath, String partitionPath, List records, Schema schema, BloomFilter filter, boolean createCommitTime) throws IOException, InterruptedException { Thread.sleep(1000); - String commitTime = HoodieTestUtils.makeNewCommitTime(); + String instantTime = HoodieTestUtils.makeNewCommitTime(); String fileId = UUID.randomUUID().toString(); - String filename = FSUtils.makeDataFileName(commitTime, "1-0-1", fileId); - HoodieTestUtils.createCommitFiles(basePath, commitTime); + String filename = FSUtils.makeDataFileName(instantTime, "1-0-1", fileId); + HoodieTestUtils.createCommitFiles(basePath, instantTime); return HoodieClientTestUtils.writeParquetFile(basePath, partitionPath, filename, records, schema, filter, createCommitTime); } diff --git a/hudi-client/src/test/java/org/apache/hudi/common/HoodieTestDataGenerator.java b/hudi-client/src/test/java/org/apache/hudi/common/HoodieTestDataGenerator.java index 330788161..35a80900c 100644 --- a/hudi-client/src/test/java/org/apache/hudi/common/HoodieTestDataGenerator.java +++ b/hudi-client/src/test/java/org/apache/hudi/common/HoodieTestDataGenerator.java @@ -129,12 +129,12 @@ public class HoodieTestDataGenerator { * retaining the key if optionally provided. * * @param key Hoodie key. - * @param commitTime Commit time to use. + * @param instantTime Instant time to use. * @return Raw paylaod of a test record. * @throws IOException */ - public static TestRawTripPayload generateRandomValue(HoodieKey key, String commitTime) throws IOException { - return generateRandomValue(key, commitTime, false); + public static TestRawTripPayload generateRandomValue(HoodieKey key, String instantTime) throws IOException { + return generateRandomValue(key, instantTime, false); } /** @@ -142,15 +142,15 @@ public class HoodieTestDataGenerator { * retaining the key if optionally provided. * * @param key Hoodie key. - * @param commitTime Commit time to use. + * @param instantTime Commit time to use. * @param isFlattened whether the schema of the record should be flattened. * @return Raw paylaod of a test record. 
* @throws IOException */ public static TestRawTripPayload generateRandomValue( - HoodieKey key, String commitTime, boolean isFlattened) throws IOException { + HoodieKey key, String instantTime, boolean isFlattened) throws IOException { GenericRecord rec = generateGenericRecord( - key.getRecordKey(), "rider-" + commitTime, "driver-" + commitTime, 0.0, + key.getRecordKey(), "rider-" + instantTime, "driver-" + instantTime, 0.0, false, isFlattened); return new TestRawTripPayload(rec.toString(), key.getRecordKey(), key.getPartitionPath(), TRIP_EXAMPLE_SCHEMA); } @@ -158,8 +158,8 @@ public class HoodieTestDataGenerator { /** * Generates a new avro record of the above schema format for a delete. */ - public static TestRawTripPayload generateRandomDeleteValue(HoodieKey key, String commitTime) throws IOException { - GenericRecord rec = generateGenericRecord(key.getRecordKey(), "rider-" + commitTime, "driver-" + commitTime, 0.0, + public static TestRawTripPayload generateRandomDeleteValue(HoodieKey key, String instantTime) throws IOException { + GenericRecord rec = generateGenericRecord(key.getRecordKey(), "rider-" + instantTime, "driver-" + instantTime, 0.0, true, false); return new TestRawTripPayload(rec.toString(), key.getRecordKey(), key.getPartitionPath(), TRIP_EXAMPLE_SCHEMA); } @@ -167,8 +167,8 @@ public class HoodieTestDataGenerator { /** * Generates a new avro record of the above schema format, retaining the key if optionally provided. */ - public static HoodieAvroPayload generateAvroPayload(HoodieKey key, String commitTime) { - GenericRecord rec = generateGenericRecord(key.getRecordKey(), "rider-" + commitTime, "driver-" + commitTime, 0.0); + public static HoodieAvroPayload generateAvroPayload(HoodieKey key, String instantTime) { + GenericRecord rec = generateGenericRecord(key.getRecordKey(), "rider-" + instantTime, "driver-" + instantTime, 0.0); return new HoodieAvroPayload(Option.of(rec)); } @@ -208,9 +208,9 @@ public class HoodieTestDataGenerator { return rec; } - public static void createCommitFile(String basePath, String commitTime, Configuration configuration) { - Arrays.asList(HoodieTimeline.makeCommitFileName(commitTime), HoodieTimeline.makeInflightCommitFileName(commitTime), - HoodieTimeline.makeRequestedCommitFileName(commitTime)) + public static void createCommitFile(String basePath, String instantTime, Configuration configuration) { + Arrays.asList(HoodieTimeline.makeCommitFileName(instantTime), HoodieTimeline.makeInflightCommitFileName(instantTime), + HoodieTimeline.makeRequestedCommitFileName(instantTime)) .forEach(f -> { Path commitFile = new Path( basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + f); @@ -235,10 +235,10 @@ public class HoodieTestDataGenerator { }); } - public static void createCompactionRequestedFile(String basePath, String commitTime, Configuration configuration) + public static void createCompactionRequestedFile(String basePath, String instantTime, Configuration configuration) throws IOException { Path commitFile = new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" - + HoodieTimeline.makeRequestedCompactionFileName(commitTime)); + + HoodieTimeline.makeRequestedCompactionFileName(instantTime)); FileSystem fs = FSUtils.getFs(basePath, configuration); FSDataOutputStream os = fs.create(commitFile, true); os.close(); @@ -256,10 +256,10 @@ public class HoodieTestDataGenerator { } } - public static void createSavepointFile(String basePath, String commitTime, Configuration configuration) + public static void 
createSavepointFile(String basePath, String instantTime, Configuration configuration) throws IOException { Path commitFile = new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" - + HoodieTimeline.makeSavePointFileName(commitTime)); + + HoodieTimeline.makeSavePointFileName(instantTime)); FileSystem fs = FSUtils.getFs(basePath, configuration); try (FSDataOutputStream os = fs.create(commitFile, true)) { HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata(); @@ -272,28 +272,28 @@ public class HoodieTestDataGenerator { * Generates new inserts with nested schema, uniformly across the partition paths above. * It also updates the list of existing keys. */ - public List generateInserts(String commitTime, Integer n) { - return generateInserts(commitTime, n, false); + public List generateInserts(String instantTime, Integer n) { + return generateInserts(instantTime, n, false); } /** * Generates new inserts, uniformly across the partition paths above. * It also updates the list of existing keys. * - * @param commitTime Commit time to use. + * @param instantTime Commit time to use. * @param n Number of records. * @param isFlattened whether the schema of the generated record is flattened * @return List of {@link HoodieRecord}s */ - public List generateInserts(String commitTime, Integer n, boolean isFlattened) { - return generateInsertsStream(commitTime, n, isFlattened).collect(Collectors.toList()); + public List generateInserts(String instantTime, Integer n, boolean isFlattened) { + return generateInsertsStream(instantTime, n, isFlattened).collect(Collectors.toList()); } /** * Generates new inserts, uniformly across the partition paths above. It also updates the list of existing keys. */ public Stream generateInsertsStream( - String commitTime, Integer n, boolean isFlattened) { + String instantTime, Integer n, boolean isFlattened) { int currSize = getNumExistingKeys(); return IntStream.range(0, n).boxed().map(i -> { @@ -305,30 +305,30 @@ public class HoodieTestDataGenerator { existingKeys.put(currSize + i, kp); numExistingKeys++; try { - return new HoodieRecord(key, generateRandomValue(key, commitTime, isFlattened)); + return new HoodieRecord(key, generateRandomValue(key, instantTime, isFlattened)); } catch (IOException e) { throw new HoodieIOException(e.getMessage(), e); } }); } - public List generateSameKeyInserts(String commitTime, List origin) throws IOException { + public List generateSameKeyInserts(String instantTime, List origin) throws IOException { List copy = new ArrayList<>(); for (HoodieRecord r : origin) { HoodieKey key = r.getKey(); - HoodieRecord record = new HoodieRecord(key, generateRandomValue(key, commitTime)); + HoodieRecord record = new HoodieRecord(key, generateRandomValue(key, instantTime)); copy.add(record); } return copy; } - public List generateInsertsWithHoodieAvroPayload(String commitTime, int limit) { + public List generateInsertsWithHoodieAvroPayload(String instantTime, int limit) { List inserts = new ArrayList<>(); int currSize = getNumExistingKeys(); for (int i = 0; i < limit; i++) { String partitionPath = partitionPaths[RAND.nextInt(partitionPaths.length)]; HoodieKey key = new HoodieKey(UUID.randomUUID().toString(), partitionPath); - HoodieRecord record = new HoodieRecord(key, generateAvroPayload(key, commitTime)); + HoodieRecord record = new HoodieRecord(key, generateAvroPayload(key, instantTime)); inserts.add(record); KeyPartition kp = new KeyPartition(); @@ -340,17 +340,17 @@ public class HoodieTestDataGenerator { return inserts; } - public 
List generateUpdatesWithHoodieAvroPayload(String commitTime, List baseRecords) { + public List generateUpdatesWithHoodieAvroPayload(String instantTime, List baseRecords) { List updates = new ArrayList<>(); for (HoodieRecord baseRecord : baseRecords) { - HoodieRecord record = new HoodieRecord(baseRecord.getKey(), generateAvroPayload(baseRecord.getKey(), commitTime)); + HoodieRecord record = new HoodieRecord(baseRecord.getKey(), generateAvroPayload(baseRecord.getKey(), instantTime)); updates.add(record); } return updates; } - public List generateDeletes(String commitTime, Integer n) throws IOException { - List inserts = generateInserts(commitTime, n); + public List generateDeletes(String instantTime, Integer n) throws IOException { + List inserts = generateInserts(instantTime, n); return generateDeletesFromExistingRecords(inserts); } @@ -374,20 +374,20 @@ public class HoodieTestDataGenerator { return new HoodieRecord(key, payload); } - public HoodieRecord generateUpdateRecord(HoodieKey key, String commitTime) throws IOException { - return new HoodieRecord(key, generateRandomValue(key, commitTime)); + public HoodieRecord generateUpdateRecord(HoodieKey key, String instantTime) throws IOException { + return new HoodieRecord(key, generateRandomValue(key, instantTime)); } - public List generateUpdates(String commitTime, List baseRecords) throws IOException { + public List generateUpdates(String instantTime, List baseRecords) throws IOException { List updates = new ArrayList<>(); for (HoodieRecord baseRecord : baseRecords) { - HoodieRecord record = generateUpdateRecord(baseRecord.getKey(), commitTime); + HoodieRecord record = generateUpdateRecord(baseRecord.getKey(), instantTime); updates.add(record); } return updates; } - public List generateUpdatesWithDiffPartition(String commitTime, List baseRecords) + public List generateUpdatesWithDiffPartition(String instantTime, List baseRecords) throws IOException { List updates = new ArrayList<>(); for (HoodieRecord baseRecord : baseRecords) { @@ -399,7 +399,7 @@ public class HoodieTestDataGenerator { newPartition = partitionPaths[0]; } HoodieKey key = new HoodieKey(baseRecord.getRecordKey(), newPartition); - HoodieRecord record = generateUpdateRecord(key, commitTime); + HoodieRecord record = generateUpdateRecord(key, instantTime); updates.add(record); } return updates; @@ -409,15 +409,15 @@ public class HoodieTestDataGenerator { * Generates new updates, randomly distributed across the keys above. There can be duplicates within the returned * list * - * @param commitTime Commit Timestamp + * @param instantTime Instant Timestamp * @param n Number of updates (including dups) * @return list of hoodie record updates */ - public List generateUpdates(String commitTime, Integer n) throws IOException { + public List generateUpdates(String instantTime, Integer n) throws IOException { List updates = new ArrayList<>(); for (int i = 0; i < n; i++) { KeyPartition kp = existingKeys.get(RAND.nextInt(numExistingKeys - 1)); - HoodieRecord record = generateUpdateRecord(kp.key, commitTime); + HoodieRecord record = generateUpdateRecord(kp.key, instantTime); updates.add(record); } return updates; @@ -426,12 +426,12 @@ public class HoodieTestDataGenerator { /** * Generates deduped updates of keys previously inserted, randomly distributed across the keys above. 
* - * @param commitTime Commit Timestamp + * @param instantTime Instant Timestamp * @param n Number of unique records * @return list of hoodie record updates */ - public List generateUniqueUpdates(String commitTime, Integer n) { - return generateUniqueUpdatesStream(commitTime, n).collect(Collectors.toList()); + public List generateUniqueUpdates(String instantTime, Integer n) { + return generateUniqueUpdatesStream(instantTime, n).collect(Collectors.toList()); } /** @@ -447,11 +447,11 @@ public class HoodieTestDataGenerator { /** * Generates deduped updates of keys previously inserted, randomly distributed across the keys above. * - * @param commitTime Commit Timestamp + * @param instantTime Commit Timestamp * @param n Number of unique records * @return stream of hoodie record updates */ - public Stream generateUniqueUpdatesStream(String commitTime, Integer n) { + public Stream generateUniqueUpdatesStream(String instantTime, Integer n) { final Set used = new HashSet<>(); if (n > numExistingKeys) { throw new IllegalArgumentException("Requested unique updates is greater than number of available keys"); @@ -467,7 +467,7 @@ public class HoodieTestDataGenerator { } used.add(kp); try { - return new HoodieRecord(kp.key, generateRandomValue(kp.key, commitTime)); + return new HoodieRecord(kp.key, generateRandomValue(kp.key, instantTime)); } catch (IOException e) { throw new HoodieIOException(e.getMessage(), e); } @@ -505,11 +505,11 @@ public class HoodieTestDataGenerator { /** * Generates deduped delete records previously inserted, randomly distributed across the keys above. * - * @param commitTime Commit Timestamp + * @param instantTime Commit Timestamp * @param n Number of unique records * @return stream of hoodie records for delete */ - public Stream generateUniqueDeleteRecordStream(String commitTime, Integer n) { + public Stream generateUniqueDeleteRecordStream(String instantTime, Integer n) { final Set used = new HashSet<>(); if (n > numExistingKeys) { throw new IllegalArgumentException("Requested unique deletes is greater than number of available keys"); @@ -528,7 +528,7 @@ public class HoodieTestDataGenerator { numExistingKeys--; used.add(kp); try { - result.add(new HoodieRecord(kp.key, generateRandomDeleteValue(kp.key, commitTime))); + result.add(new HoodieRecord(kp.key, generateRandomDeleteValue(kp.key, instantTime))); } catch (IOException e) { throw new HoodieIOException(e.getMessage(), e); } diff --git a/hudi-client/src/test/java/org/apache/hudi/execution/TestBoundedInMemoryExecutor.java b/hudi-client/src/test/java/org/apache/hudi/execution/TestBoundedInMemoryExecutor.java index 8fd418ad8..58f898ca8 100644 --- a/hudi-client/src/test/java/org/apache/hudi/execution/TestBoundedInMemoryExecutor.java +++ b/hudi-client/src/test/java/org/apache/hudi/execution/TestBoundedInMemoryExecutor.java @@ -43,7 +43,7 @@ import static org.mockito.Mockito.when; public class TestBoundedInMemoryExecutor extends HoodieClientTestHarness { - private final String commitTime = HoodieActiveTimeline.createNewInstantTime(); + private final String instantTime = HoodieActiveTimeline.createNewInstantTime(); @Before public void setUp() throws Exception { @@ -58,7 +58,7 @@ public class TestBoundedInMemoryExecutor extends HoodieClientTestHarness { @Test public void testExecutor() { - final List hoodieRecords = dataGen.generateInserts(commitTime, 100); + final List hoodieRecords = dataGen.generateInserts(instantTime, 100); HoodieWriteConfig hoodieWriteConfig = mock(HoodieWriteConfig.class); 
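Reviewer note: generateUniqueUpdatesStream and generateUniqueDeleteRecordStream above share one pattern: sample existing keys without replacement and fail fast when more unique records are requested than keys exist. A standalone sketch of that sampling (indices stand in for the generator's KeyPartition entries):

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;

class UniqueSampleSketch {
  private static final Random RAND = new Random();

  static List<Integer> sampleUnique(int numExistingKeys, int n) {
    if (n > numExistingKeys) {
      throw new IllegalArgumentException("Requested unique updates is greater than number of available keys");
    }
    Set<Integer> used = new HashSet<>();
    List<Integer> picks = new ArrayList<>();
    while (picks.size() < n) {
      int idx = RAND.nextInt(numExistingKeys);
      if (used.add(idx)) { // add() returns false when idx was already drawn
        picks.add(idx);
      }
    }
    return picks;
  }
}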
when(hoodieWriteConfig.getWriteBufferLimitBytes()).thenReturn(1024); diff --git a/hudi-client/src/test/java/org/apache/hudi/execution/TestBoundedInMemoryQueue.java b/hudi-client/src/test/java/org/apache/hudi/execution/TestBoundedInMemoryQueue.java index acd2ec151..72b3eff28 100644 --- a/hudi-client/src/test/java/org/apache/hudi/execution/TestBoundedInMemoryQueue.java +++ b/hudi-client/src/test/java/org/apache/hudi/execution/TestBoundedInMemoryQueue.java @@ -59,7 +59,7 @@ import static org.mockito.Mockito.when; public class TestBoundedInMemoryQueue extends HoodieClientTestHarness { - private final String commitTime = HoodieActiveTimeline.createNewInstantTime(); + private final String instantTime = HoodieActiveTimeline.createNewInstantTime(); @Before public void setUp() throws Exception { @@ -79,7 +79,7 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness { @Test(timeout = 60000) public void testRecordReading() throws Exception { final int numRecords = 128; - final List hoodieRecords = dataGen.generateInserts(commitTime, numRecords); + final List hoodieRecords = dataGen.generateInserts(instantTime, numRecords); final BoundedInMemoryQueue> queue = new BoundedInMemoryQueue(FileIOUtils.KB, getTransformFunction(HoodieTestDataGenerator.AVRO_SCHEMA)); // Produce @@ -126,7 +126,7 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness { Map> keyToProducerAndIndexMap = new HashMap<>(); for (int i = 0; i < numProducers; i++) { - List pRecs = dataGen.generateInserts(commitTime, numRecords); + List pRecs = dataGen.generateInserts(instantTime, numRecords); int j = 0; for (HoodieRecord r : pRecs) { Assert.assertTrue(!keyToProducerAndIndexMap.containsKey(r.getRecordKey())); @@ -209,7 +209,7 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness { @Test(timeout = 60000) public void testMemoryLimitForBuffering() throws Exception { final int numRecords = 128; - final List hoodieRecords = dataGen.generateInserts(commitTime, numRecords); + final List hoodieRecords = dataGen.generateInserts(instantTime, numRecords); // maximum number of records to keep in memory. 
final int recordLimit = 5; final SizeEstimator> sizeEstimator = new DefaultSizeEstimator<>(); @@ -258,7 +258,7 @@ public class TestBoundedInMemoryQueue extends HoodieClientTestHarness { @Test(timeout = 60000) public void testException() throws Exception { final int numRecords = 256; - final List hoodieRecords = dataGen.generateInserts(commitTime, numRecords); + final List hoodieRecords = dataGen.generateInserts(instantTime, numRecords); final SizeEstimator>> sizeEstimator = new DefaultSizeEstimator<>(); // queue memory limit HoodieInsertValueGenResult payload = diff --git a/hudi-client/src/test/java/org/apache/hudi/index/TestHoodieIndex.java b/hudi-client/src/test/java/org/apache/hudi/index/TestHoodieIndex.java index 46239db38..09b6debf6 100644 --- a/hudi-client/src/test/java/org/apache/hudi/index/TestHoodieIndex.java +++ b/hudi-client/src/test/java/org/apache/hudi/index/TestHoodieIndex.java @@ -125,7 +125,7 @@ public class TestHoodieIndex extends HoodieClientTestHarness { } @Override - public boolean rollbackCommit(String commitTime) { + public boolean rollbackCommit(String instantTime) { return false; } diff --git a/hudi-client/src/test/java/org/apache/hudi/io/storage/TestHoodieStorageWriterFactory.java b/hudi-client/src/test/java/org/apache/hudi/io/storage/TestHoodieStorageWriterFactory.java index b34a28fcf..6758377b3 100755 --- a/hudi-client/src/test/java/org/apache/hudi/io/storage/TestHoodieStorageWriterFactory.java +++ b/hudi-client/src/test/java/org/apache/hudi/io/storage/TestHoodieStorageWriterFactory.java @@ -40,18 +40,18 @@ public class TestHoodieStorageWriterFactory extends TestHoodieClientBase { @Test public void testGetStorageWriter() throws IOException { // parquet file format. - final String commitTime = "100"; + final String instantTime = "100"; final Path parquetPath = new Path(basePath + "/partition/path/f1_1-0-1_000.parquet"); final HoodieWriteConfig cfg = getConfig(); HoodieTable table = HoodieTable.getHoodieTable(metaClient, cfg, jsc); - HoodieStorageWriter parquetWriter = HoodieStorageWriterFactory.getStorageWriter(commitTime, + HoodieStorageWriter parquetWriter = HoodieStorageWriterFactory.getStorageWriter(instantTime, parquetPath, table, cfg, HoodieTestDataGenerator.AVRO_SCHEMA); Assert.assertTrue(parquetWriter instanceof HoodieParquetWriter); // other file format exception. 
final Path logPath = new Path(basePath + "/partition/path/f.b51192a8-574b-4a85-b246-bcfec03ac8bf_100.log.2_1-0-1"); try { - HoodieStorageWriter logWriter = HoodieStorageWriterFactory.getStorageWriter(commitTime, logPath, + HoodieStorageWriter logWriter = HoodieStorageWriterFactory.getStorageWriter(instantTime, logPath, table, cfg, HoodieTestDataGenerator.AVRO_SCHEMA); fail("should fail since log storage writer is not supported yet."); } catch (Exception e) { diff --git a/hudi-client/src/test/java/org/apache/hudi/table/TestCleaner.java b/hudi-client/src/test/java/org/apache/hudi/table/TestCleaner.java index f6ad2304b..1af19f528 100644 --- a/hudi-client/src/test/java/org/apache/hudi/table/TestCleaner.java +++ b/hudi-client/src/test/java/org/apache/hudi/table/TestCleaner.java @@ -131,9 +131,9 @@ public class TestCleaner extends TestHoodieClientBase { HoodieTable table = HoodieTable.getHoodieTable(metaClient, client.getConfig(), jsc); assertFalse(table.getCompletedCommitsTimeline().empty()); - String commitTime = table.getCompletedCommitsTimeline().getInstants().findFirst().get().getTimestamp(); + String instantTime = table.getCompletedCommitsTimeline().getInstants().findFirst().get().getTimestamp(); assertFalse(table.getCompletedCleanTimeline().empty()); - assertEquals("The clean instant should be the same as the commit instant", commitTime, + assertEquals("The clean instant should be the same as the commit instant", instantTime, table.getCompletedCleanTimeline().getInstants().findFirst().get().getTimestamp()); HoodieIndex index = HoodieIndex.createIndex(cfg, jsc); @@ -173,7 +173,7 @@ public class TestCleaner extends TestHoodieClientBase { @Test public void testBulkInsertPreppedAndCleanByVersions() throws Exception { testInsertAndCleanByVersions( - (client, recordRDD, commitTime) -> client.bulkInsertPreppedRecords(recordRDD, commitTime, Option.empty()), + (client, recordRDD, instantTime) -> client.bulkInsertPreppedRecords(recordRDD, instantTime, Option.empty()), HoodieWriteClient::upsertPreppedRecords, true); } @@ -325,7 +325,7 @@ public class TestCleaner extends TestHoodieClientBase { @Test public void testBulkInsertPreppedAndCleanByCommits() throws Exception { testInsertAndCleanByCommits( - (client, recordRDD, commitTime) -> client.bulkInsertPreppedRecords(recordRDD, commitTime, Option.empty()), + (client, recordRDD, instantTime) -> client.bulkInsertPreppedRecords(recordRDD, instantTime, Option.empty()), HoodieWriteClient::upsertPreppedRecords, true); } @@ -598,7 +598,7 @@ public class TestCleaner extends TestHoodieClientBase { @Test public void testUpgradeDowngrade() { - String commitTime = "000"; + String instantTime = "000"; String partition1 = DEFAULT_PARTITION_PATHS[0]; String partition2 = DEFAULT_PARTITION_PATHS[1]; @@ -616,7 +616,7 @@ public class TestCleaner extends TestHoodieClientBase { // create partition1 clean stat. HoodieCleanStat cleanStat1 = new HoodieCleanStat(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS, partition1, deletePathPatterns1, successDeleteFiles1, - failedDeleteFiles1, commitTime); + failedDeleteFiles1, instantTime); List deletePathPatterns2 = new ArrayList<>(); List successDeleteFiles2 = new ArrayList<>(); @@ -625,7 +625,7 @@ public class TestCleaner extends TestHoodieClientBase { // create partition2 empty clean stat. 
HoodieCleanStat cleanStat2 = new HoodieCleanStat(HoodieCleaningPolicy.KEEP_LATEST_COMMITS, partition2, deletePathPatterns2, successDeleteFiles2, - failedDeleteFiles2, commitTime); + failedDeleteFiles2, instantTime); // map with absolute file path. Map oldExpected = new HashMap<>(); @@ -639,7 +639,7 @@ newExpected.put(partition2, new Tuple3<>(deletePathPatterns2, successDeleteFiles2, failedDeleteFiles2)); HoodieCleanMetadata metadata = - CleanerUtils.convertCleanMetadata(metaClient, commitTime, Option.of(0L), Arrays.asList(cleanStat1, cleanStat2)); + CleanerUtils.convertCleanMetadata(metaClient, instantTime, Option.of(0L), Arrays.asList(cleanStat1, cleanStat2)); metadata.setVersion(CleanerUtils.CLEAN_METADATA_VERSION_1); // Now upgrade and check @@ -1107,15 +1107,15 @@ public class TestCleaner extends TestHoodieClientBase { /** * Utility method to create temporary data files. * - * @param commitTime Commit Timestamp + * @param instantTime Instant timestamp * @param numFiles Number of files to be generated * @return generated files * @throws IOException in case of error */ - private List createMarkerFiles(String commitTime, int numFiles) throws IOException { + private List createMarkerFiles(String instantTime, int numFiles) throws IOException { List files = new ArrayList<>(); for (int i = 0; i < numFiles; i++) { - files.add(HoodieTestUtils.createNewMarkerFile(basePath, "2019/03/29", commitTime)); + files.add(HoodieTestUtils.createNewMarkerFile(basePath, "2019/03/29", instantTime)); } return files; } diff --git a/hudi-client/src/test/java/org/apache/hudi/table/TestCopyOnWriteTable.java b/hudi-client/src/test/java/org/apache/hudi/table/TestCopyOnWriteTable.java index e62f5e574..c88233df8 100644 --- a/hudi-client/src/test/java/org/apache/hudi/table/TestCopyOnWriteTable.java +++ b/hudi-client/src/test/java/org/apache/hudi/table/TestCopyOnWriteTable.java @@ -93,7 +93,7 @@ public class TestCopyOnWriteTable extends HoodieClientTestHarness { String fileName = UUID.randomUUID().toString(); String partitionPath = "2016/05/04"; - String commitTime = HoodieTestUtils.makeNewCommitTime(); + String instantTime = HoodieTestUtils.makeNewCommitTime(); HoodieWriteConfig config = makeHoodieClientConfig(); metaClient = HoodieTableMetaClient.reload(metaClient); HoodieTable table = HoodieTable.getHoodieTable(metaClient, config, jsc); @@ -103,12 +103,12 @@ public class TestCopyOnWriteTable extends HoodieClientTestHarness { when(record.getPartitionPath()).thenReturn(partitionPath); String writeToken = FSUtils.makeWriteToken(TaskContext.getPartitionId(), TaskContext.get().stageId(), TaskContext.get().taskAttemptId()); - HoodieCreateHandle io = new HoodieCreateHandle(config, commitTime, table, partitionPath, fileName); + HoodieCreateHandle io = new HoodieCreateHandle(config, instantTime, table, partitionPath, fileName); return Pair.of(io.makeNewPath(record.getPartitionPath()), writeToken); }).collect().get(0); Assert.assertEquals(newPathWithWriteToken.getKey().toString(), this.basePath + "/" + partitionPath + "/" - + FSUtils.makeDataFileName(commitTime, newPathWithWriteToken.getRight(), fileName)); + + FSUtils.makeDataFileName(instantTime, newPathWithWriteToken.getRight(), fileName)); } private HoodieWriteConfig makeHoodieClientConfig() throws Exception { @@ -306,7 +306,7 @@ public class TestCopyOnWriteTable extends HoodieClientTestHarness { @Test public void testInsertRecords() throws Exception { HoodieWriteConfig config = makeHoodieClientConfig(); - String
commitTime = HoodieTestUtils.makeNewCommitTime(); + String instantTime = HoodieTestUtils.makeNewCommitTime(); HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, jsc); metaClient = HoodieTableMetaClient.reload(metaClient); @@ -318,7 +318,7 @@ public class TestCopyOnWriteTable extends HoodieClientTestHarness { // Insert new records final List recs2 = records; List returnedStatuses = jsc.parallelize(Arrays.asList(1)).map(x -> { - return table.handleInsert(commitTime, FSUtils.createNewFileIdPfx(), recs2.iterator()); + return table.handleInsert(instantTime, FSUtils.createNewFileIdPfx(), recs2.iterator()); }).flatMap(x -> HoodieClientTestUtils.collectStatuses(x).iterator()).collect(); // TODO: check the actual files and make sure 11 records, total were written. @@ -340,7 +340,7 @@ public class TestCopyOnWriteTable extends HoodieClientTestHarness { final List recs3 = records; returnedStatuses = jsc.parallelize(Arrays.asList(1)).map(x -> { - return table.handleInsert(commitTime, FSUtils.createNewFileIdPfx(), recs3.iterator()); + return table.handleInsert(instantTime, FSUtils.createNewFileIdPfx(), recs3.iterator()); }).flatMap(x -> HoodieClientTestUtils.collectStatuses(x).iterator()).collect(); assertEquals(3, returnedStatuses.size()); @@ -359,7 +359,7 @@ public class TestCopyOnWriteTable extends HoodieClientTestHarness { public void testFileSizeUpsertRecords() throws Exception { HoodieWriteConfig config = makeHoodieClientConfigBuilder().withStorageConfig(HoodieStorageConfig.newBuilder() .limitFileSize(64 * 1024).parquetBlockSize(64 * 1024).parquetPageSize(64 * 1024).build()).build(); - String commitTime = HoodieTestUtils.makeNewCommitTime(); + String instantTime = HoodieTestUtils.makeNewCommitTime(); metaClient = HoodieTableMetaClient.reload(metaClient); HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, jsc); @@ -374,13 +374,13 @@ public class TestCopyOnWriteTable extends HoodieClientTestHarness { // Insert new records jsc.parallelize(Arrays.asList(1)) - .map(i -> table.handleInsert(commitTime, FSUtils.createNewFileIdPfx(), records.iterator())) + .map(i -> table.handleInsert(instantTime, FSUtils.createNewFileIdPfx(), records.iterator())) .map(x -> HoodieClientTestUtils.collectStatuses(x)).collect(); // Check the updated file int counts = 0; for (File file : new File(basePath + "/2016/01/31").listFiles()) { - if (file.getName().endsWith(".parquet") && FSUtils.getCommitTime(file.getName()).equals(commitTime)) { + if (file.getName().endsWith(".parquet") && FSUtils.getCommitTime(file.getName()).equals(instantTime)) { LOG.info(file.getName() + "-" + file.length()); counts++; } @@ -471,11 +471,11 @@ public class TestCopyOnWriteTable extends HoodieClientTestHarness { .withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1000 * 1024).build()).build(); metaClient = HoodieTableMetaClient.reload(metaClient); final HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, jsc); - String commitTime = "000"; + String instantTime = "000"; // Perform inserts of 100 records to test CreateHandle and BufferedExecutor - final List inserts = dataGen.generateInsertsWithHoodieAvroPayload(commitTime, 100); + final List inserts = dataGen.generateInsertsWithHoodieAvroPayload(instantTime, 100); final List> ws = jsc.parallelize(Arrays.asList(1)).map(x -> { - return table.handleInsert(commitTime, UUID.randomUUID().toString(), inserts.iterator()); + return table.handleInsert(instantTime, UUID.randomUUID().toString(), inserts.iterator()); }).map(x -> (List) 
HoodieClientTestUtils.collectStatuses(x)).collect(); WriteStatus writeStatus = ws.get(0).get(0); @@ -483,12 +483,12 @@ public class TestCopyOnWriteTable extends HoodieClientTestHarness { metaClient.getFs().create(new Path(basePath + "/.hoodie/000.commit")).close(); final HoodieCopyOnWriteTable table2 = new HoodieCopyOnWriteTable(config, jsc); final List updates = - dataGen.generateUpdatesWithHoodieAvroPayload(commitTime, inserts); + dataGen.generateUpdatesWithHoodieAvroPayload(instantTime, inserts); String partitionPath = updates.get(0).getPartitionPath(); long numRecordsInPartition = updates.stream().filter(u -> u.getPartitionPath().equals(partitionPath)).count(); final List> updateStatus = jsc.parallelize(Arrays.asList(1)).map(x -> { - return table.handleUpdate(commitTime, partitionPath, fileId, updates.iterator()); + return table.handleUpdate(instantTime, partitionPath, fileId, updates.iterator()); }).map(x -> (List) HoodieClientTestUtils.collectStatuses(x)).collect(); assertEquals(updates.size() - numRecordsInPartition, updateStatus.get(0).get(0).getTotalErrorRecords()); } diff --git a/hudi-client/src/test/java/org/apache/hudi/table/TestMergeOnReadTable.java b/hudi-client/src/test/java/org/apache/hudi/table/TestMergeOnReadTable.java index 4d6c5bcc9..6dbb89fc2 100644 --- a/hudi-client/src/test/java/org/apache/hudi/table/TestMergeOnReadTable.java +++ b/hudi-client/src/test/java/org/apache/hudi/table/TestMergeOnReadTable.java @@ -839,11 +839,11 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness { Assert.assertTrue(numLogFiles > 0); // Do a compaction - String commitTime = writeClient.scheduleCompaction(Option.empty()).get().toString(); - statuses = writeClient.compact(commitTime); + String instantTime = writeClient.scheduleCompaction(Option.empty()).get().toString(); + statuses = writeClient.compact(instantTime); assertEquals(statuses.map(status -> status.getStat().getPath().contains("parquet")).count(), numLogFiles); Assert.assertEquals(statuses.count(), numLogFiles); - writeClient.commitCompaction(commitTime, statuses, Option.empty()); + writeClient.commitCompaction(instantTime, statuses, Option.empty()); } } @@ -991,14 +991,14 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness { instant = new HoodieInstant(State.INFLIGHT, commitActionType, "000"); activeTimeline.saveAsComplete(instant, Option.empty()); - String commitTime = "001"; - client.startCommitWithTime(commitTime); + String instantTime = "001"; + client.startCommitWithTime(instantTime); - List records = dataGen.generateInserts(commitTime, 200); + List records = dataGen.generateInserts(instantTime, 200); JavaRDD writeRecords = jsc.parallelize(records, 1); - JavaRDD statuses = client.insert(writeRecords, commitTime); - assertTrue("Commit should succeed", client.commit(commitTime, statuses)); + JavaRDD statuses = client.insert(writeRecords, instantTime); + assertTrue("Commit should succeed", client.commit(instantTime, statuses)); // Read from commit file table = HoodieTable.getHoodieTable(metaClient, cfg, jsc); @@ -1018,12 +1018,12 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness { } Assert.assertEquals(inserts, 200); - commitTime = "002"; - client.startCommitWithTime(commitTime); - records = dataGen.generateUpdates(commitTime, records); + instantTime = "002"; + client.startCommitWithTime(instantTime); + records = dataGen.generateUpdates(instantTime, records); writeRecords = jsc.parallelize(records, 1); - statuses = client.upsert(writeRecords, commitTime); - assertTrue("Commit 
should succeed", client.commit(commitTime, statuses)); + statuses = client.upsert(writeRecords, instantTime); + assertTrue("Commit should succeed", client.commit(instantTime, statuses)); // Read from commit file table = HoodieTable.getHoodieTable(metaClient, cfg, jsc); @@ -1047,7 +1047,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness { Assert.assertEquals(inserts, 200); Assert.assertEquals(upserts, 200); - client.rollback(commitTime); + client.rollback(instantTime); // Read from commit file table = HoodieTable.getHoodieTable(metaClient, cfg, jsc); @@ -1084,14 +1084,14 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness { Map fileIdToInsertsMap = new HashMap<>(); Map fileIdToUpsertsMap = new HashMap<>(); - String commitTime = "000"; - client.startCommitWithTime(commitTime); + String instantTime = "000"; + client.startCommitWithTime(instantTime); - List records = dataGen.generateInserts(commitTime, 200); + List records = dataGen.generateInserts(instantTime, 200); JavaRDD writeRecords = jsc.parallelize(records, 1); - JavaRDD statuses = client.insert(writeRecords, commitTime); - assertTrue("Commit should succeed", client.commit(commitTime, statuses)); + JavaRDD statuses = client.insert(writeRecords, instantTime); + assertTrue("Commit should succeed", client.commit(instantTime, statuses)); // Read from commit file HoodieTable table = HoodieTable.getHoodieTable(metaClient, cfg, jsc); @@ -1113,14 +1113,14 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness { } Assert.assertEquals(inserts, 200); - commitTime = "001"; - client.startCommitWithTime(commitTime); + instantTime = "001"; + client.startCommitWithTime(instantTime); // generate updates + inserts. inserts should be handled into small files - records = dataGen.generateUpdates(commitTime, records); - records.addAll(dataGen.generateInserts(commitTime, 200)); + records = dataGen.generateUpdates(instantTime, records); + records.addAll(dataGen.generateInserts(instantTime, 200)); writeRecords = jsc.parallelize(records, 1); - statuses = client.upsert(writeRecords, commitTime); - assertTrue("Commit should succeed", client.commit(commitTime, statuses)); + statuses = client.upsert(writeRecords, instantTime); + assertTrue("Commit should succeed", client.commit(instantTime, statuses)); // Read from commit file table = HoodieTable.getHoodieTable(metaClient, cfg, jsc); @@ -1148,10 +1148,10 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness { Assert.assertEquals(upserts, 200); // Test small file handling after compaction - commitTime = "002"; - client.scheduleCompactionAtInstant(commitTime, Option.of(metadata.getExtraMetadata())); - statuses = client.compact(commitTime); - client.commitCompaction(commitTime, statuses, Option.empty()); + instantTime = "002"; + client.scheduleCompactionAtInstant(instantTime, Option.of(metadata.getExtraMetadata())); + statuses = client.compact(instantTime); + client.commitCompaction(instantTime, statuses, Option.empty()); // Read from commit file table = HoodieTable.getHoodieTable(metaClient, cfg, jsc); @@ -1172,14 +1172,14 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness { } // Write inserts + updates - commitTime = "003"; - client.startCommitWithTime(commitTime); + instantTime = "003"; + client.startCommitWithTime(instantTime); // generate updates + inserts. 
inserts should be handled into small files - records = dataGen.generateUpdates(commitTime, records); - records.addAll(dataGen.generateInserts(commitTime, 200)); + records = dataGen.generateUpdates(instantTime, records); + records.addAll(dataGen.generateInserts(instantTime, 200)); writeRecords = jsc.parallelize(records, 1); - statuses = client.upsert(writeRecords, commitTime); - assertTrue("Commit should succeed", client.commit(commitTime, statuses)); + statuses = client.upsert(writeRecords, instantTime); + assertTrue("Commit should succeed", client.commit(instantTime, statuses)); // Read from commit file table = HoodieTable.getHoodieTable(metaClient, cfg, jsc); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieFileGroup.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieFileGroup.java index 83e38d4c3..ef33e8abe 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieFileGroup.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieFileGroup.java @@ -160,15 +160,15 @@ public class HoodieFileGroup implements Serializable { } /** - * Obtain the latest file slice, upto a commitTime i.e <= maxCommitTime. + * Obtain the latest file slice, up to an instantTime, i.e. <= maxInstantTime. */ - public Option getLatestFileSliceBeforeOrOn(String maxCommitTime) { return Option.fromJavaOptional(getAllFileSlices().filter(slice -> HoodieTimeline - .compareTimestamps(slice.getBaseInstantTime(), maxCommitTime, HoodieTimeline.LESSER_OR_EQUAL)).findFirst()); + public Option getLatestFileSliceBeforeOrOn(String maxInstantTime) { return Option.fromJavaOptional(getAllFileSlices().filter(slice -> HoodieTimeline + .compareTimestamps(slice.getBaseInstantTime(), maxInstantTime, HoodieTimeline.LESSER_OR_EQUAL)).findFirst()); } /** - * Obtain the latest file slice, upto a commitTime i.e < maxInstantTime. + * Obtain the latest file slice, up to an instantTime, i.e. < maxInstantTime. * * @param maxInstantTime Max Instant Time * @return diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodiePartitionMetadata.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodiePartitionMetadata.java index 013869c52..faad46653 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodiePartitionMetadata.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodiePartitionMetadata.java @@ -65,9 +65,9 @@ public class HoodiePartitionMetadata { /** * Construct metadata object to be written out.
*/ - public HoodiePartitionMetadata(FileSystem fs, String commitTime, Path basePath, Path partitionPath) { + public HoodiePartitionMetadata(FileSystem fs, String instantTime, Path basePath, Path partitionPath) { this(fs, partitionPath); - props.setProperty(COMMIT_TIME_KEY, commitTime); + props.setProperty(COMMIT_TIME_KEY, instantTime); props.setProperty(PARTITION_DEPTH_KEY, String.valueOf(partitionPath.depth() - basePath.depth())); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecord.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecord.java index 843cc7030..c0b41ae78 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecord.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecord.java @@ -160,8 +160,8 @@ public class HoodieRecord implements Serializable return sb.toString(); } - public static String generateSequenceId(String commitTime, int partitionId, long recordIndex) { - return commitTime + "_" + partitionId + "_" + recordIndex; + public static String generateSequenceId(String instantTime, int partitionId, long recordIndex) { + return instantTime + "_" + partitionId + "_" + recordIndex; } public String getPartitionPath() { diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTimeline.java b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTimeline.java index 575a9ea1a..bfc18d2f3 100755 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTimeline.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTimeline.java @@ -142,7 +142,7 @@ public interface HoodieTimeline extends Serializable { /** * Create a new Timeline with all the instants after startTs. */ - HoodieTimeline findInstantsAfter(String commitTime, int numCommits); + HoodieTimeline findInstantsAfter(String instantTime, int numCommits); /** * Custom Filter of Instants. 
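[Reviewer note, not part of the patch: the rename is mechanical, so call sites read the same with the new parameter names. A minimal sketch against the signatures in the surrounding hunks; the timeline argument and the literal values are hypothetical, and imports from org.apache.hudi.common.* are elided.]

static String instantTimeUsageSketch(HoodieTimeline timeline) {
  // An "instant time" is still the same timestamp string, only the parameter name changed.
  String instantTime = HoodieActiveTimeline.createNewInstantTime();
  // generateSequenceId (hunk above) embeds the instant time first: "<instantTime>_0_1".
  String seqId = HoodieRecord.generateSequenceId(instantTime, 0, 1L);
  // findInstantsAfter (hunk above) takes the same instant-time string as its boundary.
  HoodieTimeline newer = timeline.findInstantsAfter(instantTime, 10);
  // The file-name helpers (next hunk) join the instant time with an action extension.
  return HoodieTimeline.makeCommitFileName(instantTime);
}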
@@ -280,16 +280,16 @@ public interface HoodieTimeline extends Serializable { return new HoodieInstant(true, instant.getAction(), instant.getTimestamp()); } - static String makeCommitFileName(String commitTime) { - return StringUtils.join(commitTime, HoodieTimeline.COMMIT_EXTENSION); + static String makeCommitFileName(String instantTime) { + return StringUtils.join(instantTime, HoodieTimeline.COMMIT_EXTENSION); } - static String makeInflightCommitFileName(String commitTime) { - return StringUtils.join(commitTime, HoodieTimeline.INFLIGHT_COMMIT_EXTENSION); + static String makeInflightCommitFileName(String instantTime) { + return StringUtils.join(instantTime, HoodieTimeline.INFLIGHT_COMMIT_EXTENSION); } - static String makeRequestedCommitFileName(String commitTime) { - return StringUtils.join(commitTime, HoodieTimeline.REQUESTED_COMMIT_EXTENSION); + static String makeRequestedCommitFileName(String instantTime) { + return StringUtils.join(instantTime, HoodieTimeline.REQUESTED_COMMIT_EXTENSION); } static String makeCleanerFileName(String instant) { @@ -312,28 +312,28 @@ public interface HoodieTimeline extends Serializable { return StringUtils.join(instant, HoodieTimeline.INFLIGHT_ROLLBACK_EXTENSION); } - static String makeInflightSavePointFileName(String commitTime) { - return StringUtils.join(commitTime, HoodieTimeline.INFLIGHT_SAVEPOINT_EXTENSION); + static String makeInflightSavePointFileName(String instantTime) { + return StringUtils.join(instantTime, HoodieTimeline.INFLIGHT_SAVEPOINT_EXTENSION); } - static String makeSavePointFileName(String commitTime) { - return StringUtils.join(commitTime, HoodieTimeline.SAVEPOINT_EXTENSION); + static String makeSavePointFileName(String instantTime) { + return StringUtils.join(instantTime, HoodieTimeline.SAVEPOINT_EXTENSION); } - static String makeInflightDeltaFileName(String commitTime) { - return StringUtils.join(commitTime, HoodieTimeline.INFLIGHT_DELTA_COMMIT_EXTENSION); + static String makeInflightDeltaFileName(String instantTime) { + return StringUtils.join(instantTime, HoodieTimeline.INFLIGHT_DELTA_COMMIT_EXTENSION); } - static String makeRequestedDeltaFileName(String commitTime) { - return StringUtils.join(commitTime, HoodieTimeline.REQUESTED_DELTA_COMMIT_EXTENSION); + static String makeRequestedDeltaFileName(String instantTime) { + return StringUtils.join(instantTime, HoodieTimeline.REQUESTED_DELTA_COMMIT_EXTENSION); } - static String makeInflightCompactionFileName(String commitTime) { - return StringUtils.join(commitTime, HoodieTimeline.INFLIGHT_COMPACTION_EXTENSION); + static String makeInflightCompactionFileName(String instantTime) { + return StringUtils.join(instantTime, HoodieTimeline.INFLIGHT_COMPACTION_EXTENSION); } - static String makeRequestedCompactionFileName(String commitTime) { - return StringUtils.join(commitTime, HoodieTimeline.REQUESTED_COMPACTION_EXTENSION); + static String makeRequestedCompactionFileName(String instantTime) { + return StringUtils.join(instantTime, HoodieTimeline.REQUESTED_COMPACTION_EXTENSION); } static String makeRestoreFileName(String instant) { @@ -344,8 +344,8 @@ public interface HoodieTimeline extends Serializable { return StringUtils.join(instant, HoodieTimeline.INFLIGHT_RESTORE_EXTENSION); } - static String makeDeltaFileName(String commitTime) { - return commitTime + HoodieTimeline.DELTA_COMMIT_EXTENSION; + static String makeDeltaFileName(String instantTime) { + return instantTime + HoodieTimeline.DELTA_COMMIT_EXTENSION; } static String getCommitFromCommitFile(String commitFileName) { diff --git 
a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFormat.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFormat.java index a7165fb9b..d1d35c0dc 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFormat.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFormat.java @@ -121,7 +121,7 @@ public interface HoodieLogFormat { // File Id private String logFileId; // File Commit Time stamp - private String commitTime; + private String instantTime; // version number for this log file. If not specified, then the current version will be // computed by inspecting the file system private Integer logVersion; @@ -173,7 +173,7 @@ public interface HoodieLogFormat { } public WriterBuilder overBaseCommit(String baseCommit) { - this.commitTime = baseCommit; + this.instantTime = baseCommit; return this; } @@ -195,7 +195,7 @@ public interface HoodieLogFormat { if (logFileId == null) { throw new IllegalArgumentException("FileID is not specified"); } - if (commitTime == null) { + if (instantTime == null) { throw new IllegalArgumentException("BaseCommitTime is not specified"); } if (fileExtension == null) { @@ -212,7 +212,7 @@ public interface HoodieLogFormat { if (logVersion == null) { LOG.info("Computing the next log version for " + logFileId + " in " + parentPath); Option> versionAndWriteToken = - FSUtils.getLatestLogVersion(fs, parentPath, logFileId, fileExtension, commitTime); + FSUtils.getLatestLogVersion(fs, parentPath, logFileId, fileExtension, instantTime); if (versionAndWriteToken.isPresent()) { logVersion = versionAndWriteToken.get().getKey(); logWriteToken = versionAndWriteToken.get().getValue(); @@ -233,7 +233,7 @@ public interface HoodieLogFormat { } Path logPath = new Path(parentPath, - FSUtils.makeLogFileName(logFileId, fileExtension, commitTime, logVersion, logWriteToken)); + FSUtils.makeLogFileName(logFileId, fileExtension, instantTime, logVersion, logWriteToken)); LOG.info("HoodieLogFile on path " + logPath); HoodieLogFile logFile = new HoodieLogFile(logPath); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieArchivedTimeline.java b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieArchivedTimeline.java index a2ad80c0e..d544628ef 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieArchivedTimeline.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieArchivedTimeline.java @@ -123,14 +123,14 @@ public class HoodieArchivedTimeline extends HoodieDefaultTimeline { } private HoodieInstant readCommit(GenericRecord record, boolean loadDetails) { - final String commitTime = record.get(HoodiePartitionMetadata.COMMIT_TIME_KEY).toString(); + final String instantTime = record.get(HoodiePartitionMetadata.COMMIT_TIME_KEY).toString(); final String action = record.get(ACTION_TYPE_KEY).toString(); if (loadDetails) { Option.ofNullable(record.get(getMetadataKey(action))).map(actionData -> - this.readCommits.put(commitTime, actionData.toString().getBytes(StandardCharsets.UTF_8)) + this.readCommits.put(instantTime, actionData.toString().getBytes(StandardCharsets.UTF_8)) ); } - return new HoodieInstant(false, action, commitTime); + return new HoodieInstant(false, action, instantTime); } private String getMetadataKey(String action) { diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieDefaultTimeline.java 
b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieDefaultTimeline.java index 86431c904..b3d9ae53b 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieDefaultTimeline.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieDefaultTimeline.java @@ -130,9 +130,9 @@ public class HoodieDefaultTimeline implements HoodieTimeline { } @Override - public HoodieDefaultTimeline findInstantsAfter(String commitTime, int numCommits) { + public HoodieDefaultTimeline findInstantsAfter(String instantTime, int numCommits) { return new HoodieDefaultTimeline(instants.stream() - .filter(s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), commitTime, GREATER)).limit(numCommits), + .filter(s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), instantTime, GREATER)).limit(numCommits), details); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/FSUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/FSUtils.java index add03d571..5eff951e2 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/FSUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/FSUtils.java @@ -105,12 +105,12 @@ public class FSUtils { return String.format("%d-%d-%d", taskPartitionId, stageId, taskAttemptId); } - public static String makeDataFileName(String commitTime, String writeToken, String fileId) { - return String.format("%s_%s_%s.parquet", fileId, writeToken, commitTime); + public static String makeDataFileName(String instantTime, String writeToken, String fileId) { + return String.format("%s_%s_%s.parquet", fileId, writeToken, instantTime); } - public static String makeMarkerFile(String commitTime, String writeToken, String fileId) { - return String.format("%s_%s_%s%s", fileId, writeToken, commitTime, HoodieTableMetaClient.MARKER_EXTN); + public static String makeMarkerFile(String instantTime, String writeToken, String fileId) { + return String.format("%s_%s_%s%s", fileId, writeToken, instantTime, HoodieTableMetaClient.MARKER_EXTN); } public static String translateMarkerToDataPath(String basePath, String markerPath, String instantTs) { @@ -125,8 +125,8 @@ public class FSUtils { HoodieFileFormat.PARQUET.getFileExtension()); } - public static String maskWithoutFileId(String commitTime, int taskPartitionId) { - return String.format("*_%s_%s%s", taskPartitionId, commitTime, HoodieFileFormat.PARQUET.getFileExtension()); + public static String maskWithoutFileId(String instantTime, int taskPartitionId) { + return String.format("*_%s_%s%s", taskPartitionId, instantTime, HoodieFileFormat.PARQUET.getFileExtension()); } public static String getCommitFromCommitFile(String commitFileName) { @@ -282,7 +282,7 @@ } /** - * Get the first part of the file name in the log file. That will be the fileId. Log file do not have commitTime in + * Get the first part of the file name in the log file. That will be the fileId. Log files do not have instantTime in * the file name. */ public static String getFileIdFromLogPath(Path path) { @@ -304,7 +304,7 @@ } /** - * Get the first part of the file name in the log file. That will be the fileId. Log file do not have commitTime in + * Get the first part of the file name in the log file. That will be the fileId. Log files do not have instantTime in * the file name.
*/ public static String getBaseCommitTimeFromLogPath(Path path) { diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/HoodieAvroUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/HoodieAvroUtils.java index 02e48e3a4..95cb8b9a8 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/HoodieAvroUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/HoodieAvroUtils.java @@ -180,8 +180,8 @@ public class HoodieAvroUtils { /** * Adds the Hoodie commit metadata into the provided Generic Record. */ - public static GenericRecord addCommitMetadataToRecord(GenericRecord record, String commitTime, String commitSeqno) { - record.put(HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitTime); + public static GenericRecord addCommitMetadataToRecord(GenericRecord record, String instantTime, String commitSeqno) { + record.put(HoodieRecord.COMMIT_TIME_METADATA_FIELD, instantTime); record.put(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, commitSeqno); return record; } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/model/HoodieTestUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/model/HoodieTestUtils.java index cae3d8957..c5fa5b302 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/model/HoodieTestUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/model/HoodieTestUtils.java @@ -124,16 +124,16 @@ public class HoodieTestUtils { return new SimpleDateFormat("yyyyMMddHHmmss").format(new Date()); } - public static void createCommitFiles(String basePath, String... commitTimes) throws IOException { - for (String commitTime : commitTimes) { + public static void createCommitFiles(String basePath, String... instantTimes) throws IOException { + for (String instantTime : instantTimes) { new File( basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" - + HoodieTimeline.makeRequestedCommitFileName(commitTime)).createNewFile(); + + HoodieTimeline.makeRequestedCommitFileName(instantTime)).createNewFile(); new File( basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" - + HoodieTimeline.makeInflightCommitFileName(commitTime)).createNewFile(); + + HoodieTimeline.makeInflightCommitFileName(instantTime)).createNewFile(); new File( - basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + HoodieTimeline.makeCommitFileName(commitTime)) + basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + HoodieTimeline.makeCommitFileName(instantTime)) .createNewFile(); } } @@ -142,20 +142,20 @@ public class HoodieTestUtils { new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME).mkdirs(); } - public static void createInflightCommitFiles(String basePath, String... commitTimes) throws IOException { + public static void createInflightCommitFiles(String basePath, String... instantTimes) throws IOException { - for (String commitTime : commitTimes) { + for (String instantTime : instantTimes) { new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" - + HoodieTimeline.makeRequestedCommitFileName(commitTime)).createNewFile(); + + HoodieTimeline.makeRequestedCommitFileName(instantTime)).createNewFile(); new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + HoodieTimeline.makeInflightCommitFileName( - commitTime)).createNewFile(); + instantTime)).createNewFile(); } } - public static void createPendingCleanFiles(HoodieTableMetaClient metaClient, String... 
commitTimes) { - for (String commitTime : commitTimes) { - Arrays.asList(HoodieTimeline.makeRequestedCleanerFileName(commitTime), - HoodieTimeline.makeInflightCleanerFileName(commitTime)).forEach(f -> { + public static void createPendingCleanFiles(HoodieTableMetaClient metaClient, String... instantTimes) { + for (String instantTime : instantTimes) { + Arrays.asList(HoodieTimeline.makeRequestedCleanerFileName(instantTime), + HoodieTimeline.makeInflightCleanerFileName(instantTime)).forEach(f -> { FSDataOutputStream os = null; try { Path commitFile = new Path( @@ -179,59 +179,59 @@ public class HoodieTestUtils { } } - public static String createNewDataFile(String basePath, String partitionPath, String commitTime) + public static String createNewDataFile(String basePath, String partitionPath, String instantTime) throws IOException { String fileID = UUID.randomUUID().toString(); - return createDataFile(basePath, partitionPath, commitTime, fileID); + return createDataFile(basePath, partitionPath, instantTime, fileID); } - public static String createNewMarkerFile(String basePath, String partitionPath, String commitTime) + public static String createNewMarkerFile(String basePath, String partitionPath, String instantTime) throws IOException { String fileID = UUID.randomUUID().toString(); - return createMarkerFile(basePath, partitionPath, commitTime, fileID); + return createMarkerFile(basePath, partitionPath, instantTime, fileID); } - public static String createDataFile(String basePath, String partitionPath, String commitTime, String fileID) + public static String createDataFile(String basePath, String partitionPath, String instantTime, String fileID) throws IOException { String folderPath = basePath + "/" + partitionPath + "/"; new File(folderPath).mkdirs(); - new File(folderPath + FSUtils.makeDataFileName(commitTime, DEFAULT_WRITE_TOKEN, fileID)).createNewFile(); + new File(folderPath + FSUtils.makeDataFileName(instantTime, DEFAULT_WRITE_TOKEN, fileID)).createNewFile(); return fileID; } - public static String createMarkerFile(String basePath, String partitionPath, String commitTime, String fileID) + public static String createMarkerFile(String basePath, String partitionPath, String instantTime, String fileID) throws IOException { String folderPath = - basePath + "/" + HoodieTableMetaClient.TEMPFOLDER_NAME + "/" + commitTime + "/" + partitionPath + "/"; + basePath + "/" + HoodieTableMetaClient.TEMPFOLDER_NAME + "/" + instantTime + "/" + partitionPath + "/"; new File(folderPath).mkdirs(); - File f = new File(folderPath + FSUtils.makeMarkerFile(commitTime, DEFAULT_WRITE_TOKEN, fileID)); + File f = new File(folderPath + FSUtils.makeMarkerFile(instantTime, DEFAULT_WRITE_TOKEN, fileID)); f.createNewFile(); return f.getAbsolutePath(); } - public static String createNewLogFile(FileSystem fs, String basePath, String partitionPath, String commitTime, + public static String createNewLogFile(FileSystem fs, String basePath, String partitionPath, String instantTime, String fileID, Option version) throws IOException { String folderPath = basePath + "/" + partitionPath + "/"; boolean makeDir = fs.mkdirs(new Path(folderPath)); if (!makeDir) { throw new IOException("cannot create directory for path " + folderPath); } - boolean createFile = fs.createNewFile(new Path(folderPath + FSUtils.makeLogFileName(fileID, ".log", commitTime, + boolean createFile = fs.createNewFile(new Path(folderPath + FSUtils.makeLogFileName(fileID, ".log", instantTime, version.orElse(DEFAULT_LOG_VERSION), 
HoodieLogFormat.UNKNOWN_WRITE_TOKEN))); if (!createFile) { throw new IOException( - StringUtils.format("cannot create data file for commit %s and fileId %s", commitTime, fileID)); + StringUtils.format("cannot create data file for commit %s and fileId %s", instantTime, fileID)); } return fileID; } - public static void createCompactionCommitFiles(FileSystem fs, String basePath, String... commitTimes) + public static void createCompactionCommitFiles(FileSystem fs, String basePath, String... instantTimes) throws IOException { - for (String commitTime : commitTimes) { + for (String instantTime : instantTimes) { boolean createFile = fs.createNewFile(new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" - + HoodieTimeline.makeCommitFileName(commitTime))); + + HoodieTimeline.makeCommitFileName(instantTime))); if (!createFile) { - throw new IOException("cannot create commit file for commit " + commitTime); + throw new IOException("cannot create commit file for commit " + instantTime); } } } @@ -244,67 +244,67 @@ public class HoodieTestUtils { AvroUtils.serializeCompactionPlan(plan)); } - public static String getDataFilePath(String basePath, String partitionPath, String commitTime, String fileID) { - return basePath + "/" + partitionPath + "/" + FSUtils.makeDataFileName(commitTime, DEFAULT_WRITE_TOKEN, fileID); + public static String getDataFilePath(String basePath, String partitionPath, String instantTime, String fileID) { + return basePath + "/" + partitionPath + "/" + FSUtils.makeDataFileName(instantTime, DEFAULT_WRITE_TOKEN, fileID); } - public static String getLogFilePath(String basePath, String partitionPath, String commitTime, String fileID, + public static String getLogFilePath(String basePath, String partitionPath, String instantTime, String fileID, Option version) { - return basePath + "/" + partitionPath + "/" + FSUtils.makeLogFileName(fileID, ".log", commitTime, + return basePath + "/" + partitionPath + "/" + FSUtils.makeLogFileName(fileID, ".log", instantTime, version.orElse(DEFAULT_LOG_VERSION), HoodieLogFormat.UNKNOWN_WRITE_TOKEN); } - public static String getCommitFilePath(String basePath, String commitTime) { - return basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + commitTime + HoodieTimeline.COMMIT_EXTENSION; + public static String getCommitFilePath(String basePath, String instantTime) { + return basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + instantTime + HoodieTimeline.COMMIT_EXTENSION; } - public static String getInflightCommitFilePath(String basePath, String commitTime) { - return basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + commitTime + public static String getInflightCommitFilePath(String basePath, String instantTime) { + return basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + instantTime + HoodieTimeline.INFLIGHT_COMMIT_EXTENSION; } - public static String getRequestedCompactionFilePath(String basePath, String commitTime) { - return basePath + "/" + HoodieTableMetaClient.AUXILIARYFOLDER_NAME + "/" + commitTime + public static String getRequestedCompactionFilePath(String basePath, String instantTime) { + return basePath + "/" + HoodieTableMetaClient.AUXILIARYFOLDER_NAME + "/" + instantTime + HoodieTimeline.INFLIGHT_COMMIT_EXTENSION; } - public static boolean doesDataFileExist(String basePath, String partitionPath, String commitTime, + public static boolean doesDataFileExist(String basePath, String partitionPath, String instantTime, String fileID) { - return new File(getDataFilePath(basePath, 
partitionPath, commitTime, fileID)).exists(); + return new File(getDataFilePath(basePath, partitionPath, instantTime, fileID)).exists(); } - public static boolean doesLogFileExist(String basePath, String partitionPath, String commitTime, String fileID, + public static boolean doesLogFileExist(String basePath, String partitionPath, String instantTime, String fileID, Option version) { - return new File(getLogFilePath(basePath, partitionPath, commitTime, fileID, version)).exists(); + return new File(getLogFilePath(basePath, partitionPath, instantTime, fileID, version)).exists(); } - public static boolean doesCommitExist(String basePath, String commitTime) { + public static boolean doesCommitExist(String basePath, String instantTime) { return new File( - basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + commitTime + HoodieTimeline.COMMIT_EXTENSION) + basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + instantTime + HoodieTimeline.COMMIT_EXTENSION) .exists(); } - public static boolean doesInflightExist(String basePath, String commitTime) { + public static boolean doesInflightExist(String basePath, String instantTime) { return new File( - basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + commitTime + HoodieTimeline.INFLIGHT_EXTENSION) + basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + instantTime + HoodieTimeline.INFLIGHT_EXTENSION) .exists(); } public static void createCleanFiles(HoodieTableMetaClient metaClient, String basePath, - String commitTime, Configuration configuration) + String instantTime, Configuration configuration) throws IOException { - createPendingCleanFiles(metaClient, commitTime); + createPendingCleanFiles(metaClient, instantTime); Path commitFile = new Path( - basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + HoodieTimeline.makeCleanerFileName(commitTime)); + basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + HoodieTimeline.makeCleanerFileName(instantTime)); FileSystem fs = FSUtils.getFs(basePath, configuration); try (FSDataOutputStream os = fs.create(commitFile, true)) { HoodieCleanStat cleanStats = new HoodieCleanStat(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS, DEFAULT_PARTITION_PATHS[rand.nextInt(DEFAULT_PARTITION_PATHS.length)], new ArrayList<>(), new ArrayList<>(), - new ArrayList<>(), commitTime); + new ArrayList<>(), instantTime); // Create the clean metadata HoodieCleanMetadata cleanMetadata = - CleanerUtils.convertCleanMetadata(metaClient, commitTime, Option.of(0L), Collections.singletonList(cleanStats)); + CleanerUtils.convertCleanMetadata(metaClient, instantTime, Option.of(0L), Collections.singletonList(cleanStats)); // Write empty clean metadata os.write(AvroUtils.serializeCleanMetadata(cleanMetadata).get()); } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodieWriteStat.java b/hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodieWriteStat.java index e7e234b88..f163e21b6 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodieWriteStat.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodieWriteStat.java @@ -37,7 +37,7 @@ public class TestHoodieWriteStat { @Test public void testSetPaths() { - String commitTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date()); + String instantTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date()); String basePathString = "/data/tables/some-hoodie-table"; String partitionPathString = "2017/12/31"; String fileName = UUID.randomUUID().toString(); @@ 
-46,7 +46,7 @@ public class TestHoodieWriteStat { Path basePath = new Path(basePathString); Path partitionPath = new Path(basePath, partitionPathString); - Path finalizeFilePath = new Path(partitionPath, FSUtils.makeDataFileName(commitTime, writeToken, fileName)); + Path finalizeFilePath = new Path(partitionPath, FSUtils.makeDataFileName(instantTime, writeToken, fileName)); HoodieWriteStat writeStat = new HoodieWriteStat(); writeStat.setPath(basePath, finalizeFilePath); assertEquals(finalizeFilePath, new Path(basePath, writeStat.getPath())); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/string/TestHoodieActiveTimeline.java b/hudi-common/src/test/java/org/apache/hudi/common/table/string/TestHoodieActiveTimeline.java index d77392fc0..8a41f8cf9 100755 --- a/hudi-common/src/test/java/org/apache/hudi/common/table/string/TestHoodieActiveTimeline.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/table/string/TestHoodieActiveTimeline.java @@ -407,7 +407,7 @@ public class TestHoodieActiveTimeline extends HoodieCommonTestHarness { private List getAllInstants() { timeline = new HoodieActiveTimeline(metaClient); List allInstants = new ArrayList<>(); - long commitTime = 1; + long instantTime = 1; for (State state : State.values()) { if (state == State.INVALID) { continue; @@ -432,7 +432,7 @@ public class TestHoodieActiveTimeline extends HoodieCommonTestHarness { action = HoodieTimeline.COMMIT_ACTION; } - allInstants.add(new HoodieInstant(state, action, String.format("%03d", commitTime++))); + allInstants.add(new HoodieInstant(state, action, String.format("%03d", instantTime++))); } } return allInstants; diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/SchemaTestUtil.java b/hudi-common/src/test/java/org/apache/hudi/common/util/SchemaTestUtil.java index 18d6b73e2..28a65b929 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/SchemaTestUtil.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/util/SchemaTestUtil.java @@ -100,12 +100,12 @@ public class SchemaTestUtil { public static List generateHoodieTestRecords(int from, int limit) throws IOException, URISyntaxException { List records = generateTestRecords(from, limit); - String commitTime = HoodieActiveTimeline.createNewInstantTime(); + String instantTime = HoodieActiveTimeline.createNewInstantTime(); Schema hoodieFieldsSchema = HoodieAvroUtils.addMetadataFields(getSimpleSchema()); return records.stream().map(s -> HoodieAvroUtils.rewriteRecord((GenericRecord) s, hoodieFieldsSchema)).map(p -> { p.put(HoodieRecord.RECORD_KEY_METADATA_FIELD, UUID.randomUUID().toString()); p.put(HoodieRecord.PARTITION_PATH_METADATA_FIELD, "0000/00/00"); - p.put(HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitTime); + p.put(HoodieRecord.COMMIT_TIME_METADATA_FIELD, instantTime); return p; }).collect(Collectors.toList()); @@ -124,12 +124,12 @@ public class SchemaTestUtil { } public static List updateHoodieTestRecords(List oldRecordKeys, List newRecords, - String commitTime) { + String instantTime) { return newRecords.stream().map(p -> { ((GenericRecord) p).put(HoodieRecord.RECORD_KEY_METADATA_FIELD, oldRecordKeys.remove(0)); ((GenericRecord) p).put(HoodieRecord.PARTITION_PATH_METADATA_FIELD, "0000/00/00"); - ((GenericRecord) p).put(HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitTime); + ((GenericRecord) p).put(HoodieRecord.COMMIT_TIME_METADATA_FIELD, instantTime); return p; }).collect(Collectors.toList()); @@ -173,9 +173,9 @@ public class SchemaTestUtil { return new 
Schema.Parser().parse(SchemaTestUtil.class.getResourceAsStream("/timestamp-test-evolved.avsc")); } - public static GenericRecord generateAvroRecordFromJson(Schema schema, int recordNumber, String commitTime, + public static GenericRecord generateAvroRecordFromJson(Schema schema, int recordNumber, String instantTime, String fileId) throws IOException { - TestRecord record = new TestRecord(commitTime, recordNumber, fileId); + TestRecord record = new TestRecord(instantTime, recordNumber, fileId); MercifulJsonConverter converter = new MercifulJsonConverter(); return converter.convert(record.toJsonString(), schema); } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestFSUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/util/TestFSUtils.java index 1a194d161..a77c61b5c 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/TestFSUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/util/TestFSUtils.java @@ -68,16 +68,16 @@ public class TestFSUtils extends HoodieCommonTestHarness { @Test public void testMakeDataFileName() { - String commitTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date()); + String instantTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date()); String fileName = UUID.randomUUID().toString(); - assertEquals(FSUtils.makeDataFileName(commitTime, TEST_WRITE_TOKEN, fileName), fileName + "_" + TEST_WRITE_TOKEN + "_" + commitTime + ".parquet"); + assertEquals(FSUtils.makeDataFileName(instantTime, TEST_WRITE_TOKEN, fileName), fileName + "_" + TEST_WRITE_TOKEN + "_" + instantTime + ".parquet"); } @Test public void testMaskFileName() { - String commitTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date()); + String instantTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date()); int taskPartitionId = 2; - assertEquals(FSUtils.maskWithoutFileId(commitTime, taskPartitionId), "*_" + taskPartitionId + "_" + commitTime + ".parquet"); + assertEquals(FSUtils.maskWithoutFileId(instantTime, taskPartitionId), "*_" + taskPartitionId + "_" + instantTime + ".parquet"); } @Test @@ -140,17 +140,17 @@ public class TestFSUtils extends HoodieCommonTestHarness { @Test public void testGetCommitTime() { - String commitTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date()); + String instantTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date()); String fileName = UUID.randomUUID().toString(); - String fullFileName = FSUtils.makeDataFileName(commitTime, TEST_WRITE_TOKEN, fileName); - assertEquals(FSUtils.getCommitTime(fullFileName), commitTime); + String fullFileName = FSUtils.makeDataFileName(instantTime, TEST_WRITE_TOKEN, fileName); + assertEquals(FSUtils.getCommitTime(fullFileName), instantTime); } @Test public void testGetFileNameWithoutMeta() { - String commitTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date()); + String instantTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date()); String fileName = UUID.randomUUID().toString(); - String fullFileName = FSUtils.makeDataFileName(commitTime, TEST_WRITE_TOKEN, fileName); + String fullFileName = FSUtils.makeDataFileName(instantTime, TEST_WRITE_TOKEN, fileName); assertEquals(FSUtils.getFileId(fullFileName), fileName); } @@ -271,15 +271,15 @@ public class TestFSUtils extends HoodieCommonTestHarness { @Test public void testDeleteOlderRollbackFiles() throws Exception { - String[] commitTimes = new String[]{"20160501010101", "20160501020101", "20160501030101", "20160501040101", + String[] instantTimes = new 
String[]{"20160501010101", "20160501020101", "20160501030101", "20160501040101", "20160502020601", "20160502030601", "20160502040601", "20160502050601", "20160506030611", "20160506040611", "20160506050611", "20160506060611"}; List hoodieInstants = new ArrayList<>(); // create rollback files - for (String commitTime : commitTimes) { + for (String instantTime : instantTimes) { new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" - + commitTime + HoodieTimeline.ROLLBACK_EXTENSION).createNewFile(); - hoodieInstants.add(new HoodieInstant(false, HoodieTimeline.ROLLBACK_ACTION, commitTime)); + + instantTime + HoodieTimeline.ROLLBACK_EXTENSION).createNewFile(); + hoodieInstants.add(new HoodieInstant(false, HoodieTimeline.ROLLBACK_ACTION, instantTime)); } FSUtils.deleteOlderRollbackMetaFiles(FSUtils.getFs(basePath, new Configuration()), @@ -295,15 +295,15 @@ public class TestFSUtils extends HoodieCommonTestHarness { @Test public void testDeleteOlderCleanMetaFiles() throws Exception { - String[] commitTimes = new String[]{"20160501010101", "20160501020101", "20160501030101", "20160501040101", + String[] instantTimes = new String[]{"20160501010101", "20160501020101", "20160501030101", "20160501040101", "20160502020601", "20160502030601", "20160502040601", "20160502050601", "20160506030611", "20160506040611", "20160506050611", "20160506060611"}; List hoodieInstants = new ArrayList<>(); // create rollback files - for (String commitTime : commitTimes) { + for (String instantTime : instantTimes) { new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" - + commitTime + HoodieTimeline.CLEAN_EXTENSION).createNewFile(); - hoodieInstants.add(new HoodieInstant(false, HoodieTimeline.CLEAN_ACTION, commitTime)); + + instantTime + HoodieTimeline.CLEAN_EXTENSION).createNewFile(); + hoodieInstants.add(new HoodieInstant(false, HoodieTimeline.CLEAN_ACTION, instantTime)); } FSUtils.deleteOlderCleanMetaFiles(FSUtils.getFs(basePath, new Configuration()), basePath + "/.hoodie", hoodieInstants.stream()); @@ -318,7 +318,7 @@ public class TestFSUtils extends HoodieCommonTestHarness { @Test public void testFileNameRelatedFunctions() throws Exception { - String commitTime = "20160501010101"; + String instantTime = "20160501010101"; String partitionStr = "2016/05/01"; int taskPartitionId = 456; String writeToken = "456"; @@ -328,13 +328,13 @@ public class TestFSUtils extends HoodieCommonTestHarness { final String LOG_EXTENTION = "." 
+ LOG_STR; // data file name - String dataFileName = FSUtils.makeDataFileName(commitTime, writeToken, fileId); - assertTrue(commitTime.equals(FSUtils.getCommitTime(dataFileName))); + String dataFileName = FSUtils.makeDataFileName(instantTime, writeToken, fileId); + assertTrue(instantTime.equals(FSUtils.getCommitTime(dataFileName))); assertTrue(fileId.equals(FSUtils.getFileId(dataFileName))); - String logFileName = FSUtils.makeLogFileName(fileId, LOG_EXTENTION, commitTime, version, writeToken); + String logFileName = FSUtils.makeLogFileName(fileId, LOG_EXTENTION, instantTime, version, writeToken); assertTrue(FSUtils.isLogFile(new Path(logFileName))); - assertTrue(commitTime.equals(FSUtils.getBaseCommitTimeFromLogPath(new Path(logFileName)))); + assertTrue(instantTime.equals(FSUtils.getBaseCommitTimeFromLogPath(new Path(logFileName)))); assertTrue(fileId.equals(FSUtils.getFileIdFromLogPath(new Path(logFileName)))); assertTrue(version == FSUtils.getFileVersionFromLog(new Path(logFileName))); assertTrue(LOG_STR.equals(FSUtils.getFileExtensionFromLog(new Path(logFileName)))); @@ -342,16 +342,16 @@ public class TestFSUtils extends HoodieCommonTestHarness { // create three versions of log file String partitionPath = basePath + "/" + partitionStr; new File(partitionPath).mkdirs(); - String log1 = FSUtils.makeLogFileName(fileId, LOG_EXTENTION, commitTime, 1, writeToken); + String log1 = FSUtils.makeLogFileName(fileId, LOG_EXTENTION, instantTime, 1, writeToken); new File(partitionPath + "/" + log1).createNewFile(); - String log2 = FSUtils.makeLogFileName(fileId, LOG_EXTENTION, commitTime, 2, writeToken); + String log2 = FSUtils.makeLogFileName(fileId, LOG_EXTENTION, instantTime, 2, writeToken); new File(partitionPath + "/" + log2).createNewFile(); - String log3 = FSUtils.makeLogFileName(fileId, LOG_EXTENTION, commitTime, 3, writeToken); + String log3 = FSUtils.makeLogFileName(fileId, LOG_EXTENTION, instantTime, 3, writeToken); new File(partitionPath + "/" + log3).createNewFile(); assertTrue(3 == FSUtils.getLatestLogVersion(FSUtils.getFs(basePath, new Configuration()), - new Path(partitionPath), fileId, LOG_EXTENTION, commitTime).get().getLeft()); + new Path(partitionPath), fileId, LOG_EXTENTION, instantTime).get().getLeft()); assertTrue(4 == FSUtils.computeNextLogVersion(FSUtils.getFs(basePath, new Configuration()), - new Path(partitionPath), fileId, LOG_EXTENTION, commitTime)); + new Path(partitionPath), fileId, LOG_EXTENTION, instantTime)); } } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestRecord.java b/hudi-common/src/test/java/org/apache/hudi/common/util/TestRecord.java index dd7dea47d..2afc25955 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/TestRecord.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/util/TestRecord.java @@ -74,15 +74,15 @@ public class TestRecord implements Serializable { private TestNestedRecord testNestedRecord; private String[] stringArray; - public TestRecord(String commitTime, int recordNumber, String fileId) { - this._hoodie_commit_time = commitTime; + public TestRecord(String instantTime, int recordNumber, String fileId) { + this._hoodie_commit_time = instantTime; this._hoodie_record_key = "key" + recordNumber; - this._hoodie_partition_path = commitTime; + this._hoodie_partition_path = instantTime; this._hoodie_file_name = fileId; - this._hoodie_commit_seqno = commitTime + recordNumber; + this._hoodie_commit_seqno = instantTime + recordNumber; - String commitTimeSuffix = "@" + commitTime; - int commitHashCode = 
commitTime.hashCode(); + String commitTimeSuffix = "@" + instantTime; + int commitHashCode = instantTime.hashCode(); this.field1 = "field" + recordNumber; this.field2 = "field" + recordNumber + commitTimeSuffix; diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/collection/TestExternalSpillableMap.java b/hudi-common/src/test/java/org/apache/hudi/common/util/collection/TestExternalSpillableMap.java index 6e3de9e55..c4b0d5c5f 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/collection/TestExternalSpillableMap.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/util/collection/TestExternalSpillableMap.java @@ -215,7 +215,7 @@ public class TestExternalSpillableMap extends HoodieCommonTestHarness { String newCommitTime = HoodieActiveTimeline.createNewInstantTime(); List keysToBeUpdated = new ArrayList<>(); keysToBeUpdated.add(key); - // Update the commitTime for this record + // Update the instantTime for this record List updatedRecords = SchemaTestUtil.updateHoodieTestRecords(keysToBeUpdated, recordsToUpdate, newCommitTime); // Upsert this updated record @@ -238,7 +238,7 @@ public class TestExternalSpillableMap extends HoodieCommonTestHarness { // Upsert this updated record SpillableMapTestUtils.upsertRecords(updatedRecords, records); gRecord = (GenericRecord) records.get(key).getData().getInsertValue(schema).get(); - // The record returned for this key should have the updated commitTime + // The record returned for this key should have the updated instantTime assert newCommitTime.contentEquals(gRecord.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString()); } diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/InputFormatTestUtil.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/InputFormatTestUtil.java index 559a573d5..426629983 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/InputFormatTestUtil.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/InputFormatTestUtil.java @@ -170,10 +170,10 @@ public class InputFormatTestUtil { } private static Iterable generateAvroRecords(Schema schema, int numberOfRecords, - String commitTime, String fileId) throws IOException { + String instantTime, String fileId) throws IOException { List records = new ArrayList<>(numberOfRecords); for (int i = 0; i < numberOfRecords; i++) { - records.add(SchemaTestUtil.generateAvroRecordFromJson(schema, i, commitTime, fileId)); + records.add(SchemaTestUtil.generateAvroRecordFromJson(schema, i, instantTime, fileId)); } return records; } diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java index 6e4aba6cd..a206f3615 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java @@ -265,19 +265,19 @@ public class TestHoodieRealtimeRecordReader { // initial commit Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getEvolvedSchema()); HoodieTestUtils.init(hadoopConf, basePath.getRoot().getAbsolutePath(), HoodieTableType.MERGE_ON_READ); - String commitTime = "100"; + String instantTime = "100"; final int numRecords = 1000; final int firstBatchLastRecordKey = numRecords - 1; final int secondBatchLastRecordKey = 2 * numRecords - 1; - File partitionDir = InputFormatTestUtil.prepareParquetTable(basePath, schema, 1, 
numRecords, commitTime); - InputFormatTestUtil.commit(basePath, commitTime); + File partitionDir = InputFormatTestUtil.prepareParquetTable(basePath, schema, 1, numRecords, instantTime); + InputFormatTestUtil.commit(basePath, instantTime); // Add the paths FileInputFormat.setInputPaths(jobConf, partitionDir.getPath()); // insert new records to log file String newCommitTime = "101"; HoodieLogFormat.Writer writer = - writeDataBlockToLogFile(partitionDir, schema, "fileid0", commitTime, newCommitTime, numRecords, numRecords, 0); + writeDataBlockToLogFile(partitionDir, schema, "fileid0", instantTime, newCommitTime, numRecords, numRecords, 0); long size = writer.getCurrentSize(); writer.close(); assertTrue("block - size should be > 0", size > 0); @@ -285,7 +285,7 @@ public class TestHoodieRealtimeRecordReader { // create a split with baseFile (parquet file written earlier) and new log file(s) String logFilePath = writer.getLogFile().getPath().toString(); HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit( - new FileSplit(new Path(partitionDir + "/fileid0_1-0-1_" + commitTime + ".parquet"), 0, 1, jobConf), + new FileSplit(new Path(partitionDir + "/fileid0_1-0-1_" + instantTime + ".parquet"), 0, 1, jobConf), basePath.getRoot().getPath(), Collections.singletonList(logFilePath), newCommitTime); // create a RecordReader to be used by HoodieRealtimeRecordReader @@ -340,18 +340,18 @@ public class TestHoodieRealtimeRecordReader { // initial commit Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getComplexEvolvedSchema()); HoodieTestUtils.init(hadoopConf, basePath.getRoot().getAbsolutePath(), HoodieTableType.MERGE_ON_READ); - String commitTime = "100"; + String instantTime = "100"; int numberOfRecords = 100; int numberOfLogRecords = numberOfRecords / 2; - File partitionDir = InputFormatTestUtil.prepareParquetTable(basePath, schema, 1, numberOfRecords, commitTime); - InputFormatTestUtil.commit(basePath, commitTime); + File partitionDir = InputFormatTestUtil.prepareParquetTable(basePath, schema, 1, numberOfRecords, instantTime); + InputFormatTestUtil.commit(basePath, instantTime); // Add the paths FileInputFormat.setInputPaths(jobConf, partitionDir.getPath()); // update files or generate new log file String newCommitTime = "101"; HoodieLogFormat.Writer writer = - writeLogFile(partitionDir, schema, "fileid0", commitTime, newCommitTime, numberOfLogRecords); + writeLogFile(partitionDir, schema, "fileid0", instantTime, newCommitTime, numberOfLogRecords); long size = writer.getCurrentSize(); writer.close(); assertTrue("block - size should be > 0", size > 0); @@ -360,7 +360,7 @@ public class TestHoodieRealtimeRecordReader { // create a split with baseFile (parquet file written earlier) and new log file(s) String logFilePath = writer.getLogFile().getPath().toString(); HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit( - new FileSplit(new Path(partitionDir + "/fileid0_1-0-1_" + commitTime + ".parquet"), 0, 1, jobConf), + new FileSplit(new Path(partitionDir + "/fileid0_1-0-1_" + instantTime + ".parquet"), 0, 1, jobConf), basePath.getRoot().getPath(), Collections.singletonList(logFilePath), newCommitTime); // create a RecordReader to be used by HoodieRealtimeRecordReader @@ -385,7 +385,7 @@ public class TestHoodieRealtimeRecordReader { String recordCommitTime; // check if the record written is with latest commit, here "101" if (numRecordsRead > numberOfLogRecords) { - recordCommitTime = commitTime; + recordCommitTime = instantTime; } else { recordCommitTime = newCommitTime; } @@ 
-466,12 +466,12 @@ public class TestHoodieRealtimeRecordReader { List logFilePaths = new ArrayList<>(); Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getSimpleSchema()); HoodieTestUtils.init(hadoopConf, basePath.getRoot().getAbsolutePath(), HoodieTableType.MERGE_ON_READ); - String commitTime = "100"; + String instantTime = "100"; int numberOfRecords = 100; int numberOfLogRecords = numberOfRecords / 2; File partitionDir = - InputFormatTestUtil.prepareSimpleParquetTable(basePath, schema, 1, numberOfRecords, commitTime); - InputFormatTestUtil.commit(basePath, commitTime); + InputFormatTestUtil.prepareSimpleParquetTable(basePath, schema, 1, numberOfRecords, instantTime); + InputFormatTestUtil.commit(basePath, instantTime); // Add the paths FileInputFormat.setInputPaths(jobConf, partitionDir.getPath()); List firstSchemaFields = schema.getFields(); @@ -480,7 +480,7 @@ public class TestHoodieRealtimeRecordReader { schema = SchemaTestUtil.getComplexEvolvedSchema(); String newCommitTime = "101"; HoodieLogFormat.Writer writer = - writeDataBlockToLogFile(partitionDir, schema, "fileid0", commitTime, newCommitTime, numberOfLogRecords, 0, 1); + writeDataBlockToLogFile(partitionDir, schema, "fileid0", instantTime, newCommitTime, numberOfLogRecords, 0, 1); long size = writer.getCurrentSize(); logFilePaths.add(writer.getLogFile().getPath().toString()); writer.close(); @@ -488,14 +488,14 @@ public class TestHoodieRealtimeRecordReader { // write rollback for the previous block in new log file version newCommitTime = "102"; - writer = writeRollbackBlockToLogFile(partitionDir, schema, "fileid0", commitTime, newCommitTime, "101", 1); + writer = writeRollbackBlockToLogFile(partitionDir, schema, "fileid0", instantTime, newCommitTime, "101", 1); logFilePaths.add(writer.getLogFile().getPath().toString()); writer.close(); InputFormatTestUtil.deltaCommit(basePath, newCommitTime); // create a split with baseFile (parquet file written earlier) and new log file(s) HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit( - new FileSplit(new Path(partitionDir + "/fileid0_1_" + commitTime + ".parquet"), 0, 1, jobConf), + new FileSplit(new Path(partitionDir + "/fileid0_1_" + instantTime + ".parquet"), 0, 1, jobConf), basePath.getRoot().getPath(), logFilePaths, newCommitTime); // create a RecordReader to be used by HoodieRealtimeRecordReader diff --git a/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java b/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java index 7265f33a8..f8042192e 100644 --- a/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java +++ b/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java @@ -150,8 +150,8 @@ public class TestHiveSyncTool { @Test public void testBasicSync() throws Exception { TestUtil.hiveSyncConfig.useJdbc = this.useJdbc; - String commitTime = "100"; - TestUtil.createCOWTable(commitTime, 5); + String instantTime = "100"; + TestUtil.createCOWTable(instantTime, 5); HoodieHiveClient hiveClient = new HoodieHiveClient(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem); assertFalse("Table " + TestUtil.hiveSyncConfig.tableName + " should not exist initially", @@ -166,7 +166,7 @@ public class TestHiveSyncTool { hiveClient.getDataSchema().getColumns().size() + 1); assertEquals("Table partitions should match the number of partitions we wrote", 5, hiveClient.scanTablePartitions(TestUtil.hiveSyncConfig.tableName).size()); - assertEquals("The last commit that was sycned should be 
updated in the TBLPROPERTIES", commitTime, + assertEquals("The last commit that was synced should be updated in the TBLPROPERTIES", instantTime, hiveClient.getLastCommitTimeSynced(TestUtil.hiveSyncConfig.tableName).get()); } @@ -248,9 +248,9 @@ public class TestHiveSyncTool { @Test public void testSyncMergeOnRead() throws Exception { TestUtil.hiveSyncConfig.useJdbc = this.useJdbc; - String commitTime = "100"; + String instantTime = "100"; String deltaCommitTime = "101"; - TestUtil.createMORTable(commitTime, deltaCommitTime, 5); + TestUtil.createMORTable(instantTime, deltaCommitTime, 5); String roTableName = TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_READ_OPTIMIZED_TABLE; HoodieHiveClient hiveClient = new HoodieHiveClient(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem); @@ -291,10 +291,10 @@ public class TestHiveSyncTool { @Test public void testSyncMergeOnReadRT() throws Exception { TestUtil.hiveSyncConfig.useJdbc = this.useJdbc; - String commitTime = "100"; + String instantTime = "100"; String deltaCommitTime = "101"; String snapshotTableName = TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE; - TestUtil.createMORTable(commitTime, deltaCommitTime, 5); + TestUtil.createMORTable(instantTime, deltaCommitTime, 5); HoodieHiveClient hiveClientRT = new HoodieHiveClient(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem); @@ -338,8 +338,8 @@ public class TestHiveSyncTool { @Test public void testMultiPartitionKeySync() throws Exception { TestUtil.hiveSyncConfig.useJdbc = this.useJdbc; - String commitTime = "100"; - TestUtil.createCOWTable(commitTime, 5); + String instantTime = "100"; + TestUtil.createCOWTable(instantTime, 5); HiveSyncConfig hiveSyncConfig = HiveSyncConfig.copy(TestUtil.hiveSyncConfig); hiveSyncConfig.partitionValueExtractorClass = MultiPartKeysValueExtractor.class.getCanonicalName(); @@ -360,7 +360,7 @@ public class TestHiveSyncTool { hiveClient.getDataSchema().getColumns().size() + 3); assertEquals("Table partitions should match the number of partitions we wrote", 5, hiveClient.scanTablePartitions(hiveSyncConfig.tableName).size()); - assertEquals("The last commit that was sycned should be updated in the TBLPROPERTIES", commitTime, + assertEquals("The last commit that was synced should be updated in the TBLPROPERTIES", instantTime, hiveClient.getLastCommitTimeSynced(hiveSyncConfig.tableName).get()); } } diff --git a/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestUtil.java b/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestUtil.java index 571e949df..c02743ba1 100644 --- a/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestUtil.java +++ b/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestUtil.java @@ -153,7 +153,7 @@ public class TestUtil { } } - static void createCOWTable(String commitTime, int numberOfPartitions) + static void createCOWTable(String instantTime, int numberOfPartitions) throws IOException, InitializationError, URISyntaxException { Path path = new Path(hiveSyncConfig.basePath); FileIOUtils.deleteDirectory(new File(hiveSyncConfig.basePath)); @@ -162,12 +162,12 @@ public class TestUtil { boolean result = fileSystem.mkdirs(path); checkResult(result); DateTime dateTime = DateTime.now(); - HoodieCommitMetadata commitMetadata = createPartitions(numberOfPartitions, true, dateTime, commitTime); + HoodieCommitMetadata commitMetadata = createPartitions(numberOfPartitions, true, dateTime, instantTime); createdTablesSet.add(hiveSyncConfig.databaseName + "." 
+ hiveSyncConfig.tableName); - createCommitFile(commitMetadata, commitTime); + createCommitFile(commitMetadata, instantTime); } - static void createMORTable(String commitTime, String deltaCommitTime, int numberOfPartitions) + static void createMORTable(String instantTime, String deltaCommitTime, int numberOfPartitions) throws IOException, InitializationError, URISyntaxException, InterruptedException { Path path = new Path(hiveSyncConfig.basePath); FileIOUtils.deleteDirectory(new File(hiveSyncConfig.basePath)); @@ -177,38 +177,38 @@ public class TestUtil { boolean result = fileSystem.mkdirs(path); checkResult(result); DateTime dateTime = DateTime.now(); - HoodieCommitMetadata commitMetadata = createPartitions(numberOfPartitions, true, dateTime, commitTime); + HoodieCommitMetadata commitMetadata = createPartitions(numberOfPartitions, true, dateTime, instantTime); createdTablesSet.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName); createdTablesSet .add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE); HoodieCommitMetadata compactionMetadata = new HoodieCommitMetadata(); commitMetadata.getPartitionToWriteStats() .forEach((key, value) -> value.forEach(l -> compactionMetadata.addWriteStat(key, l))); - createCompactionCommitFile(compactionMetadata, commitTime); + createCompactionCommitFile(compactionMetadata, instantTime); // Write a delta commit HoodieCommitMetadata deltaMetadata = createLogFiles(commitMetadata.getPartitionToWriteStats(), true); createDeltaCommitFile(deltaMetadata, deltaCommitTime); } static void addCOWPartitions(int numberOfPartitions, boolean isParquetSchemaSimple, DateTime startFrom, - String commitTime) throws IOException, URISyntaxException { + String instantTime) throws IOException, URISyntaxException { HoodieCommitMetadata commitMetadata = - createPartitions(numberOfPartitions, isParquetSchemaSimple, startFrom, commitTime); + createPartitions(numberOfPartitions, isParquetSchemaSimple, startFrom, instantTime); createdTablesSet.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName); - createCommitFile(commitMetadata, commitTime); + createCommitFile(commitMetadata, instantTime); } static void addMORPartitions(int numberOfPartitions, boolean isParquetSchemaSimple, boolean isLogSchemaSimple, - DateTime startFrom, String commitTime, String deltaCommitTime) + DateTime startFrom, String instantTime, String deltaCommitTime) throws IOException, URISyntaxException, InterruptedException { HoodieCommitMetadata commitMetadata = - createPartitions(numberOfPartitions, isParquetSchemaSimple, startFrom, commitTime); + createPartitions(numberOfPartitions, isParquetSchemaSimple, startFrom, instantTime); createdTablesSet.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_READ_OPTIMIZED_TABLE); createdTablesSet.add(hiveSyncConfig.databaseName + "." 
+ hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE); HoodieCommitMetadata compactionMetadata = new HoodieCommitMetadata(); commitMetadata.getPartitionToWriteStats() .forEach((key, value) -> value.forEach(l -> compactionMetadata.addWriteStat(key, l))); - createCompactionCommitFile(compactionMetadata, commitTime); + createCompactionCommitFile(compactionMetadata, instantTime); HoodieCommitMetadata deltaMetadata = createLogFiles(commitMetadata.getPartitionToWriteStats(), isLogSchemaSimple); createDeltaCommitFile(deltaMetadata, deltaCommitTime); } @@ -232,7 +232,7 @@ public class TestUtil { } private static HoodieCommitMetadata createPartitions(int numberOfPartitions, boolean isParquetSchemaSimple, - DateTime startFrom, String commitTime) throws IOException, URISyntaxException { + DateTime startFrom, String instantTime) throws IOException, URISyntaxException { startFrom = startFrom.withTimeAtStartOfDay(); HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata(); @@ -241,20 +241,20 @@ public class TestUtil { Path partPath = new Path(hiveSyncConfig.basePath + "/" + partitionPath); fileSystem.makeQualified(partPath); fileSystem.mkdirs(partPath); - List writeStats = createTestData(partPath, isParquetSchemaSimple, commitTime); + List writeStats = createTestData(partPath, isParquetSchemaSimple, instantTime); startFrom = startFrom.minusDays(1); writeStats.forEach(s -> commitMetadata.addWriteStat(partitionPath, s)); } return commitMetadata; } - private static List createTestData(Path partPath, boolean isParquetSchemaSimple, String commitTime) + private static List createTestData(Path partPath, boolean isParquetSchemaSimple, String instantTime) throws IOException, URISyntaxException { List writeStats = new ArrayList<>(); for (int i = 0; i < 5; i++) { // Create 5 files String fileId = UUID.randomUUID().toString(); - Path filePath = new Path(partPath.toString() + "/" + FSUtils.makeDataFileName(commitTime, "1-0-1", fileId)); + Path filePath = new Path(partPath.toString() + "/" + FSUtils.makeDataFileName(instantTime, "1-0-1", fileId)); generateParquetData(filePath, isParquetSchemaSimple); HoodieWriteStat writeStat = new HoodieWriteStat(); writeStat.setFileId(fileId); @@ -313,20 +313,20 @@ public class TestUtil { } } - private static void createCommitFile(HoodieCommitMetadata commitMetadata, String commitTime) throws IOException { + private static void createCommitFile(HoodieCommitMetadata commitMetadata, String instantTime) throws IOException { byte[] bytes = commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8); Path fullPath = new Path(hiveSyncConfig.basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" - + HoodieTimeline.makeCommitFileName(commitTime)); + + HoodieTimeline.makeCommitFileName(instantTime)); FSDataOutputStream fsout = fileSystem.create(fullPath, true); fsout.write(bytes); fsout.close(); } - private static void createCompactionCommitFile(HoodieCommitMetadata commitMetadata, String commitTime) + private static void createCompactionCommitFile(HoodieCommitMetadata commitMetadata, String instantTime) throws IOException { byte[] bytes = commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8); Path fullPath = new Path(hiveSyncConfig.basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" - + HoodieTimeline.makeCommitFileName(commitTime)); + + HoodieTimeline.makeCommitFileName(instantTime)); FSDataOutputStream fsout = fileSystem.create(fullPath, true); fsout.write(bytes); fsout.close(); diff --git 
a/hudi-spark/src/main/java/org/apache/hudi/DataSourceUtils.java b/hudi-spark/src/main/java/org/apache/hudi/DataSourceUtils.java index 99a795d88..b47041629 100644 --- a/hudi-spark/src/main/java/org/apache/hudi/DataSourceUtils.java +++ b/hudi-spark/src/main/java/org/apache/hudi/DataSourceUtils.java @@ -196,20 +196,20 @@ public class DataSourceUtils { } public static JavaRDD doWriteOperation(HoodieWriteClient client, JavaRDD hoodieRecords, - String commitTime, String operation) { + String instantTime, String operation) { if (operation.equals(DataSourceWriteOptions.BULK_INSERT_OPERATION_OPT_VAL())) { - return client.bulkInsert(hoodieRecords, commitTime); + return client.bulkInsert(hoodieRecords, instantTime); } else if (operation.equals(DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL())) { - return client.insert(hoodieRecords, commitTime); + return client.insert(hoodieRecords, instantTime); } else { // default is upsert - return client.upsert(hoodieRecords, commitTime); + return client.upsert(hoodieRecords, instantTime); } } public static JavaRDD doDeleteOperation(HoodieWriteClient client, JavaRDD hoodieKeys, - String commitTime) { - return client.delete(hoodieKeys, commitTime); + String instantTime) { + return client.delete(hoodieKeys, instantTime); } public static HoodieRecord createHoodieRecord(GenericRecord gr, Comparable orderingVal, HoodieKey hKey, diff --git a/hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala b/hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala index 326595f5e..b3cf74272 100644 --- a/hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala +++ b/hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala @@ -78,7 +78,7 @@ private[hudi] object HoodieSparkSqlWriter { val jsc = new JavaSparkContext(sparkContext) val basePath = new Path(parameters("path")) - val commitTime = HoodieActiveTimeline.createNewInstantTime() + val instantTime = HoodieActiveTimeline.createNewInstantTime() val fs = basePath.getFileSystem(sparkContext.hadoopConfiguration) var exists = fs.exists(new Path(basePath, HoodieTableMetaClient.METAFOLDER_NAME)) @@ -140,8 +140,8 @@ private[hudi] object HoodieSparkSqlWriter { log.info("new batch has no new records, skipping...") (true, common.util.Option.empty()) } - client.startCommitWithTime(commitTime) - val writeStatuses = DataSourceUtils.doWriteOperation(client, hoodieRecords, commitTime, operation) + client.startCommitWithTime(instantTime) + val writeStatuses = DataSourceUtils.doWriteOperation(client, hoodieRecords, instantTime, operation) (writeStatuses, client) } else { @@ -172,14 +172,14 @@ private[hudi] object HoodieSparkSqlWriter { ) // Issue deletes - client.startCommitWithTime(commitTime) - val writeStatuses = DataSourceUtils.doDeleteOperation(client, hoodieKeysToDelete, commitTime) + client.startCommitWithTime(instantTime) + val writeStatuses = DataSourceUtils.doDeleteOperation(client, hoodieKeysToDelete, instantTime) (writeStatuses, client) } // Check for errors and commit the write. 
- val writeSuccessful = checkWriteStatus(writeStatuses, parameters, writeClient, commitTime, basePath, operation, jsc) - (writeSuccessful, common.util.Option.ofNullable(commitTime)) + val writeSuccessful = checkWriteStatus(writeStatuses, parameters, writeClient, instantTime, basePath, operation, jsc) + (writeSuccessful, common.util.Option.ofNullable(instantTime)) } /** @@ -246,7 +246,7 @@ private[hudi] object HoodieSparkSqlWriter { private def checkWriteStatus(writeStatuses: JavaRDD[WriteStatus], parameters: Map[String, String], client: HoodieWriteClient[_], - commitTime: String, + instantTime: String, basePath: Path, operation: String, jsc: JavaSparkContext): Boolean = { @@ -256,17 +256,17 @@ private[hudi] object HoodieSparkSqlWriter { val metaMap = parameters.filter(kv => kv._1.startsWith(parameters(COMMIT_METADATA_KEYPREFIX_OPT_KEY))) val commitSuccess = if (metaMap.isEmpty) { - client.commit(commitTime, writeStatuses) + client.commit(instantTime, writeStatuses) } else { - client.commit(commitTime, writeStatuses, + client.commit(instantTime, writeStatuses, common.util.Option.of(new util.HashMap[String, String](mapAsJavaMap(metaMap)))) } if (commitSuccess) { - log.info("Commit " + commitTime + " successful!") + log.info("Commit " + instantTime + " successful!") } else { - log.info("Commit " + commitTime + " failed!") + log.info("Commit " + instantTime + " failed!") } val hiveSyncEnabled = parameters.get(HIVE_SYNC_ENABLED_OPT_KEY).exists(r => r.toBoolean) diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotCopier.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotCopier.java index 7d944d355..8c81ddf35 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotCopier.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotCopier.java @@ -132,8 +132,8 @@ public class HoodieSnapshotCopier implements Serializable { if (commitFilePath.getName().equals(HoodieTableConfig.HOODIE_PROPERTIES_FILE)) { return true; } else { - String commitTime = FSUtils.getCommitFromCommitFile(commitFilePath.getName()); - return HoodieTimeline.compareTimestamps(commitTime, latestCommitTimestamp, + String instantTime = FSUtils.getCommitFromCommitFile(commitFilePath.getName()); + return HoodieTimeline.compareTimestamps(instantTime, latestCommitTimestamp, HoodieTimeline.LESSER_OR_EQUAL); } }); diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotExporter.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotExporter.java index c39daa7d5..dfe3d682a 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotExporter.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotExporter.java @@ -215,8 +215,8 @@ public class HoodieSnapshotExporter { if (commitFilePath.getName().equals(HoodieTableConfig.HOODIE_PROPERTIES_FILE)) { return true; } else { - String commitTime = FSUtils.getCommitFromCommitFile(commitFilePath.getName()); - return HoodieTimeline.compareTimestamps(commitTime, latestCommitTimestamp, + String instantTime = FSUtils.getCommitFromCommitFile(commitFilePath.getName()); + return HoodieTimeline.compareTimestamps(instantTime, latestCommitTimestamp, HoodieTimeline.LESSER_OR_EQUAL); } }); diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java index 633f1def2..31eef7ed8 100644 --- 
a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java @@ -361,16 +361,16 @@ public class DeltaSync implements Serializable { boolean isEmpty = records.isEmpty(); - String commitTime = startCommit(); - LOG.info("Starting commit : " + commitTime); + String instantTime = startCommit(); + LOG.info("Starting commit : " + instantTime); JavaRDD writeStatusRDD; if (cfg.operation == Operation.INSERT) { - writeStatusRDD = writeClient.insert(records, commitTime); + writeStatusRDD = writeClient.insert(records, instantTime); } else if (cfg.operation == Operation.UPSERT) { - writeStatusRDD = writeClient.upsert(records, commitTime); + writeStatusRDD = writeClient.upsert(records, instantTime); } else if (cfg.operation == Operation.BULK_INSERT) { - writeStatusRDD = writeClient.bulkInsert(records, commitTime); + writeStatusRDD = writeClient.bulkInsert(records, instantTime); } else { throw new HoodieDeltaStreamerException("Unknown operation :" + cfg.operation); } @@ -391,9 +391,9 @@ public class DeltaSync implements Serializable { + totalErrorRecords + "/" + totalRecords); } - boolean success = writeClient.commit(commitTime, writeStatusRDD, Option.of(checkpointCommitMetadata)); + boolean success = writeClient.commit(instantTime, writeStatusRDD, Option.of(checkpointCommitMetadata)); if (success) { - LOG.info("Commit " + commitTime + " successful!"); + LOG.info("Commit " + instantTime + " successful!"); // Schedule compaction if needed if (cfg.isAsyncCompactionEnabled()) { @@ -407,8 +407,8 @@ public class DeltaSync implements Serializable { hiveSyncTimeMs = hiveSyncContext != null ? hiveSyncContext.stop() : 0; } } else { - LOG.info("Commit " + commitTime + " failed!"); - throw new HoodieException("Commit " + commitTime + " failed!"); + LOG.info("Commit " + instantTime + " failed!"); + throw new HoodieException("Commit " + instantTime + " failed!"); } } else { LOG.error("Delta Sync found errors when writing. Errors/Total=" + totalErrorRecords + "/" + totalRecords); @@ -420,8 +420,8 @@ public class DeltaSync implements Serializable { } }); // Rolling back instant - writeClient.rollback(commitTime); - throw new HoodieException("Commit " + commitTime + " failed and rolled-back !"); + writeClient.rollback(instantTime); + throw new HoodieException("Commit " + instantTime + " failed and rolled-back !"); } long overallTimeMs = overallTimerContext != null ? 
overallTimerContext.stop() : 0; diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HiveIncrPullSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HiveIncrPullSource.java index 666c2606b..a440eb06f 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HiveIncrPullSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HiveIncrPullSource.java @@ -102,10 +102,10 @@ public class HiveIncrPullSource extends AvroSource { return Option.of(commitTimes.get(0)); } - for (String commitTime : commitTimes) { + for (String instantTime : commitTimes) { // TODO(vc): Add an option to delete consumed commits - if (commitTime.compareTo(latestTargetCommit.get()) > 0) { - return Option.of(commitTime); + if (instantTime.compareTo(latestTargetCommit.get()) > 0) { + return Option.of(instantTime); } } return Option.empty(); diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/AbstractBaseTestSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/AbstractBaseTestSource.java index 175edde65..86ac361af 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/AbstractBaseTestSource.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/AbstractBaseTestSource.java @@ -80,7 +80,7 @@ public abstract class AbstractBaseTestSource extends AvroSource { super(props, sparkContext, sparkSession, schemaProvider); } - protected static Stream fetchNextBatch(TypedProperties props, int sourceLimit, String commitTime, + protected static Stream fetchNextBatch(TypedProperties props, int sourceLimit, String instantTime, int partition) { int maxUniqueKeys = props.getInteger(TestSourceConfig.MAX_UNIQUE_RECORDS_PROP, TestSourceConfig.DEFAULT_MAX_UNIQUE_RECORDS); @@ -116,14 +116,14 @@ public abstract class AbstractBaseTestSource extends AvroSource { LOG.info("After adjustments => NumInserts=" + numInserts + ", NumUpdates=" + (numUpdates - 50) + ", NumDeletes=50, maxUniqueRecords=" + maxUniqueKeys); // if we generate update followed by deletes -> some keys in update batch might be picked up for deletes. 
Hence generating delete batch followed by updates - deleteStream = dataGenerator.generateUniqueDeleteRecordStream(commitTime, 50).map(hr -> AbstractBaseTestSource.toGenericRecord(hr, dataGenerator)); - updateStream = dataGenerator.generateUniqueUpdatesStream(commitTime, numUpdates - 50).map(hr -> AbstractBaseTestSource.toGenericRecord(hr, dataGenerator)); + deleteStream = dataGenerator.generateUniqueDeleteRecordStream(instantTime, 50).map(hr -> AbstractBaseTestSource.toGenericRecord(hr, dataGenerator)); + updateStream = dataGenerator.generateUniqueUpdatesStream(instantTime, numUpdates - 50).map(hr -> AbstractBaseTestSource.toGenericRecord(hr, dataGenerator)); } else { LOG.info("After adjustments => NumInserts=" + numInserts + ", NumUpdates=" + numUpdates + ", maxUniqueRecords=" + maxUniqueKeys); - updateStream = dataGenerator.generateUniqueUpdatesStream(commitTime, numUpdates) + updateStream = dataGenerator.generateUniqueUpdatesStream(instantTime, numUpdates) .map(hr -> AbstractBaseTestSource.toGenericRecord(hr, dataGenerator)); } - Stream insertStream = dataGenerator.generateInsertsStream(commitTime, numInserts, false) + Stream insertStream = dataGenerator.generateInsertsStream(instantTime, numInserts, false) .map(hr -> AbstractBaseTestSource.toGenericRecord(hr, dataGenerator)); return Stream.concat(deleteStream, Stream.concat(updateStream, insertStream)); } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/AbstractDFSSourceTestBase.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/AbstractDFSSourceTestBase.java index 42cbebc5f..b4a023e4c 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/AbstractDFSSourceTestBase.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/AbstractDFSSourceTestBase.java @@ -99,14 +99,14 @@ public abstract class AbstractDFSSourceTestBase extends UtilitiesTestBase { * Generates a batch of test data and writes the data to a file. * * @param filename The name of the file. - * @param commitTime The commit time. + * @param instantTime The instant time. * @param n The number of records to generate. * @return The file path. * @throws IOException */ - Path generateOneFile(String filename, String commitTime, int n) throws IOException { + Path generateOneFile(String filename, String instantTime, int n) throws IOException { Path path = new Path(dfsRoot, filename + fileSuffix); - writeNewDataToFile(dataGenerator.generateInserts(commitTime, n, useFlattenedSchema), path); + writeNewDataToFile(dataGenerator.generateInserts(instantTime, n, useFlattenedSchema), path); return path; } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/DistributedTestDataSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/DistributedTestDataSource.java index 7153b2ef3..4aaa1d611 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/DistributedTestDataSource.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/DistributedTestDataSource.java @@ -52,12 +52,12 @@ public class DistributedTestDataSource extends AbstractBaseTestSource { @Override protected InputBatch> fetchNewData(Option lastCkptStr, long sourceLimit) { int nextCommitNum = lastCkptStr.map(s -> Integer.parseInt(s) + 1).orElse(0); - String commitTime = String.format("%05d", nextCommitNum); + String instantTime = String.format("%05d", nextCommitNum); LOG.info("Source Limit is set to " + sourceLimit); // No new data. 
if (sourceLimit <= 0) { - return new InputBatch<>(Option.empty(), commitTime); + return new InputBatch<>(Option.empty(), instantTime); } TypedProperties newProps = new TypedProperties(); @@ -76,8 +76,8 @@ public class DistributedTestDataSource extends AbstractBaseTestSource { if (!dataGeneratorMap.containsKey(p)) { initDataGen(newProps, p); } - return fetchNextBatch(newProps, perPartitionSourceLimit, commitTime, p).iterator(); + return fetchNextBatch(newProps, perPartitionSourceLimit, instantTime, p).iterator(); }, true); - return new InputBatch<>(Option.of(avroRDD), commitTime); + return new InputBatch<>(Option.of(avroRDD), instantTime); } } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestDataSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestDataSource.java index 0b52db96c..2667cc735 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestDataSource.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestDataSource.java @@ -49,7 +49,7 @@ public class TestDataSource extends AbstractBaseTestSource { protected InputBatch> fetchNewData(Option lastCheckpointStr, long sourceLimit) { int nextCommitNum = lastCheckpointStr.map(s -> Integer.parseInt(s) + 1).orElse(0); - String commitTime = String.format("%05d", nextCommitNum); + String instantTime = String.format("%05d", nextCommitNum); LOG.info("Source Limit is set to " + sourceLimit); // No new data. @@ -58,8 +58,8 @@ public class TestDataSource extends AbstractBaseTestSource { } List records = - fetchNextBatch(props, (int) sourceLimit, commitTime, DEFAULT_PARTITION_NUM).collect(Collectors.toList()); + fetchNextBatch(props, (int) sourceLimit, instantTime, DEFAULT_PARTITION_NUM).collect(Collectors.toList()); JavaRDD avroRDD = sparkContext.parallelize(records, 4); - return new InputBatch<>(Option.of(avroRDD), commitTime); + return new InputBatch<>(Option.of(avroRDD), instantTime); } }
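
Taken together, these renames document a single contract on the write path: a caller asks the active timeline for a fresh instant time, starts a commit with it, passes the same string to the write operation, and finally commits or rolls back that exact instant. The sketch below restates that flow in Java; it is a minimal illustration assuming the 0.5.x-era APIs touched by this patch (raw generics, the class name InstantTimeWriteFlow, and the import locations are illustrative and may differ between Hudi versions).

import org.apache.hudi.HoodieWriteClient;
import org.apache.hudi.WriteStatus;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.spark.api.java.JavaRDD;

public class InstantTimeWriteFlow {
  // client and records are assumed to be set up elsewhere; HoodieWriteClient is
  // used raw here only to keep the sketch short.
  static boolean writeBatch(HoodieWriteClient client, JavaRDD<HoodieRecord> records) {
    // Every action on the timeline is keyed by an instant time, not a "commit time".
    String instantTime = HoodieActiveTimeline.createNewInstantTime();
    client.startCommitWithTime(instantTime);
    JavaRDD<WriteStatus> statuses = client.upsert(records, instantTime);
    boolean success = client.commit(instantTime, statuses);
    if (!success) {
      // The same instant identifies exactly which work to undo.
      client.rollback(instantTime);
    }
    return success;
  }
}

The new name also reads better outside the commit action itself: the patch touches clean, rollback, and compaction code paths, all of which share the timeline, so "instant time" describes the identifier uniformly where "commit time" only described one action type.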
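
The same identifier is also embedded in physical file names, which is what the TestFSUtils changes exercise: FSUtils writes the instant time into both base-file and log-file names and can parse it back out. A small self-contained sketch of that round trip, assuming the FSUtils helpers shown in the patch (the token, file id, and extension values are illustrative):

import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.util.FSUtils;

public class InstantTimeInFileNames {
  public static void main(String[] args) {
    String instantTime = "20160501010101"; // illustrative instant time
    String writeToken = "1-0-1";           // illustrative task write token
    String fileId = "fileid0";             // illustrative file group id

    // Base (parquet) file: the instant time is recoverable from the name.
    String dataFile = FSUtils.makeDataFileName(instantTime, writeToken, fileId);
    System.out.println(FSUtils.getCommitTime(dataFile)); // prints 20160501010101

    // Log file: version 1 of the log rooted at the same base instant.
    String logFile = FSUtils.makeLogFileName(fileId, ".log", instantTime, 1, writeToken);
    System.out.println(FSUtils.getBaseCommitTimeFromLogPath(new Path(logFile)));
  }
}

Note that the parsing helpers keep their old names (getCommitTime, getBaseCommitTimeFromLogPath) in this patch; only parameters, locals, and messages are renamed here.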