From 6aba00e84fade0b800e2d73c2f16be948af48d54 Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Sun, 13 Feb 2022 06:41:58 -0800 Subject: [PATCH] [MINOR] Fix typos in Spark client related classes (#4781) --- .../hudi/cli/commands/ExportCommand.java | 9 +++--- .../commands/TestUpgradeDowngradeCommand.java | 2 +- .../HoodieTestCommitMetadataGenerator.java | 4 +-- .../apache/hudi/client/HoodieReadClient.java | 2 +- .../hudi/client/SparkRDDWriteClient.java | 2 +- .../SparkSizeBasedClusteringPlanStrategy.java | 4 +-- .../strategy/SparkRejectUpdateStrategy.java | 2 +- .../client/utils/SparkValidatorUtils.java | 6 ++-- .../SqlQueryInequalityPreCommitValidator.java | 8 +++--- ...qlQuerySingleResultPreCommitValidator.java | 6 ++-- .../io/storage/row/HoodieRowCreateHandle.java | 2 +- .../keygen/NonpartitionedKeyGenerator.java | 5 ++-- .../hudi/keygen/RowKeyGeneratorHelper.java | 9 +++--- .../SparkHoodieBackedTableMetadataWriter.java | 6 ++-- .../bootstrap/BootstrapMetadataHandler.java | 7 +++-- .../SparkBootstrapCommitActionExecutor.java | 4 +-- ...ExecuteClusteringCommitActionExecutor.java | 2 +- .../commit/BaseSparkCommitActionExecutor.java | 16 ++++++----- .../SparkInsertOverwritePartitioner.java | 3 +- .../hudi/client/TestClientRollback.java | 2 +- .../client/TestHoodieClientMultiWriter.java | 2 +- .../hudi/client/TestTableSchemaEvolution.java | 6 ++-- .../index/bloom/TestKeyRangeLookupTree.java | 2 +- .../hbase/TestSparkHoodieHBaseIndex.java | 4 +-- .../hudi/io/TestHoodieTimelineArchiveLog.java | 5 ++-- .../TestHoodieSparkKeyGeneratorFactory.java | 4 +-- .../hudi/table/TestConsistencyGuard.java | 4 +-- .../action/compact/TestAsyncCompaction.java | 9 +++--- .../action/compact/TestInlineCompaction.java | 5 ++-- .../TestHoodieCompactionStrategy.java | 8 +++--- .../HoodieClientRollbackTestBase.java | 7 +++-- ...TestMergeOnReadRollbackActionExecutor.java | 2 +- .../table/marker/TestWriteMarkersBase.java | 2 +- .../hudi/testutils/HoodieClientTestUtils.java | 2 +- .../hudi/metadata/HoodieMetadataPayload.java | 4 +-- .../metadata/HoodieTableMetadataUtil.java | 9 +++--- .../src/test/java/HoodieJavaStreamingApp.java | 4 +-- .../org/apache/hudi/TestDataSourceUtils.java | 2 +- .../apache/hudi/functional/TestBootstrap.java | 8 +++--- .../hudi/functional/TestOrcBootstrap.java | 8 +++--- .../hudi/payload/TestAWSDmsAvroPayload.java | 5 ++-- .../functional/TestCOWDataSourceStorage.scala | 4 +-- .../utilities/perf/TimelineServerPerf.java | 6 ++-- .../HoodieDeltaStreamerTestBase.java | 28 +++++++++---------- ...estHoodieDeltaStreamerWithMultiWriter.java | 8 +++--- 45 files changed, 131 insertions(+), 118 deletions(-) diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ExportCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ExportCommand.java index 119ccb0dc..3e5fb8fe0 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ExportCommand.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ExportCommand.java @@ -35,13 +35,14 @@ import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.table.timeline.TimelineMetadataUtils; import org.apache.hudi.exception.HoodieException; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; + import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.IndexedRecord; import org.apache.avro.specific.SpecificData; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; +import 
org.apache.log4j.LogManager; +import org.apache.log4j.Logger; import org.springframework.shell.core.CommandMarker; import org.springframework.shell.core.annotation.CliCommand; import org.springframework.shell.core.annotation.CliOption; @@ -69,8 +70,8 @@ public class ExportCommand implements CommandMarker { @CliCommand(value = "export instants", help = "Export Instants and their metadata from the Timeline") public String exportInstants( @CliOption(key = {"limit"}, help = "Limit Instants", unspecifiedDefaultValue = "-1") final Integer limit, - @CliOption(key = {"actions"}, help = "Comma seperated list of Instant actions to export", - unspecifiedDefaultValue = "clean,commit,deltacommit,rollback,savepoint,restore") final String filter, + @CliOption(key = {"actions"}, help = "Comma-separated list of Instant actions to export", + unspecifiedDefaultValue = "clean,commit,deltacommit,rollback,savepoint,restore") final String filter, @CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending, @CliOption(key = {"localFolder"}, help = "Local Folder to export to", mandatory = true) String localFolder) throws Exception { diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestUpgradeDowngradeCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestUpgradeDowngradeCommand.java index cba6d901b..b3650fa02 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestUpgradeDowngradeCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestUpgradeDowngradeCommand.java @@ -106,7 +106,7 @@ public class TestUpgradeDowngradeCommand extends CLIFunctionalTestHarness { assertEquals(metaClient.getTableConfig().getTableVersion().versionCode(), HoodieTableVersion.ZERO.versionCode()); assertTableVersionFromPropertyFile(); - // verify marker files are non existant + // verify marker files are non-existent for (String partitionPath : DEFAULT_PARTITION_PATHS) { assertEquals(0, FileCreateUtils.getTotalMarkerFileCount(tablePath, partitionPath, "101", IOType.MERGE)); } diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/testutils/HoodieTestCommitMetadataGenerator.java b/hudi-cli/src/test/java/org/apache/hudi/cli/testutils/HoodieTestCommitMetadataGenerator.java index 105a9f639..f59dca4e1 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/testutils/HoodieTestCommitMetadataGenerator.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/testutils/HoodieTestCommitMetadataGenerator.java @@ -73,9 +73,9 @@ public class HoodieTestCommitMetadataGenerator extends HoodieTestDataGenerator { } public static void createCommitFileWithMetadata(String basePath, String commitTime, Configuration configuration, - Option writes, Option updates, Map extraMetdata) throws Exception { + Option writes, Option updates, Map extraMetadata) throws Exception { createCommitFileWithMetadata(basePath, commitTime, configuration, UUID.randomUUID().toString(), - UUID.randomUUID().toString(), writes, updates, extraMetdata); + UUID.randomUUID().toString(), writes, updates, extraMetadata); } public static void createCommitFileWithMetadata(String basePath, String commitTime, Configuration configuration, diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/HoodieReadClient.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/HoodieReadClient.java index 8988aa582..e9bdc427e 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/HoodieReadClient.java +++ 
b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/HoodieReadClient.java @@ -65,7 +65,7 @@ public class HoodieReadClient> implements Seria /** * TODO: We need to persist the index type into hoodie.properties and be able to access the index just with a simple - * basepath pointing to the table. Until, then just always assume a BloomIndex + * base path pointing to the table. Until then, just always assume a BloomIndex */ private final transient HoodieIndex index; private HoodieTable>, JavaRDD, JavaRDD> hoodieTable; diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/SparkRDDWriteClient.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/SparkRDDWriteClient.java index 6282d7beb..63f8804bc 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/SparkRDDWriteClient.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/SparkRDDWriteClient.java @@ -504,7 +504,7 @@ public class SparkRDDWriteClient extends @Override protected void preCommit(HoodieInstant inflightInstant, HoodieCommitMetadata metadata) { // Create a Hoodie table after startTxn which encapsulated the commits and files visible. - // Important to create this after the lock to ensure latest commits show up in the timeline without need for reload + // Important to create this after the lock to ensure the latest commits show up in the timeline without need for reload HoodieTable table = createTable(config, hadoopConf); TransactionUtils.resolveWriteConflictIfAny(table, this.txnManager.getCurrentTransactionOwner(), Option.of(metadata), config, txnManager.getLastCompletedTransactionOwner()); diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/plan/strategy/SparkSizeBasedClusteringPlanStrategy.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/plan/strategy/SparkSizeBasedClusteringPlanStrategy.java index b38931c2d..6629569d0 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/plan/strategy/SparkSizeBasedClusteringPlanStrategy.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/plan/strategy/SparkSizeBasedClusteringPlanStrategy.java @@ -87,7 +87,7 @@ public class SparkSizeBasedClusteringPlanStrategy getFileSlicesEligibleForClustering(final String partition) { return super.getFileSlicesEligibleForClustering(partition) - // Only files that have basefile size smaller than small file size are eligible. + // Only files that have base file size smaller than small file size are eligible. .filter(slice -> slice.getBaseFile().map(HoodieBaseFile::getFileSize).orElse(0L) < getWriteConfig().getClusteringSmallFileLimit()); } diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/update/strategy/SparkRejectUpdateStrategy.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/update/strategy/SparkRejectUpdateStrategy.java index b12d9ad43..ad60d9c88 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/update/strategy/SparkRejectUpdateStrategy.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/update/strategy/SparkRejectUpdateStrategy.java @@ -37,7 +37,7 @@ import java.util.Set; /** * Update strategy based on following. 
- * if some file group have update record, throw exception + * if some file groups have update records, throw exception */ public class SparkRejectUpdateStrategy> extends UpdateStrategy>> { private static final Logger LOG = LogManager.getLogger(SparkRejectUpdateStrategy.class); diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/utils/SparkValidatorUtils.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/utils/SparkValidatorUtils.java index 604abbd5c..9e72390e4 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/utils/SparkValidatorUtils.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/utils/SparkValidatorUtils.java @@ -31,13 +31,13 @@ import org.apache.hudi.table.HoodieSparkTable; import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.action.HoodieWriteMetadata; import org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor; + import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.SQLContext; -import scala.collection.JavaConverters; import java.util.Arrays; import java.util.HashSet; @@ -47,6 +47,8 @@ import java.util.concurrent.CompletableFuture; import java.util.stream.Collectors; import java.util.stream.Stream; +import scala.collection.JavaConverters; + /** * Spark validator utils to verify and run any precommit validators configured. */ @@ -97,7 +99,7 @@ public class SparkValidatorUtils { } /** - * Run validators in a separate threadpool for parallelism. Each of validator can submit a distributed spark job if needed. + * Run validators in a separate thread pool for parallelism. Each validator can submit a distributed Spark job if needed. */ private static CompletableFuture runValidatorAsync(SparkPreCommitValidator validator, HoodieWriteMetadata writeMetadata, Dataset beforeState, Dataset afterState, String instantTime) { diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/validator/SqlQueryInequalityPreCommitValidator.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/validator/SqlQueryInequalityPreCommitValidator.java index 454638c2d..026334fde 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/validator/SqlQueryInequalityPreCommitValidator.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/validator/SqlQueryInequalityPreCommitValidator.java @@ -34,11 +34,11 @@ import org.apache.spark.sql.Row; import org.apache.spark.sql.SQLContext; /** - * Validator to run sql query and compare table state + * Validator to run sql query and compare table state * 1) before new commit started. * 2) current inflight commit (if successful). - * - * Expects query results dont match. +

+ * Expects query results do not match. */ public class SqlQueryInequalityPreCommitValidator> extends SqlQueryPreCommitValidator { private static final Logger LOG = LogManager.getLogger(SqlQueryInequalityPreCommitValidator.class); @@ -66,7 +66,7 @@ public class SqlQueryInequalityPreCommitValidator * Example configuration: "query1#expectedResult1;query2#expectedResult2;" */ public class SqlQuerySingleResultPreCommitValidator> extends SqlQueryPreCommitValidator { diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowCreateHandle.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowCreateHandle.java index 3566a8d8f..5cdb2ff68 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowCreateHandle.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowCreateHandle.java @@ -45,7 +45,7 @@ import java.io.Serializable; import java.util.concurrent.atomic.AtomicLong; /** - * Create handle with InternalRow for datasource implemention of bulk insert. + * Create handle with InternalRow for datasource implementation of bulk insert. */ public class HoodieRowCreateHandle implements Serializable { diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/NonpartitionedKeyGenerator.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/NonpartitionedKeyGenerator.java index 1664c86f9..032c750f0 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/NonpartitionedKeyGenerator.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/NonpartitionedKeyGenerator.java @@ -18,9 +18,10 @@ package org.apache.hudi.keygen; -import org.apache.avro.generic.GenericRecord; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.keygen.constant.KeyGeneratorOptions; + +import org.apache.avro.generic.GenericRecord; import org.apache.spark.sql.Row; import org.apache.spark.sql.catalyst.InternalRow; import org.apache.spark.sql.types.StructType; @@ -31,7 +32,7 @@ import java.util.List; import java.util.stream.Collectors; /** - * Simple Key generator for unpartitioned Hive Tables. + * Simple Key generator for non-partitioned Hive Tables. */ public class NonpartitionedKeyGenerator extends BuiltinKeyGenerator { diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/RowKeyGeneratorHelper.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/RowKeyGeneratorHelper.java index 24f6e7a4f..6a28fbe95 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/RowKeyGeneratorHelper.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/RowKeyGeneratorHelper.java @@ -40,9 +40,9 @@ import java.util.stream.IntStream; import scala.Option; -import static org.apache.hudi.keygen.KeyGenUtils.HUDI_DEFAULT_PARTITION_PATH; import static org.apache.hudi.keygen.KeyGenUtils.DEFAULT_PARTITION_PATH_SEPARATOR; import static org.apache.hudi.keygen.KeyGenUtils.EMPTY_RECORDKEY_PLACEHOLDER; +import static org.apache.hudi.keygen.KeyGenUtils.HUDI_DEFAULT_PARTITION_PATH; import static org.apache.hudi.keygen.KeyGenUtils.NULL_RECORDKEY_PLACEHOLDER; /** @@ -230,9 +230,10 @@ public class RowKeyGeneratorHelper { /** * Generate the tree style positions for the field requested for as per the defined struct type. 
- * @param structType schema of interest - * @param field field of interest for which the positions are requested for - * @param isRecordKey {@code true} if the field requested for is a record key. {@code false} incase of a partition path. + * + * @param structType schema of interest + * @param field field of interest for which the positions are requested for + * @param isRecordKey {@code true} if the field requested for is a record key. {@code false} in case of a partition path. * @return the positions of the field as per the struct type. */ public static List getNestedFieldIndices(StructType structType, String field, boolean isRecordKey) { diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/metadata/SparkHoodieBackedTableMetadataWriter.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/metadata/SparkHoodieBackedTableMetadataWriter.java index 1a32ae5e9..c905f92c2 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/metadata/SparkHoodieBackedTableMetadataWriter.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/metadata/SparkHoodieBackedTableMetadataWriter.java @@ -18,7 +18,6 @@ package org.apache.hudi.metadata; -import org.apache.avro.specific.SpecificRecordBase; import org.apache.hudi.client.SparkRDDWriteClient; import org.apache.hudi.client.WriteStatus; import org.apache.hudi.client.common.HoodieSparkEngineContext; @@ -35,6 +34,7 @@ import org.apache.hudi.data.HoodieJavaRDD; import org.apache.hudi.exception.HoodieMetadataException; import org.apache.hudi.metrics.DistributedRegistry; +import org.apache.avro.specific.SpecificRecordBase; import org.apache.hadoop.conf.Configuration; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -51,8 +51,8 @@ public class SparkHoodieBackedTableMetadataWriter extends HoodieBackedTableMetad /** * Return a Spark based implementation of {@code HoodieTableMetadataWriter} which can be used to * write to the metadata table. - * - * If the metadata table does not exist, an attempt is made to bootstrap it but there is no guarantted that + *

+ If the metadata table does not exist, an attempt is made to bootstrap it but there is no guarantee that * table will end up bootstrapping at this time. * * @param conf diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/BootstrapMetadataHandler.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/BootstrapMetadataHandler.java index 75daca739..237fe6cf8 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/BootstrapMetadataHandler.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/BootstrapMetadataHandler.java @@ -26,10 +26,11 @@ import org.apache.hudi.keygen.KeyGeneratorInterface; */ public interface BootstrapMetadataHandler { /** - * Execute bootstrap with only metatata. + * Execute bootstrap with only metadata. + * + * @param srcPartitionPath source partition path. - * @param partitionPath destination partition path. - * @param keyGenerator key generator to use. + * @param partitionPath destination partition path. + * @param keyGenerator key generator to use. * @return the {@link BootstrapWriteStatus} which has the result of execution. */ BootstrapWriteStatus runMetadataBootstrap(String srcPartitionPath, String partitionPath, KeyGeneratorInterface keyGenerator); diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/SparkBootstrapCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/SparkBootstrapCommitActionExecutor.java index 3c097cdcf..a970e8f0f 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/SparkBootstrapCommitActionExecutor.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/SparkBootstrapCommitActionExecutor.java @@ -113,9 +113,9 @@ public class SparkBootstrapCommitActionExecutor validate(); try { HoodieTableMetaClient metaClient = table.getMetaClient(); - Option completetedInstant = + Option completedInstant = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants().lastInstant(); - ValidationUtils.checkArgument(!completetedInstant.isPresent(), + ValidationUtils.checkArgument(!completedInstant.isPresent(), "Active Timeline is expected to be empty for bootstrap to be performed. 
" + "If you want to re-bootstrap, please rollback bootstrap first !!"); Map>>> partitionSelections = listAndProcessSourcePartitions(); diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/cluster/SparkExecuteClusteringCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/cluster/SparkExecuteClusteringCommitActionExecutor.java index 5b0224b0f..c8896e2cd 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/cluster/SparkExecuteClusteringCommitActionExecutor.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/cluster/SparkExecuteClusteringCommitActionExecutor.java @@ -116,7 +116,7 @@ public class SparkExecuteClusteringCommitActionExecutor> getPartitionToReplacedFileIds(HoodieWriteMetadata> writeMetadata) { Set newFilesWritten = writeMetadata.getWriteStats().get().stream() .map(s -> new HoodieFileGroupId(s.getPartitionPath(), s.getFileId())).collect(Collectors.toSet()); - // for the below execution strategy, new filegroup id would be same as old filegroup id + // for the below execution strategy, new file group id would be same as old file group id if (SparkSingleFileSortExecutionStrategy.class.getName().equals(config.getClusteringExecutionStrategyClass())) { return ClusteringUtils.getFileGroupsFromClusteringPlan(clusteringPlan) .collect(Collectors.groupingBy(fg -> fg.getPartitionPath(), Collectors.mapping(fg -> fg.getFileId(), Collectors.toList()))); diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/BaseSparkCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/BaseSparkCommitActionExecutor.java index e6c9b5f98..a9710a0d0 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/BaseSparkCommitActionExecutor.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/BaseSparkCommitActionExecutor.java @@ -20,16 +20,16 @@ package org.apache.hudi.table.action.commit; import org.apache.hudi.client.WriteStatus; import org.apache.hudi.client.utils.SparkMemoryUtils; -import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.client.utils.SparkValidatorUtils; +import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.model.HoodieCommitMetadata; +import org.apache.hudi.common.model.HoodieFileGroupId; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordLocation; import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.model.HoodieWriteStat; -import org.apache.hudi.common.model.HoodieFileGroupId; import org.apache.hudi.common.model.WriteOperationType; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.table.timeline.HoodieInstant; @@ -44,9 +44,9 @@ import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.HoodieUpsertException; import org.apache.hudi.execution.SparkLazyInsertIterable; import org.apache.hudi.io.CreateHandleFactory; +import org.apache.hudi.io.HoodieConcatHandle; import org.apache.hudi.io.HoodieMergeHandle; import org.apache.hudi.io.HoodieSortedMergeHandle; -import org.apache.hudi.io.HoodieConcatHandle; import org.apache.hudi.keygen.BaseKeyGenerator; import 
org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory; import org.apache.hudi.table.HoodieSparkTable; @@ -55,27 +55,29 @@ import org.apache.hudi.table.WorkloadProfile; import org.apache.hudi.table.WorkloadStat; import org.apache.hudi.table.action.HoodieWriteMetadata; import org.apache.hudi.table.action.cluster.strategy.UpdateStrategy; + import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.Partitioner; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.storage.StorageLevel; -import scala.Tuple2; import java.io.IOException; import java.io.Serializable; import java.nio.charset.StandardCharsets; import java.time.Duration; import java.time.Instant; -import java.util.stream.Collectors; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.Iterator; import java.util.List; -import java.util.Set; import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import scala.Tuple2; import static org.apache.hudi.common.util.ClusteringUtils.getAllFileGroupsInPendingClusteringPlans; @@ -126,7 +128,7 @@ public abstract class BaseSparkCommitActionExecutor pendingClusteringInstantsToRollback = getAllFileGroupsInPendingClusteringPlans(table.getMetaClient()).entrySet().stream() diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkInsertOverwritePartitioner.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkInsertOverwritePartitioner.java index 75dfbda30..dd545d526 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkInsertOverwritePartitioner.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkInsertOverwritePartitioner.java @@ -22,6 +22,7 @@ import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.WorkloadProfile; + import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -44,7 +45,7 @@ public class SparkInsertOverwritePartitioner extends UpsertPartitioner { * Returns a list of small files in the given partition path. */ protected List getSmallFiles(String partitionPath) { - // for overwrite, we ignore all existing files. So dont consider any file to be smallFiles + // for overwrite, we ignore all existing files. So do not consider any file to be smallFiles return Collections.emptyList(); } } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestClientRollback.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestClientRollback.java index 08960d97d..0c788def8 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestClientRollback.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestClientRollback.java @@ -171,7 +171,7 @@ public class TestClientRollback extends HoodieClientTestBase { } /** - * Test Cases for effects of rollbacking completed/inflight commits. + * Test Cases for effects of rolling back completed/inflight commits. 
*/ @Test public void testRollbackCommit() throws Exception { diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestHoodieClientMultiWriter.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestHoodieClientMultiWriter.java index f490d7fda..035799c01 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestHoodieClientMultiWriter.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestHoodieClientMultiWriter.java @@ -584,7 +584,7 @@ public class TestHoodieClientMultiWriter extends HoodieClientTestBase { private void createCommitWithInserts(HoodieWriteConfig cfg, SparkRDDWriteClient client, String prevCommitTime, String newCommitTime, int numRecords, boolean doCommit) throws Exception { - // Finish first base commmit + // Finish first base commit JavaRDD result = insertFirstBatch(cfg, client, newCommitTime, prevCommitTime, numRecords, SparkRDDWriteClient::bulkInsert, false, false, numRecords); if (doCommit) { diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestTableSchemaEvolution.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestTableSchemaEvolution.java index b2831419e..3fb454940 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestTableSchemaEvolution.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestTableSchemaEvolution.java @@ -147,7 +147,7 @@ public class TestTableSchemaEvolution extends HoodieClientTestBase { + TIP_NESTED_SCHEMA + EXTRA_FIELD_SCHEMA + EXTRA_FIELD_SCHEMA.replace("new_field", "new_new_field") + TRIP_SCHEMA_SUFFIX; assertTrue(TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, multipleAddedFieldSchema), - "Multiple added fields with defauls are compatible"); + "Multiple added fields with defaults are compatible"); assertFalse(TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, TRIP_SCHEMA_PREFIX + EXTRA_TYPE_SCHEMA + MAP_TYPE_SCHEMA @@ -205,7 +205,7 @@ public class TestTableSchemaEvolution extends HoodieClientTestBase { final List failedRecords = generateInsertsWithSchema("004", numRecords, TRIP_EXAMPLE_SCHEMA_DEVOLVED); try { // We cannot use insertBatch directly here because we want to insert records - // with a devolved schema and insertBatch inserts records using the TRIP_EXMPLE_SCHEMA. + // with a devolved schema and insertBatch inserts records using the TRIP_EXAMPLE_SCHEMA. writeBatch(client, "005", "004", Option.empty(), "003", numRecords, (String s, Integer a) -> failedRecords, SparkRDDWriteClient::insert, false, 0, 0, 0, false); fail("Insert with devolved scheme should fail"); @@ -233,7 +233,7 @@ public class TestTableSchemaEvolution extends HoodieClientTestBase { client = getHoodieWriteClient(hoodieEvolvedWriteConfig); // We cannot use insertBatch directly here because we want to insert records - // with a evolved schemaand insertBatch inserts records using the TRIP_EXMPLE_SCHEMA. + // with an evolved schema and insertBatch inserts records using the TRIP_EXAMPLE_SCHEMA. 
final List evolvedRecords = generateInsertsWithSchema("005", numRecords, TRIP_EXAMPLE_SCHEMA_EVOLVED); writeBatch(client, "005", "004", Option.empty(), initCommitTime, numRecords, (String s, Integer a) -> evolvedRecords, SparkRDDWriteClient::insert, false, 0, 0, 0, false); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/bloom/TestKeyRangeLookupTree.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/bloom/TestKeyRangeLookupTree.java index 012d0dfa3..1c6973db7 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/bloom/TestKeyRangeLookupTree.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/bloom/TestKeyRangeLookupTree.java @@ -80,7 +80,7 @@ public class TestKeyRangeLookupTree { * Tests for many duplicate entries in the tree. */ @Test - public void testFileGroupLookUpManyDulicateEntries() { + public void testFileGroupLookUpManyDuplicateEntries() { KeyRangeNode toInsert = new KeyRangeNode(Long.toString(1200), Long.toString(2000), UUID.randomUUID().toString()); updateExpectedMatchesToTest(toInsert); keyRangeLookupTree.insert(toInsert); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/hbase/TestSparkHoodieHBaseIndex.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/hbase/TestSparkHoodieHBaseIndex.java index d8a488d38..de17ddf0a 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/hbase/TestSparkHoodieHBaseIndex.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/hbase/TestSparkHoodieHBaseIndex.java @@ -191,7 +191,7 @@ public class TestSparkHoodieHBaseIndex extends SparkClientFunctionalTestHarness final String newCommitTime = "001"; final int numRecords = 10; final String oldPartitionPath = "1970/01/01"; - final String emptyHoodieRecordPayloadClasssName = EmptyHoodieRecordPayload.class.getName(); + final String emptyHoodieRecordPayloadClassName = EmptyHoodieRecordPayload.class.getName(); List newRecords = dataGen.generateInserts(newCommitTime, numRecords); List oldRecords = new LinkedList(); @@ -226,7 +226,7 @@ public class TestSparkHoodieHBaseIndex extends SparkClientFunctionalTestHarness assertEquals(numRecords * 2L, taggedRecords.stream().count()); // Verify the number of deleted records assertEquals(numRecords, taggedRecords.stream().filter(record -> record.getKey().getPartitionPath().equals(oldPartitionPath) - && record.getData().getClass().getName().equals(emptyHoodieRecordPayloadClasssName)).count()); + && record.getData().getClass().getName().equals(emptyHoodieRecordPayloadClassName)).count()); // Verify the number of inserted records assertEquals(numRecords, taggedRecords.stream().filter(record -> !record.getKey().getPartitionPath().equals(oldPartitionPath)).count()); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiveLog.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiveLog.java index 4902d7426..099deaadf 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiveLog.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiveLog.java @@ -18,7 +18,6 @@ package org.apache.hudi.io; -import org.apache.hadoop.fs.FileStatus; import org.apache.hudi.avro.model.HoodieRollbackMetadata; import org.apache.hudi.client.utils.MetadataConversionUtils; import org.apache.hudi.common.config.HoodieMetadataConfig; @@ -52,6 
+51,7 @@ import org.apache.hudi.table.HoodieTimelineArchiveLog; import org.apache.hudi.testutils.HoodieClientTestHarness; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -655,7 +655,8 @@ public class TestHoodieTimelineArchiveLog extends HoodieClientTestHarness { public void testArchiveTableWithCleanCommits(boolean enableMetadata) throws Exception { HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(enableMetadata, 2, 4, 2); - // min archival commits is 2 and max archival commits is 4(either clean commits has to be > 4 or commits has to be greater than 4. + // min archival commits is 2 and max archival commits is 4 + // (either the number of clean commits or the total number of commits has to be greater than 4) // and so, after 5th commit, 3 commits will be archived. // 1,2,3,4,5,6 : after archival -> 1,5,6 (because, 2,3,4,5 and 6 are clean commits and are eligible for archival) // after 7th and 8th commit no-op wrt archival. diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/factory/TestHoodieSparkKeyGeneratorFactory.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/factory/TestHoodieSparkKeyGeneratorFactory.java index dffe1eaa9..816c1fb86 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/factory/TestHoodieSparkKeyGeneratorFactory.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/factory/TestHoodieSparkKeyGeneratorFactory.java @@ -25,8 +25,8 @@ import org.apache.hudi.keygen.KeyGenerator; import org.apache.hudi.keygen.SimpleKeyGenerator; import org.apache.hudi.keygen.TestComplexKeyGenerator; import org.apache.hudi.keygen.constant.KeyGeneratorOptions; - import org.apache.hudi.keygen.constant.KeyGeneratorType; + import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -58,7 +58,7 @@ public class TestHoodieSparkKeyGeneratorFactory { // set both class name and keyGenerator type props.put(HoodieWriteConfig.KEYGENERATOR_TYPE.key(), KeyGeneratorType.CUSTOM.name()); KeyGenerator keyGenerator3 = HoodieSparkKeyGeneratorFactory.createKeyGenerator(props); - // KEYGENERATOR_TYPE_PROP was overitten by KEYGENERATOR_CLASS_PROP + // KEYGENERATOR_TYPE_PROP was overwritten by KEYGENERATOR_CLASS_PROP Assertions.assertEquals(SimpleKeyGenerator.class.getName(), keyGenerator3.getClass().getName()); // set wrong class name diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestConsistencyGuard.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestConsistencyGuard.java index afbe94937..22fafe4a5 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestConsistencyGuard.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestConsistencyGuard.java @@ -169,9 +169,9 @@ public class TestConsistencyGuard extends HoodieClientTestHarness { return getConsistencyGuardConfig(3, 10, 10); } - private ConsistencyGuardConfig getConsistencyGuardConfig(int maxChecks, int initalSleep, int maxSleep) { + private ConsistencyGuardConfig getConsistencyGuardConfig(int maxChecks, int initialSleep, int maxSleep) { return ConsistencyGuardConfig.newBuilder().withConsistencyCheckEnabled(true) - .withInitialConsistencyCheckIntervalMs(initalSleep).withMaxConsistencyCheckIntervalMs(maxSleep) + 
.withInitialConsistencyCheckIntervalMs(initialSleep).withMaxConsistencyCheckIntervalMs(maxSleep) .withMaxConsistencyChecks(maxChecks).build(); } } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/TestAsyncCompaction.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/TestAsyncCompaction.java index c2879fb1a..7c73c74f3 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/TestAsyncCompaction.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/TestAsyncCompaction.java @@ -18,8 +18,6 @@ package org.apache.hudi.table.action.compact; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.Path; import org.apache.hudi.client.HoodieReadClient; import org.apache.hudi.client.SparkRDDWriteClient; import org.apache.hudi.common.config.HoodieMetadataConfig; @@ -32,6 +30,9 @@ import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.table.HoodieSparkTable; import org.apache.hudi.table.HoodieTable; + +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; import org.apache.spark.api.java.JavaRDD; import org.junit.jupiter.api.Test; @@ -204,8 +205,8 @@ public class TestAsyncCompaction extends CompactionTestBase { String compactionInstantTime = "006"; int numRecs = 2000; - final List initalRecords = dataGen.generateInserts(firstInstantTime, numRecs); - final List records = runNextDeltaCommits(client, readClient, Arrays.asList(firstInstantTime, secondInstantTime), initalRecords, cfg, true, + final List initialRecords = dataGen.generateInserts(firstInstantTime, numRecs); + final List records = runNextDeltaCommits(client, readClient, Arrays.asList(firstInstantTime, secondInstantTime), initialRecords, cfg, true, new ArrayList<>()); // Schedule compaction but do not run them diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/TestInlineCompaction.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/TestInlineCompaction.java index ef52953a2..310ff4fe8 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/TestInlineCompaction.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/TestInlineCompaction.java @@ -28,6 +28,7 @@ import org.apache.hudi.config.HoodieCompactionConfig; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.table.HoodieSparkTable; import org.apache.hudi.table.marker.WriteMarkersFactory; + import org.junit.jupiter.api.Test; import java.util.ArrayList; @@ -62,7 +63,7 @@ public class TestInlineCompaction extends CompactionTestBase { runNextDeltaCommits(writeClient, readClient, instants, records, cfg, true, new ArrayList<>()); HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build(); - // Then: ensure no compaction is executedm since there are only 2 delta commits + // Then: ensure no compaction is executed since there are only 2 delta commits assertEquals(2, metaClient.getActiveTimeline().getWriteTimeline().countInstants()); } } @@ -152,7 +153,7 @@ public class TestInlineCompaction extends CompactionTestBase { runNextDeltaCommits(writeClient, readClient, instants, records, cfg, true, new ArrayList<>()); HoodieTableMetaClient metaClient = 
HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build(); - // Then: ensure no compaction is executedm since there are only 3 delta commits + // Then: ensure no compaction is executed since there are only 3 delta commits assertEquals(3, metaClient.getActiveTimeline().getWriteTimeline().countInstants()); // 4th commit, that will trigger compaction metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build(); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/strategy/TestHoodieCompactionStrategy.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/strategy/TestHoodieCompactionStrategy.java index dee1fadd7..0c7190092 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/strategy/TestHoodieCompactionStrategy.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/strategy/TestHoodieCompactionStrategy.java @@ -143,10 +143,10 @@ public class TestHoodieCompactionStrategy { "DayBasedCompactionStrategy should have resulted in fewer compactions"); assertEquals(2, returned.size(), "DayBasedCompactionStrategy should have resulted in fewer compactions"); - int comparision = strategy.getComparator().compare(returned.get(returned.size() - 1).getPartitionPath(), + int comparison = strategy.getComparator().compare(returned.get(returned.size() - 1).getPartitionPath(), returned.get(0).getPartitionPath()); // Either the partition paths are sorted in descending order or they are equal - assertTrue(comparision >= 0, "DayBasedCompactionStrategy should sort partitions in descending order"); + assertTrue(comparison >= 0, "DayBasedCompactionStrategy should sort partitions in descending order"); } @Test @@ -192,10 +192,10 @@ public class TestHoodieCompactionStrategy { assertEquals(5, returned.size(), "BoundedPartitionAwareCompactionStrategy should have resulted in fewer compactions"); - int comparision = strategy.getComparator().compare(returned.get(returned.size() - 1).getPartitionPath(), + int comparison = strategy.getComparator().compare(returned.get(returned.size() - 1).getPartitionPath(), returned.get(0).getPartitionPath()); // Either the partition paths are sorted in descending order or they are equal - assertTrue(comparision >= 0, "BoundedPartitionAwareCompactionStrategy should sort partitions in descending order"); + assertTrue(comparison >= 0, "BoundedPartitionAwareCompactionStrategy should sort partitions in descending order"); } @Test diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/rollback/HoodieClientRollbackTestBase.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/rollback/HoodieClientRollbackTestBase.java index 3b0829b16..243f49f90 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/rollback/HoodieClientRollbackTestBase.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/rollback/HoodieClientRollbackTestBase.java @@ -33,6 +33,7 @@ import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.table.HoodieTable; import org.apache.hudi.testutils.Assertions; import org.apache.hudi.testutils.HoodieClientTestBase; + import org.apache.spark.api.java.JavaRDD; import java.io.IOException; @@ -78,18 +79,18 @@ public class HoodieClientRollbackTestBase extends HoodieClientTestBase { } - //2. 
assert filegroup and get the first partition fileslice + //2. assert file group and get the first partition file slice HoodieTable table = this.getHoodieTable(metaClient, cfg); SyncableFileSystemView fsView = getFileSystemViewWithUnCommittedSlices(table.getMetaClient()); List firstPartitionCommit2FileGroups = fsView.getAllFileGroups(DEFAULT_FIRST_PARTITION_PATH).collect(Collectors.toList()); assertEquals(1, firstPartitionCommit2FileGroups.size()); firstPartitionCommit2FileSlices.addAll(firstPartitionCommit2FileGroups.get(0).getAllFileSlices().collect(Collectors.toList())); - //3. assert filegroup and get the second partition fileslice + //3. assert file group and get the second partition file slice List secondPartitionCommit2FileGroups = fsView.getAllFileGroups(DEFAULT_SECOND_PARTITION_PATH).collect(Collectors.toList()); assertEquals(1, secondPartitionCommit2FileGroups.size()); secondPartitionCommit2FileSlices.addAll(secondPartitionCommit2FileGroups.get(0).getAllFileSlices().collect(Collectors.toList())); - //4. assert fileslice + //4. assert file slice HoodieTableType tableType = this.getTableType(); if (tableType.equals(HoodieTableType.COPY_ON_WRITE)) { assertEquals(2, firstPartitionCommit2FileSlices.size()); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/rollback/TestMergeOnReadRollbackActionExecutor.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/rollback/TestMergeOnReadRollbackActionExecutor.java index 4e98b220f..877f0e29c 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/rollback/TestMergeOnReadRollbackActionExecutor.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/rollback/TestMergeOnReadRollbackActionExecutor.java @@ -112,7 +112,7 @@ public class TestMergeOnReadRollbackActionExecutor extends HoodieClientRollbackT assertTrue(meta.getSuccessDeleteFiles() == null || meta.getSuccessDeleteFiles().size() == 0); } - //4. assert filegroup after rollback, and compare to the rollbackstat + //4. assert file group after rollback, and compare to the rollbackstat // assert the first partition data and log file size List firstPartitionRollBack1FileGroups = table.getFileSystemView().getAllFileGroups(DEFAULT_FIRST_PARTITION_PATH).collect(Collectors.toList()); assertEquals(1, firstPartitionRollBack1FileGroups.size()); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/marker/TestWriteMarkersBase.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/marker/TestWriteMarkersBase.java index 5f96041b3..6ba783c74 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/marker/TestWriteMarkersBase.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/marker/TestWriteMarkersBase.java @@ -103,7 +103,7 @@ public abstract class TestWriteMarkersBase extends HoodieCommonTestHarness { @ParameterizedTest @ValueSource(booleans = {true, false}) public void testDataPathsWhenCreatingOrMerging(boolean isTablePartitioned) throws IOException { - // add markfiles + // add marker files createSomeMarkers(isTablePartitioned); // add invalid file createInvalidFile(isTablePartitioned ? 
"2020/06/01" : "", "invalid_file3"); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java index 6dffd535b..05d7f9944 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java @@ -207,7 +207,7 @@ public class HoodieClientTestUtils { } /** - * Reads the paths under the a hoodie table out as a DataFrame. + * Reads the paths under the hoodie table out as a DataFrame. */ public static Dataset read(JavaSparkContext jsc, String basePath, SQLContext sqlContext, FileSystem fs, String... paths) { diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java index 6000e04f3..3517e17b1 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java @@ -275,7 +275,7 @@ public class HoodieMetadataPayload implements HoodieRecordPayload streamFuture = executor.submit(() -> { LOG.info("===== Streaming Starting ====="); @@ -211,7 +211,7 @@ public class HoodieJavaStreamingApp { Dataset inputDF3 = newSpark.read().json(jssc.parallelize(deletes, 2)); executor = Executors.newFixedThreadPool(2); - // thread for spark strucutured streaming + // thread for spark structured streaming try { Future streamFuture = executor.submit(() -> { LOG.info("===== Streaming Starting ====="); diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/TestDataSourceUtils.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/TestDataSourceUtils.java index 0c5a2122d..bf3520f09 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/TestDataSourceUtils.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/TestDataSourceUtils.java @@ -191,7 +191,7 @@ public class TestDataSourceUtils { @Test public void testCreateUserDefinedBulkInsertPartitionerRowsWithInValidPartitioner() throws HoodieException { - config = HoodieWriteConfig.newBuilder().withPath("/").withUserDefinedBulkInsertPartitionerClass("NonExistantUserDefinedClass").build(); + config = HoodieWriteConfig.newBuilder().withPath("/").withUserDefinedBulkInsertPartitionerClass("NonExistentUserDefinedClass").build(); Exception exception = assertThrows(HoodieException.class, () -> { DataSourceUtils.createUserDefinedBulkInsertPartitionerWithRows(config); diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestBootstrap.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestBootstrap.java index 9b7d55347..21a645660 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestBootstrap.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestBootstrap.java @@ -179,7 +179,7 @@ public class TestBootstrap extends HoodieClientTestBase { } @Test - public void testMetadataBootstrapUnpartitionedCOW() throws Exception { + public void testMetadataBootstrapNonpartitionedCOW() throws Exception { testBootstrapCommon(false, false, EffectiveMode.METADATA_BOOTSTRAP_MODE); } @@ -229,7 +229,7 @@ public class TestBootstrap extends HoodieClientTestBase { bootstrapInstants = 
            Arrays.asList(bootstrapCommitInstantTs);
         break;
       default:
-        bootstrapModeSelectorClass = TestRandomBootstapModeSelector.class.getName();
+        bootstrapModeSelectorClass = TestRandomBootstrapModeSelector.class.getName();
         bootstrapCommitInstantTs = HoodieTimeline.FULL_BOOTSTRAP_INSTANT_TS;
         checkNumRawFiles = false;
         isBootstrapIndexCreated = true;
@@ -523,11 +523,11 @@ public class TestBootstrap extends HoodieClientTestBase {
     }).collect(Collectors.toList()));
   }
 
-  public static class TestRandomBootstapModeSelector extends BootstrapModeSelector {
+  public static class TestRandomBootstrapModeSelector extends BootstrapModeSelector {
 
     private int currIdx = new Random().nextInt(2);
 
-    public TestRandomBootstapModeSelector(HoodieWriteConfig writeConfig) {
+    public TestRandomBootstrapModeSelector(HoodieWriteConfig writeConfig) {
       super(writeConfig);
     }
 
diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestOrcBootstrap.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestOrcBootstrap.java
index 91eab6ddd..1992b9777 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestOrcBootstrap.java
+++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestOrcBootstrap.java
@@ -172,7 +172,7 @@ public class TestOrcBootstrap extends HoodieClientTestBase {
   }
 
   @Test
-  public void testMetadataBootstrapUnpartitionedCOW() throws Exception {
+  public void testMetadataBootstrapNonpartitionedCOW() throws Exception {
     testBootstrapCommon(false, false, EffectiveMode.METADATA_BOOTSTRAP_MODE);
   }
 
@@ -222,7 +222,7 @@ public class TestOrcBootstrap extends HoodieClientTestBase {
         bootstrapInstants = Arrays.asList(bootstrapCommitInstantTs);
         break;
       default:
-        bootstrapModeSelectorClass = TestRandomBootstapModeSelector.class.getName();
+        bootstrapModeSelectorClass = TestRandomBootstrapModeSelector.class.getName();
         bootstrapCommitInstantTs = HoodieTimeline.FULL_BOOTSTRAP_INSTANT_TS;
         checkNumRawFiles = false;
         isBootstrapIndexCreated = true;
@@ -438,10 +438,10 @@ public class TestOrcBootstrap extends HoodieClientTestBase {
     }).collect(Collectors.toList()));
   }
 
-  public static class TestRandomBootstapModeSelector extends BootstrapModeSelector {
+  public static class TestRandomBootstrapModeSelector extends BootstrapModeSelector {
 
     private int currIdx = new Random().nextInt(2);
 
-    public TestRandomBootstapModeSelector(HoodieWriteConfig writeConfig) {
+    public TestRandomBootstrapModeSelector(HoodieWriteConfig writeConfig) {
       super(writeConfig);
     }
 
diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/payload/TestAWSDmsAvroPayload.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/payload/TestAWSDmsAvroPayload.java
index 802096a3a..cf3d9a94d 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/payload/TestAWSDmsAvroPayload.java
+++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/payload/TestAWSDmsAvroPayload.java
@@ -25,7 +25,6 @@ import org.apache.avro.Schema;
 import org.apache.avro.generic.GenericData;
 import org.apache.avro.generic.GenericRecord;
 import org.apache.avro.generic.IndexedRecord;
-
 import org.junit.jupiter.api.Test;
 
 import static org.junit.jupiter.api.Assertions.assertFalse;
@@ -98,7 +97,7 @@ public class TestAWSDmsAvroPayload {
 
     try {
       Option<IndexedRecord> outputPayload = payload.combineAndGetUpdateValue(oldRecord, avroSchema);
-      // expect nothing to be comitted to table
+      // expect nothing to be committed to table
       assertFalse(outputPayload.isPresent());
     } catch (Exception e) {
       fail("Unexpected exception");
@@ -123,7 +122,7 @@ public class TestAWSDmsAvroPayload {
     try {
       OverwriteWithLatestAvroPayload output = payload.preCombine(insertPayload);
       Option<IndexedRecord> outputPayload = output.getInsertValue(avroSchema);
-      // expect nothing to be comitted to table
+      // expect nothing to be committed to table
       assertFalse(outputPayload.isPresent());
     } catch (Exception e) {
       fail("Unexpected exception");
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSourceStorage.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSourceStorage.scala
index bf616e2cb..0d7d62618 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSourceStorage.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSourceStorage.scala
@@ -99,9 +99,9 @@ class TestCOWDataSourceStorage extends SparkClientFunctionalTestHarness {
     var updateDf: DataFrame = null
     if (classOf[TimestampBasedKeyGenerator].getName.equals(keyGenClass)) {
       // update current_ts to be same as original record so that partition path does not change with timestamp based key gen
-      val orignalRow = inputDF1.filter(col("_row_key") === verificationRowKey).collectAsList().get(0)
+      val originalRow = inputDF1.filter(col("_row_key") === verificationRowKey).collectAsList().get(0)
       updateDf = snapshotDF1.filter(col("_row_key") === verificationRowKey).withColumn(verificationCol, lit(updatedVerificationVal))
-        .withColumn("current_ts", lit(orignalRow.getAs("current_ts")))
+        .withColumn("current_ts", lit(originalRow.getAs("current_ts")))
     } else {
       updateDf = snapshotDF1.filter(col("_row_key") === verificationRowKey).withColumn(verificationCol, lit(updatedVerificationVal))
     }
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/perf/TimelineServerPerf.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/perf/TimelineServerPerf.java
index ac15897f5..d992976da 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/perf/TimelineServerPerf.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/perf/TimelineServerPerf.java
@@ -74,7 +74,7 @@ public class TimelineServerPerf implements Serializable {
   public TimelineServerPerf(Config cfg) throws IOException {
     this.cfg = cfg;
     useExternalTimelineServer = (cfg.serverHost != null);
-    TimelineService.Config timelineServiceConf = cfg.getTimelinServerConfig();
+    TimelineService.Config timelineServiceConf = cfg.getTimelineServerConfig();
     this.timelineServer = new TimelineService(
         new HoodieLocalEngineContext(FSUtils.prepareHadoopConf(new Configuration())), new Configuration(),
         timelineServiceConf, FileSystem.get(new Configuration()),
@@ -281,7 +281,7 @@ public class TimelineServerPerf implements Serializable {
         description = " Server Host (Set it for externally managed timeline service")
     public String serverHost = null;
 
-    @Parameter(names = {"--view-storage", "-st"}, description = "View Storage Type. Defaut - SPILLABLE_DISK")
+    @Parameter(names = {"--view-storage", "-st"}, description = "View Storage Type. Default - SPILLABLE_DISK")
     public FileSystemViewStorageType viewStorageType = FileSystemViewStorageType.SPILLABLE_DISK;
 
     @Parameter(names = {"--max-view-mem-per-table", "-mv"},
@@ -310,7 +310,7 @@ public class TimelineServerPerf implements Serializable {
     @Parameter(names = {"--help", "-h"})
     public Boolean help = false;
 
-    public TimelineService.Config getTimelinServerConfig() {
+    public TimelineService.Config getTimelineServerConfig() {
       TimelineService.Config c = new TimelineService.Config();
       c.viewStorageType = viewStorageType;
       c.baseStorePathForFileGroups = baseStorePathForFileGroups;
diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/HoodieDeltaStreamerTestBase.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/HoodieDeltaStreamerTestBase.java
index b7e6f1870..b24faf7c0 100644
--- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/HoodieDeltaStreamerTestBase.java
+++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/HoodieDeltaStreamerTestBase.java
@@ -279,35 +279,35 @@ public class HoodieDeltaStreamerTestBase extends UtilitiesTestBase {
     HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator();
     if (useCustomSchema) {
       Helpers.saveORCToDFS(Helpers.toGenericRecords(
-              dataGenerator.generateInsertsAsPerSchema("000", numRecords, schemaStr),
-              schema), new Path(path), HoodieTestDataGenerator.ORC_TRIP_SCHEMA);
+          dataGenerator.generateInsertsAsPerSchema("000", numRecords, schemaStr),
+          schema), new Path(path), HoodieTestDataGenerator.ORC_TRIP_SCHEMA);
     } else {
       Helpers.saveORCToDFS(Helpers.toGenericRecords(
-              dataGenerator.generateInserts("000", numRecords)), new Path(path));
+          dataGenerator.generateInserts("000", numRecords)), new Path(path));
     }
   }
 
-  static void addCommitToTimeline(HoodieTableMetaClient metaCient) throws IOException {
-    addCommitToTimeline(metaCient, Collections.emptyMap());
+  static void addCommitToTimeline(HoodieTableMetaClient metaClient) throws IOException {
+    addCommitToTimeline(metaClient, Collections.emptyMap());
   }
 
-  static void addCommitToTimeline(HoodieTableMetaClient metaCient, Map<String, String> extraMetadata) throws IOException {
-    addCommitToTimeline(metaCient, WriteOperationType.UPSERT, HoodieTimeline.COMMIT_ACTION, extraMetadata);
+  static void addCommitToTimeline(HoodieTableMetaClient metaClient, Map<String, String> extraMetadata) throws IOException {
+    addCommitToTimeline(metaClient, WriteOperationType.UPSERT, HoodieTimeline.COMMIT_ACTION, extraMetadata);
   }
 
-  static void addReplaceCommitToTimeline(HoodieTableMetaClient metaCient, Map<String, String> extraMetadata) throws IOException {
-    addCommitToTimeline(metaCient, WriteOperationType.CLUSTER, HoodieTimeline.REPLACE_COMMIT_ACTION, extraMetadata);
+  static void addReplaceCommitToTimeline(HoodieTableMetaClient metaClient, Map<String, String> extraMetadata) throws IOException {
+    addCommitToTimeline(metaClient, WriteOperationType.CLUSTER, HoodieTimeline.REPLACE_COMMIT_ACTION, extraMetadata);
   }
 
-  static void addCommitToTimeline(HoodieTableMetaClient metaCient, WriteOperationType writeOperationType, String commitActiontype,
+  static void addCommitToTimeline(HoodieTableMetaClient metaClient, WriteOperationType writeOperationType, String commitActiontype,
                                   Map<String, String> extraMetadata) throws IOException {
     HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata();
     commitMetadata.setOperationType(writeOperationType);
-    extraMetadata.forEach((k,v) -> commitMetadata.getExtraMetadata().put(k, v));
+    extraMetadata.forEach((k, v) -> commitMetadata.getExtraMetadata().put(k, v));
     String commitTime = HoodieActiveTimeline.createNewInstantTime();
-    metaCient.getActiveTimeline().createNewInstant(new HoodieInstant(HoodieInstant.State.REQUESTED, commitActiontype, commitTime));
-    metaCient.getActiveTimeline().createNewInstant(new HoodieInstant(HoodieInstant.State.INFLIGHT, commitActiontype, commitTime));
-    metaCient.getActiveTimeline().saveAsComplete(
+    metaClient.getActiveTimeline().createNewInstant(new HoodieInstant(HoodieInstant.State.REQUESTED, commitActiontype, commitTime));
+    metaClient.getActiveTimeline().createNewInstant(new HoodieInstant(HoodieInstant.State.INFLIGHT, commitActiontype, commitTime));
+    metaClient.getActiveTimeline().saveAsComplete(
         new HoodieInstant(HoodieInstant.State.INFLIGHT, commitActiontype, commitTime),
         Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
   }
diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamerWithMultiWriter.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamerWithMultiWriter.java
index 5d79e8390..e383236af 100644
--- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamerWithMultiWriter.java
+++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamerWithMultiWriter.java
@@ -367,22 +367,22 @@ public class TestHoodieDeltaStreamerWithMultiWriter extends SparkClientFunctiona
       return true;
     };
 
-    AtomicBoolean continousFailed = new AtomicBoolean(false);
+    AtomicBoolean continuousFailed = new AtomicBoolean(false);
     AtomicBoolean backfillFailed = new AtomicBoolean(false);
     try {
       Future regularIngestionJobFuture = service.submit(() -> {
         try {
           deltaStreamerTestRunner(ingestionJob, cfgIngestionJob, conditionForRegularIngestion, jobId);
         } catch (Throwable ex) {
-          continousFailed.set(true);
+          continuousFailed.set(true);
           LOG.error("Continuous job failed " + ex.getMessage());
           throw new RuntimeException(ex);
         }
       });
       Future backfillJobFuture = service.submit(() -> {
         try {
-          // trigger backfill atleast after 1 requested entry is added to timline from continuous job. If not, there is a chance that backfill will complete even before
-          // continous job starts.
+          // trigger backfill atleast after 1 requested entry is added to timeline from continuous job. If not, there is a chance that backfill will complete even before
+          // continuous job starts.
           awaitCondition(new GetCommitsAfterInstant(tableBasePath, lastSuccessfulCommit));
           backfillJob.sync();
         } catch (Throwable ex) {