diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieBaseBloomIndexCheckFunction.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieBaseBloomIndexCheckFunction.java index 2ca563aa3..441a212c5 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieBaseBloomIndexCheckFunction.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieBaseBloomIndexCheckFunction.java @@ -49,8 +49,8 @@ public class HoodieBaseBloomIndexCheckFunction } @Override - public Iterator> apply(Iterator> fileParitionRecordKeyTripletItr) { - return new LazyKeyCheckIterator(fileParitionRecordKeyTripletItr); + public Iterator> apply(Iterator> filePartitionRecordKeyTripletItr) { + return new LazyKeyCheckIterator(filePartitionRecordKeyTripletItr); } class LazyKeyCheckIterator extends LazyIterableIterator, List> { diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/utils/SparkValidatorUtils.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/utils/SparkValidatorUtils.java index ab65e504e..604abbd5c 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/utils/SparkValidatorUtils.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/utils/SparkValidatorUtils.java @@ -138,7 +138,7 @@ public class SparkValidatorUtils { } /** - * Get reads from paritions modified including any inflight commits. + * Get reads from partitions modified including any inflight commits. * Note that this only works for COW tables */ public static Dataset getRecordsFromPendingCommits(SQLContext sqlContext, diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndexCheckFunction.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndexCheckFunction.java index a8fac5fa7..148203c9b 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndexCheckFunction.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndexCheckFunction.java @@ -53,8 +53,8 @@ public class HoodieBloomIndexCheckFunction @Override public Iterator> call(Integer partition, - Iterator> fileParitionRecordKeyTripletItr) { - return new LazyKeyCheckIterator(fileParitionRecordKeyTripletItr); + Iterator> filePartitionRecordKeyTripletItr) { + return new LazyKeyCheckIterator(filePartitionRecordKeyTripletItr); } class LazyKeyCheckIterator extends LazyIterableIterator, List> { diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/catalyst/catalog/HoodieCatalogTable.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/catalyst/catalog/HoodieCatalogTable.scala index 21a60087f..8a43d8762 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/catalyst/catalog/HoodieCatalogTable.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/catalyst/catalog/HoodieCatalogTable.scala @@ -98,7 +98,7 @@ class HoodieCatalogTable(val spark: SparkSession, val table: CatalogTable) exten lazy val tableTypeName: String = tableType.name() /** - * Recored Field List(Primary Key List) + * Record Field List(Primary Key List) */ lazy val primaryKeys: Array[String] = tableConfig.getRecordKeyFields.orElse(Array.empty) @@ -108,7 +108,7 @@ class HoodieCatalogTable(val spark: SparkSession, val table: CatalogTable) exten lazy val preCombineKey: Option[String] = Option(tableConfig.getPreCombineField) /** - * Paritition Fields + * Partition Fields */ lazy val partitionFields: Array[String] = tableConfig.getPartitionFields.orElse(Array.empty) diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHoodieDatasetBulkInsertHelper.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHoodieDatasetBulkInsertHelper.java index 610122cb3..9185d09aa 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHoodieDatasetBulkInsertHelper.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHoodieDatasetBulkInsertHelper.java @@ -186,14 +186,14 @@ public class TestHoodieDatasetBulkInsertHelper extends HoodieClientTestBase { } int metadataRecordKeyIndex = resultSchema.fieldIndex(HoodieRecord.RECORD_KEY_METADATA_FIELD); - int metadataParitionPathIndex = resultSchema.fieldIndex(HoodieRecord.PARTITION_PATH_METADATA_FIELD); + int metadataPartitionPathIndex = resultSchema.fieldIndex(HoodieRecord.PARTITION_PATH_METADATA_FIELD); int metadataCommitTimeIndex = resultSchema.fieldIndex(HoodieRecord.COMMIT_TIME_METADATA_FIELD); int metadataCommitSeqNoIndex = resultSchema.fieldIndex(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD); int metadataFilenameIndex = resultSchema.fieldIndex(HoodieRecord.FILENAME_METADATA_FIELD); result.toJavaRDD().foreach(entry -> { assertTrue(entry.get(metadataRecordKeyIndex).equals(entry.getAs("_row_key"))); - assertTrue(entry.get(metadataParitionPathIndex).equals(entry.getAs("partition"))); + assertTrue(entry.get(metadataPartitionPathIndex).equals(entry.getAs("partition"))); assertTrue(entry.get(metadataCommitSeqNoIndex).equals("")); assertTrue(entry.get(metadataCommitTimeIndex).equals("")); assertTrue(entry.get(metadataFilenameIndex).equals("")); diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala index a42a1ced1..e8b179804 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala @@ -245,12 +245,12 @@ class TestCOWDataSource extends HoodieClientTestBase { spark.sql(String.format("select count(*) from tmpTable")).show() // step4: Query the rows count from hoodie table for partition1 DEFAULT_FIRST_PARTITION_PATH - val recordCountForParititon1 = spark.sql(String.format("select count(*) from tmpTable where partition = '%s'", HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH)).collect() - assertEquals("6", recordCountForParititon1(0).get(0).toString) + val recordCountForPartition1 = spark.sql(String.format("select count(*) from tmpTable where partition = '%s'", HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH)).collect() + assertEquals("6", recordCountForPartition1(0).get(0).toString) // step5: Query the rows count from hoodie table for partition2 DEFAULT_SECOND_PARTITION_PATH - val recordCountForParititon2 = spark.sql(String.format("select count(*) from tmpTable where partition = '%s'", HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH)).collect() - assertEquals("7", recordCountForParititon2(0).get(0).toString) + val recordCountForPartition2 = spark.sql(String.format("select count(*) from tmpTable where partition = '%s'", HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH)).collect() + assertEquals("7", recordCountForPartition2(0).get(0).toString) // step6: Query the rows count from hoodie table for partition2 DEFAULT_SECOND_PARTITION_PATH using spark.collect and then filter mode val recordsForPartitionColumn = spark.sql(String.format("select partition from tmpTable")).collect() @@ -292,12 +292,12 @@ class TestCOWDataSource extends HoodieClientTestBase { spark.sql(String.format("select count(*) from tmpTable")).show() // step3: Query the rows count from hoodie table for partition1 DEFAULT_FIRST_PARTITION_PATH - val recordCountForParititon1 = spark.sql(String.format("select count(*) from tmpTable where partition = '%s'", HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH)).collect() - assertEquals("0", recordCountForParititon1(0).get(0).toString) + val recordCountForPartition1 = spark.sql(String.format("select count(*) from tmpTable where partition = '%s'", HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH)).collect() + assertEquals("0", recordCountForPartition1(0).get(0).toString) // step4: Query the rows count from hoodie table for partition2 DEFAULT_SECOND_PARTITION_PATH - val recordCountForParititon2 = spark.sql(String.format("select count(*) from tmpTable where partition = '%s'", HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH)).collect() - assertEquals("7", recordCountForParititon2(0).get(0).toString) + val recordCountForPartition2 = spark.sql(String.format("select count(*) from tmpTable where partition = '%s'", HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH)).collect() + assertEquals("7", recordCountForPartition2(0).get(0).toString) // step5: Query the rows count from hoodie table val recordCount = spark.sql(String.format("select count(*) from tmpTable")).collect() @@ -417,7 +417,7 @@ class TestCOWDataSource extends HoodieClientTestBase { .mode(SaveMode.Overwrite) } - @Test def testSparkPartitonByWithCustomKeyGenerator(): Unit = { + @Test def testSparkPartitionByWithCustomKeyGenerator(): Unit = { // Without fieldType, the default is SIMPLE var writer = getDataFrameWriter(classOf[CustomKeyGenerator].getName) writer.partitionBy("current_ts") @@ -465,7 +465,7 @@ class TestCOWDataSource extends HoodieClientTestBase { } } - @Test def testSparkPartitonByWithSimpleKeyGenerator() { + @Test def testSparkPartitionByWithSimpleKeyGenerator() { // Use the `driver` field as the partition key var writer = getDataFrameWriter(classOf[SimpleKeyGenerator].getName) writer.partitionBy("driver") @@ -484,7 +484,7 @@ class TestCOWDataSource extends HoodieClientTestBase { assertTrue(recordsReadDF.filter(col("_hoodie_partition_path") =!= lit("default")).count() == 0) } - @Test def testSparkPartitonByWithComplexKeyGenerator() { + @Test def testSparkPartitionByWithComplexKeyGenerator() { // Use the `driver` field as the partition key var writer = getDataFrameWriter(classOf[ComplexKeyGenerator].getName) writer.partitionBy("driver") @@ -503,7 +503,7 @@ class TestCOWDataSource extends HoodieClientTestBase { assertTrue(recordsReadDF.filter(col("_hoodie_partition_path") =!= concat(col("driver"), lit("/"), col("rider"))).count() == 0) } - @Test def testSparkPartitonByWithTimestampBasedKeyGenerator() { + @Test def testSparkPartitionByWithTimestampBasedKeyGenerator() { val writer = getDataFrameWriter(classOf[TimestampBasedKeyGenerator].getName) writer.partitionBy("current_ts") .option(Config.TIMESTAMP_TYPE_FIELD_PROP, "EPOCHMILLISECONDS") @@ -517,7 +517,7 @@ class TestCOWDataSource extends HoodieClientTestBase { assertTrue(recordsReadDF.filter(col("_hoodie_partition_path") =!= udf_date_format(col("current_ts"))).count() == 0) } - @Test def testSparkPartitonByWithGlobalDeleteKeyGenerator() { + @Test def testSparkPartitionByWithGlobalDeleteKeyGenerator() { val writer = getDataFrameWriter(classOf[GlobalDeleteKeyGenerator].getName) writer.partitionBy("driver") .mode(SaveMode.Overwrite) @@ -528,7 +528,7 @@ class TestCOWDataSource extends HoodieClientTestBase { assertTrue(recordsReadDF.filter(col("_hoodie_partition_path") =!= lit("")).count() == 0) } - @Test def testSparkPartitonByWithNonpartitionedKeyGenerator() { + @Test def testSparkPartitionByWithNonpartitionedKeyGenerator() { // Empty string column var writer = getDataFrameWriter(classOf[NonpartitionedKeyGenerator].getName) writer.partitionBy("") diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveStylePartitionValueExtractor.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveStylePartitionValueExtractor.java index 4bb20f5e5..6741d4690 100644 --- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveStylePartitionValueExtractor.java +++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveStylePartitionValueExtractor.java @@ -22,7 +22,7 @@ import java.util.Collections; import java.util.List; /** - * Extractor for Hive Style Partitioned tables, when the parition folders are key value pairs. + * Extractor for Hive Style Partitioned tables, when the partition folders are key value pairs. * *

This implementation extracts the partition value of yyyy-mm-dd from the path of type datestr=yyyy-mm-dd. */ diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/DatePartitionPathSelector.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/DatePartitionPathSelector.java index 71e6a57db..5399f6fe4 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/DatePartitionPathSelector.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/DatePartitionPathSelector.java @@ -131,9 +131,9 @@ public class DatePartitionPathSelector extends DFSPathSelector { long lastCheckpointTime = lastCheckpointStr.map(Long::parseLong).orElse(Long.MIN_VALUE); HoodieSparkEngineContext context = new HoodieSparkEngineContext(sparkContext); SerializableConfiguration serializedConf = new SerializableConfiguration(fs.getConf()); - List prunedParitionPaths = pruneDatePartitionPaths(context, fs, props.getString(ROOT_INPUT_PATH_PROP), currentDate); + List prunedPartitionPaths = pruneDatePartitionPaths(context, fs, props.getString(ROOT_INPUT_PATH_PROP), currentDate); - List eligibleFiles = context.flatMap(prunedParitionPaths, + List eligibleFiles = context.flatMap(prunedPartitionPaths, path -> { FileSystem fs = new Path(path).getFileSystem(serializedConf.get()); return listEligibleFiles(fs, new Path(path), lastCheckpointTime).stream();