1
0

[HUDI-2911] Removing default value for PARTITIONPATH_FIELD_NAME resulting in incorrect KeyGenerator configuration (#4195)

This commit is contained in:
Alexey Kudinkin
2021-12-03 04:33:38 -08:00
committed by GitHub
parent e483f7c776
commit bed7f9897a
5 changed files with 71 additions and 5 deletions

View File

@@ -25,8 +25,9 @@ import org.apache.hudi.common.config.HoodieConfig
import org.apache.hudi.common.model._
import org.apache.hudi.common.table.{HoodieTableConfig, HoodieTableMetaClient, TableSchemaResolver}
import org.apache.hudi.common.testutils.HoodieTestDataGenerator
import org.apache.hudi.common.util.PartitionPathEncodeUtils
import org.apache.hudi.config.{HoodieBootstrapConfig, HoodieWriteConfig}
import org.apache.hudi.exception.HoodieException
import org.apache.hudi.exception.{ExceptionUtil, HoodieException}
import org.apache.hudi.execution.bulkinsert.BulkInsertSortMode
import org.apache.hudi.functional.TestBootstrap
import org.apache.hudi.hive.HiveSyncConfig
@@ -40,13 +41,16 @@ import org.apache.spark.sql.hudi.command.SqlKeyGenerator
import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}
import org.apache.spark.sql.{DataFrame, Dataset, Row, SQLContext, SaveMode, SparkSession}
import org.junit.jupiter.api.Assertions.{assertEquals, assertFalse, assertTrue, fail}
import org.junit.jupiter.api.function.Executable
import org.junit.jupiter.api.{AfterEach, BeforeEach, Test}
import org.junit.jupiter.params.ParameterizedTest
import org.junit.jupiter.params.provider.{CsvSource, EnumSource, ValueSource}
import org.mockito.ArgumentMatchers.any
import org.mockito.Mockito.{spy, times, verify}
import org.scalatest.Assertions.assertThrows
import org.scalatest.Matchers.{assertResult, be, convertToAnyShouldWrapper, intercept}
import java.io.IOException
import java.time.Instant
import java.util.{Collections, Date, UUID}
import scala.collection.JavaConversions._
@@ -346,7 +350,6 @@ class TestHoodieSparkSqlWriter {
@Test
def testInsertDatasetWithoutPrecombineField(): Unit = {
//create a new table
val fooTableModifier = commonTableModifier.updated(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
.updated(DataSourceWriteOptions.INSERT_DROP_DUPS.key, "false")
@@ -375,6 +378,28 @@ class TestHoodieSparkSqlWriter {
assert(df.except(trimmedDf).count() == 0)
}
/**
* Test case for insert dataset without partitioning field
*/
@Test
def testInsertDatasetWithoutPartitionField(): Unit = {
val tableOpts =
commonTableModifier
.updated(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
// generate the inserts
val schema = DataSourceTestUtils.getStructTypeExampleSchema
val structType = AvroConversionUtils.convertAvroSchemaToStructType(schema)
val records = DataSourceTestUtils.generateRandomRows(1)
val recordsSeq = convertRowListToSeq(records)
val df = spark.createDataFrame(sc.parallelize(recordsSeq), structType)
// try write to Hudi
assertThrows[IOException] {
HoodieSparkSqlWriter.write(sqlContext, SaveMode.Append, tableOpts - DataSourceWriteOptions.PARTITIONPATH_FIELD.key, df)
}
}
/**
* Test case for bulk insert dataset with datasource impl multiple rounds.
*/