[HUDI-1550] Honor ordering field for MOR Spark datasource reader (#2497)
This commit is contained in:
@@ -17,11 +17,14 @@
|
||||
|
||||
package org.apache.hudi.functional
|
||||
|
||||
import org.apache.hudi.DataSourceWriteOptions.{KEYGENERATOR_CLASS_OPT_KEY, PARTITIONPATH_FIELD_OPT_KEY, PAYLOAD_CLASS_OPT_KEY, PRECOMBINE_FIELD_OPT_KEY, RECORDKEY_FIELD_OPT_KEY}
|
||||
import org.apache.hudi.common.fs.FSUtils
|
||||
import org.apache.hudi.common.model.DefaultHoodieRecordPayload
|
||||
import org.apache.hudi.common.testutils.HoodieTestDataGenerator
|
||||
import org.apache.hudi.config.{HoodieCompactionConfig, HoodieWriteConfig}
|
||||
import org.apache.hudi.config.HoodieWriteConfig
|
||||
import org.apache.hudi.{DataSourceReadOptions, DataSourceWriteOptions, HoodieDataSourceHelpers}
|
||||
import org.apache.hudi.common.testutils.RawTripTestPayload.recordsToStrings
|
||||
import org.apache.hudi.keygen.NonpartitionedKeyGenerator
|
||||
import org.apache.hudi.testutils.HoodieClientTestBase
|
||||
import org.apache.log4j.LogManager
|
||||
import org.apache.spark.sql._
|
||||
@@ -29,7 +32,6 @@ import org.apache.spark.sql.functions._
|
||||
import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue}
|
||||
import org.junit.jupiter.api.{AfterEach, BeforeEach, Test}
|
||||
|
||||
|
||||
import scala.collection.JavaConversions._
|
||||
|
||||
/**
|
||||
@@ -502,6 +504,44 @@ class TestMORDataSource extends HoodieClientTestBase {
|
||||
hudiSnapshotDF2.show(1)
|
||||
}
|
||||
|
||||
/**
 * Checks that reading a MOR table keeps the record with the highest
 * ordering value. Records are written through `writeData`, which sets the
 * precombine field to the "version" column (the last tuple element).
 */
@Test
def testPreCombineFiledForReadMOR(): Unit = {
  val initialRecord = (1, "a0", 10, 100)
  val staleRecord = (1, "a0", 12, 99)
  val newerRecord = (1, "a0", 12, 101)

  writeData(initialRecord)
  checkAnswer(initialRecord)

  // Version 99 < 100, so the stored record must stay unchanged.
  writeData(staleRecord)
  checkAnswer(initialRecord)

  // Version 101 > 100, so the record must now carry the new value.
  writeData(newerRecord)
  checkAnswer(newerRecord)
}
|
||||
|
||||
/**
 * Appends one record to the Hudi table located at `basePath`.
 *
 * The table is written as merge-on-read and non-partitioned, keyed by "id",
 * with "version" as the precombine field and `DefaultHoodieRecordPayload`
 * as the payload class.
 *
 * @param data the record as (id, name, value, version)
 */
private def writeData(data: (Int, String, Int, Int)): Unit = {
  // Local val so the implicits can be imported from it
  // (presumably `spark` is not a stable identifier in this class).
  val session = spark
  import session.implicits._

  // Applied after commonOpts, so these settings win on any key clash.
  val tableOpts: Map[String, String] = Map(
    // use DefaultHoodieRecordPayload here
    PAYLOAD_CLASS_OPT_KEY -> classOf[DefaultHoodieRecordPayload].getCanonicalName,
    DataSourceWriteOptions.TABLE_TYPE_OPT_KEY -> DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL,
    RECORDKEY_FIELD_OPT_KEY -> "id",
    PRECOMBINE_FIELD_OPT_KEY -> "version",
    PARTITIONPATH_FIELD_OPT_KEY -> "",
    KEYGENERATOR_CLASS_OPT_KEY -> classOf[NonpartitionedKeyGenerator].getName
  )

  val singleRecordDf = Seq(data).toDF("id", "name", "value", "version")
  singleRecordDf.write
    .format("org.apache.hudi")
    .options(commonOpts)
    .options(tableOpts)
    .mode(SaveMode.Append)
    .save(basePath)
}
|
||||
|
||||
/**
 * Asserts that the table at `basePath` contains exactly one row and that
 * its (id, name, value, version) columns equal `expect`.
 *
 * The whole result is compared rather than only the first row, so that
 * an empty result or leftover unmerged duplicates fail the assertion
 * with a readable diff instead of passing or throwing on index 0.
 *
 * @param expect the expected record as (id, name, value, version)
 */
private def checkAnswer(expect: (Int, String, Int, Int)): Unit = {
  val readDf = spark.read.format("org.apache.hudi")
    .load(basePath + "/*")
  val expectedRow = Row(expect.productIterator.toSeq: _*)
  val actualRows = readDf.select("id", "name", "value", "version").collect().toList
  assertEquals(List(expectedRow), actualRows)
}
|
||||
|
||||
def verifySchemaAndTypes(df: DataFrame): Unit = {
|
||||
assertEquals("amount,currency,tip_history,_hoodie_commit_seqno",
|
||||
df.select("fare.amount", "fare.currency", "tip_history", "_hoodie_commit_seqno")
|
||||
|
||||
Reference in New Issue
Block a user