1
0

[HUDI-1425] Performance loss with the additional hoodieRecords.isEmpty() in HoodieSparkSqlWriter#write (#2296)

Authored by pengzhiwei on 2021-07-29 12:30:18 +08:00; committed by GitHub.
parent efbbb67420
commit bbadac7de1
5 changed files with 35 additions and 8 deletions

View File

@@ -194,11 +194,6 @@ object HoodieSparkSqlWriter {
} else {
hoodieAllIncomingRecords
}
if (hoodieRecords.isEmpty()) {
log.info("new batch has no new records, skipping...")
(true, common.util.Option.empty())
}
client.startCommitWithTime(instantTime, commitActionType)
val writeResult = DataSourceUtils.doWriteOperation(client, hoodieRecords, instantTime, operation)
(writeResult, client)

View File

@@ -574,7 +574,7 @@ class TestCOWDataSource extends HoodieClientTestBase {
fail("should fail when invalid PartitionKeyType is provided!")
} catch {
case e: Exception =>
assertTrue(e.getMessage.contains("No enum constant org.apache.hudi.keygen.CustomAvroKeyGenerator.PartitionKeyType.DUMMY"))
assertTrue(e.getCause.getMessage.contains("No enum constant org.apache.hudi.keygen.CustomAvroKeyGenerator.PartitionKeyType.DUMMY"))
}
}
@@ -770,7 +770,6 @@ class TestCOWDataSource extends HoodieClientTestBase {
}
}
@Test def testSchemaNotEqualData(): Unit = {
val opts = commonOpts ++ Map("hoodie.avro.schema.validate" -> "true")
val schema1 = StructType(StructField("_row_key", StringType, true) :: StructField("name", StringType, true)::
@@ -785,11 +784,23 @@ class TestCOWDataSource extends HoodieClientTestBase {
.options(opts)
.mode(SaveMode.Append)
.save(basePath)
val recordsReadDF = spark.read.format("org.apache.hudi")
.load(basePath + "/*/*")
val resultSchema = new StructType(recordsReadDF.schema.filter(p=> !p.name.startsWith("_hoodie")).toArray)
assertEquals(resultSchema, schema1)
}
@ParameterizedTest
@ValueSource(booleans = Array(true, false))
// Regression test for HUDI-1425: writing an empty input batch must not fail.
// Runs once with ALLOW_EMPTY_COMMIT enabled and once disabled, and checks
// that a commit is produced on the timeline only when empty commits are allowed.
def testWithEmptyInput(allowEmptyCommit: Boolean): Unit = {
// Build a DataFrame with zero rows (empty JSON RDD with a single partition).
val inputDF1 = spark.read.json(spark.sparkContext.parallelize(Seq.empty[String], 1))
inputDF1.write.format("org.apache.hudi")
.options(commonOpts)
.option(DataSourceWriteOptions.OPERATION_OPT_KEY.key(), DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
// Toggle the config under test so both code paths of the empty-batch check are exercised.
.option(HoodieWriteConfig.ALLOW_EMPTY_COMMIT.key(), allowEmptyCommit.toString)
.mode(SaveMode.Overwrite)
.save(basePath)
// A new commit since instant "000" should exist iff empty commits were allowed.
assertEquals(allowEmptyCommit, HoodieDataSourceHelpers.hasNewCommits(fs, basePath, "000"))
}
}