1
0

[HUDI-3979] Optimize out mandatory columns when no merging is performed (#5430)

For MOR tables, when no merging is performed there is no point in reading either the primary-key or the pre-combine-key values (unless the query references them). Skipping these columns can save the substantial resources that would otherwise be wasted on reading them.
This commit is contained in:
Alexey Kudinkin
2022-07-22 15:32:44 -07:00
committed by GitHub
parent 6b84384022
commit 39f2a06c85
8 changed files with 186 additions and 84 deletions

View File

@@ -51,6 +51,4 @@ class TestHoodieRelations {
requiredStructSchema.fields.toSeq
)
}
}

View File

@@ -54,7 +54,7 @@ class TestParquetColumnProjection extends SparkClientFunctionalTestHarness with
DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME.key -> classOf[NonpartitionedKeyGenerator].getName
)
@Disabled("HUDI-3896")
@Disabled("Currently disabled b/c of the fallback to HadoopFsRelation")
@Test
def testBaseFileOnlyViewRelation(): Unit = {
val tablePath = s"$basePath/cow"

View File

@@ -24,6 +24,7 @@ import org.apache.hudi.config.HoodieWriteConfig
import org.apache.hudi.exception.HoodieDuplicateKeyException
import org.apache.hudi.keygen.ComplexKeyGenerator
import org.apache.spark.sql.SaveMode
import org.apache.spark.sql.internal.SQLConf
import java.io.File