[HUDI-4223] Fix NullPointerException from getLogRecordScanner when reading metadata table (#5840)
When explicitly specifying the metadata table path for reading in Spark, the "hoodie.metadata.enable" config is overridden to true so that the metadata table is read correctly.
This commit is contained in:
@@ -25,7 +25,6 @@ import org.apache.hadoop.fs.Path
 import org.apache.hadoop.mapred.JobConf
 import org.apache.hudi.HoodieConversionUtils.{toJavaOption, toScalaOption}
 import org.apache.hudi.HoodieMergeOnReadRDD.{AvroDeserializerSupport, collectFieldOrdinals, getPartitionPath, projectAvro, projectAvroUnsafe, projectRowUnsafe, resolveAvroSchemaNullability}
 import org.apache.hudi.MergeOnReadSnapshotRelation.getFilePath
 import org.apache.hudi.common.config.HoodieMetadataConfig
 import org.apache.hudi.common.engine.HoodieLocalEngineContext
 import org.apache.hudi.common.fs.FSUtils
@@ -37,9 +36,9 @@ import org.apache.hudi.config.HoodiePayloadConfig
 import org.apache.hudi.exception.HoodieException
 import org.apache.hudi.hadoop.config.HoodieRealtimeConfig
 import org.apache.hudi.hadoop.utils.HoodieRealtimeRecordReaderUtils.getMaxCompactionMemoryInBytes
 import org.apache.hudi.internal.schema.InternalSchema
+import org.apache.hudi.metadata.HoodieTableMetadata.getDataTableBasePathFromMetadataTable
 import org.apache.hudi.metadata.{HoodieBackedTableMetadata, HoodieTableMetadata}
-import org.apache.hudi.internal.schema.InternalSchema
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.avro.HoodieAvroDeserializer
 import org.apache.spark.sql.catalyst.InternalRow
@@ -324,7 +323,8 @@ private object HoodieMergeOnReadRDD {
     val fs = FSUtils.getFs(tablePath, hadoopConf)

     if (HoodieTableMetadata.isMetadataTable(tablePath)) {
-      val metadataConfig = tableState.metadataConfig
+      val metadataConfig = HoodieMetadataConfig.newBuilder()
+        .fromProperties(tableState.metadataConfig.getProps).enable(true).build()
       val dataTableBasePath = getDataTableBasePathFromMetadataTable(tablePath)
       val metadataTable = new HoodieBackedTableMetadata(
         new HoodieLocalEngineContext(hadoopConf), metadataConfig,
@@ -78,7 +78,7 @@ class TestMetadataTableWithSparkDataSource extends SparkClientFunctionalTestHarn
       .save(basePath)

     // Files partition of MT
-    val filesPartitionDF = spark.read.options(metadataOpts).format(hudi).load(s"$basePath/.hoodie/metadata/files")
+    val filesPartitionDF = spark.read.format(hudi).load(s"$basePath/.hoodie/metadata/files")

     // Smoke test
     filesPartitionDF.show()
@@ -96,7 +96,7 @@ class TestMetadataTableWithSparkDataSource extends SparkClientFunctionalTestHarn
     assertEquals(expectedKeys, keys)

     // Column Stats Index partition of MT
-    val colStatsDF = spark.read.options(metadataOpts).format(hudi).load(s"$basePath/.hoodie/metadata/column_stats")
+    val colStatsDF = spark.read.format(hudi).load(s"$basePath/.hoodie/metadata/column_stats")

     // Smoke test
     colStatsDF.show()
Reference in New Issue
Block a user