1
0

[HUDI-3280] Cleaning up Hive-related hierarchies after refactoring (#4743)

This commit is contained in:
Alexey Kudinkin
2022-02-16 15:36:37 -08:00
committed by GitHub
parent 3363c66468
commit aaddaf524a
27 changed files with 743 additions and 1013 deletions

View File

@@ -17,24 +17,17 @@
package org.apache.hudi
import org.apache.hudi.common.model.HoodieRecord
import org.apache.hudi.common.table.view.HoodieTableFileSystemView
import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver}
import org.apache.hudi.exception.HoodieException
import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils.listAffectedFilesForCommits
import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils.getCommitMetadata
import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils.getWritePartitionPaths
import org.apache.hudi.hadoop.utils.HoodieRealtimeRecordReaderUtils.getMaxCompactionMemoryInBytes
import org.apache.hadoop.fs.{GlobPattern, Path}
import org.apache.hadoop.mapred.JobConf
import org.apache.log4j.LogManager
import org.apache.hudi.common.model.HoodieRecord
import org.apache.hudi.common.table.HoodieTableMetaClient
import org.apache.hudi.common.table.view.HoodieTableFileSystemView
import org.apache.hudi.exception.HoodieException
import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils.{getCommitMetadata, getWritePartitionPaths, listAffectedFilesForCommits}
import org.apache.hudi.hadoop.utils.HoodieRealtimeRecordReaderUtils.getMaxCompactionMemoryInBytes
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.execution.datasources.PartitionedFile
import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
import org.apache.spark.sql.sources._
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.{Row, SQLContext}
@@ -167,7 +160,7 @@ class MergeOnReadIncrementalRelation(sqlContext: SQLContext,
def buildFileIndex(): List[HoodieMergeOnReadFileSplit] = {
val metadataList = commitsToReturn.map(instant => getCommitMetadata(instant, commitsTimelineToReturn))
-    val affectedFileStatus = listAffectedFilesForCommits(new Path(metaClient.getBasePath), metadataList)
+    val affectedFileStatus = listAffectedFilesForCommits(conf, new Path(metaClient.getBasePath), metadataList)
val fsView = new HoodieTableFileSystemView(metaClient, commitsTimelineToReturn, affectedFileStatus)
// Iterate partitions to create splits

View File

@@ -65,6 +65,7 @@ class SparkHoodieTableFileIndex(spark: SparkSession,
queryPaths.asJava,
toJavaOption(specifiedQueryInstant),
false,
false,
SparkHoodieTableFileIndex.adapt(fileStatusCache)
)
with SparkAdapterSupport