1
0

[HUDI-3191] Removing duplicating file-listing process w/in Hive's MOR FileInputFormats (#4556)

This commit is contained in:
Alexey Kudinkin
2022-02-03 14:01:41 -08:00
committed by GitHub
parent 5927bdd1c0
commit 69dfcda116
22 changed files with 493 additions and 197 deletions

View File

@@ -0,0 +1,35 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi;
public class TypeUtils {

  /**
   * Performs an unchecked cast of the supplied object to whatever type the call site expects.
   *
   * <p>Routing such casts through this single method serves two purposes:
   * <ul>
   *   <li>every unsafe type-cast can be located simply by looking up the usages of this method;</li>
   *   <li>the compiler's unchecked-cast warning is suppressed once, here, instead of at each call site.</li>
   * </ul>
   *
   * <p>No type check is performed by this method itself: the reference is handed back as-is.
   *
   * @param o   the object to be re-typed
   * @param <T> the target type, normally inferred from the call site
   * @return the very same reference, statically typed as {@code T}
   */
  @SuppressWarnings("unchecked")
  public static <T> T unsafeCast(Object o) {
    final T casted = (T) o;
    return casted;
  }
}

View File

@@ -24,11 +24,13 @@ import org.apache.hudi.common.fs.FSUtils
import org.apache.hudi.common.model.HoodieTableType.MERGE_ON_READ
import org.apache.hudi.common.model.{FileSlice, HoodieTableQueryType}
import org.apache.hudi.common.table.HoodieTableMetaClient
import org.apache.hudi.common.table.timeline.HoodieInstant
import org.apache.hudi.common.table.view.{FileSystemViewStorageConfig, HoodieTableFileSystemView}
import scala.collection.JavaConverters._
import scala.collection.JavaConversions._
import scala.collection.mutable
import scala.language.implicitConversions
/**
* Common (engine-agnostic) File Index implementation enabling individual query engines to
@@ -87,6 +89,12 @@ abstract class HoodieTableFileIndexBase(engineContext: HoodieEngineContext,
refresh0()
/**
 * Looks up the most recent completed instant visible to this file-index instance.
 *
 * The lookup goes through `getActiveTimeline`, narrows it down to completed instants and
 * takes the last one.
 *
 * @return the last completed instant wrapped into Scala's `Option`
 *         (presumably `None` when no instant has completed yet)
 */
def latestCompletedInstant(): Option[HoodieInstant] = {
  val completedInstants = getActiveTimeline.filterCompletedInstants()
  completedInstants.lastInstant()
}
/**
* Fetch list of latest base files and log files per partition.
*
@@ -171,11 +179,17 @@ abstract class HoodieTableFileIndexBase(engineContext: HoodieEngineContext,
}
/**
 * Resolves the timeline this file-index operates on.
 *
 * When `shouldIncludePendingCommits` is set, the commits-and-compaction timeline is returned
 * unfiltered; otherwise it is narrowed down to completed instants plus compaction instants.
 */
private def getActiveTimeline = {
  // NOTE: We have to use the commits and compactions timeline to make sure that we properly
  //       handle the following case: when records are inserted into a new log-file w/in a file-group
  //       that is under a pending compaction, the new log-file will bear the compaction's instant
  //       (on the timeline) in its name, as opposed to the base-file's commit instant. To make sure
  //       we're not filtering out such a log-file, we have to _always_ take pending compaction
  //       instants into consideration
  // TODO(HUDI-3302) re-evaluate whether we should not filter any commits in here
  val timeline = metaClient.getCommitsAndCompactionTimeline
  if (shouldIncludePendingCommits) {
    timeline
  } else {
    timeline.filterCompletedAndCompactionInstants()
  }
}
@@ -291,6 +305,16 @@ abstract class HoodieTableFileIndexBase(engineContext: HoodieEngineContext,
}
}
}
/**
 * Implicitly adapts Hudi's own `Option` type to the standard Scala `Option`.
 *
 * @param opt Hudi option to convert
 * @tparam T  type of the wrapped value
 * @return `Some` holding the wrapped value when `opt` reports a value as present, `None` otherwise
 */
implicit def asScalaOption[T](opt: org.apache.hudi.common.util.Option[T]): Option[T] = {
  val isEmpty = !opt.isPresent
  if (isEmpty) None else Some(opt.get)
}
}
trait FileStatusCacheTrait {