[HUDI-3239] Convert BaseHoodieTableFileIndex to Java (#4669)
Converts BaseHoodieTableFileIndex to Java, removing Scala as a dependency of "hudi-common".
This commit is contained in:
@@ -20,8 +20,7 @@ package org.apache.hudi.hadoop;
|
||||
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hudi.HoodieTableFileIndexBase;
|
||||
import org.apache.hudi.FileStatusCacheTrait;
|
||||
import org.apache.hudi.BaseHoodieTableFileIndex;
|
||||
import org.apache.hudi.common.config.TypedProperties;
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.common.model.HoodieTableQueryType;
|
||||
@@ -29,15 +28,13 @@ import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import scala.Function0;
|
||||
import scala.collection.JavaConverters;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Implementation of {@link HoodieTableFileIndexBase} for Hive-based query engines
|
||||
* Implementation of {@link BaseHoodieTableFileIndex} for Hive-based query engines
|
||||
*/
|
||||
public class HiveHoodieTableFileIndex extends HoodieTableFileIndexBase {
|
||||
public class HiveHoodieTableFileIndex extends BaseHoodieTableFileIndex {
|
||||
|
||||
public static final Logger LOG = LoggerFactory.getLogger(HiveHoodieTableFileIndex.class);
|
||||
|
||||
@@ -53,16 +50,12 @@ public class HiveHoodieTableFileIndex extends HoodieTableFileIndexBase {
|
||||
metaClient,
|
||||
configProperties,
|
||||
queryType,
|
||||
JavaConverters.asScalaBufferConverter(queryPaths).asScala(),
|
||||
toScalaOption(specifiedQueryInstant),
|
||||
queryPaths,
|
||||
specifiedQueryInstant,
|
||||
shouldIncludePendingCommits,
|
||||
new NoopCache());
|
||||
}
|
||||
|
||||
private static scala.Option<String> toScalaOption(Option<String> opt) {
|
||||
return scala.Option.apply(opt.orElse(null));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object[] parsePartitionColumnValues(String[] partitionColumns, String partitionPath) {
|
||||
// NOTE: Parsing partition path into partition column values isn't required on Hive,
|
||||
@@ -71,20 +64,10 @@ public class HiveHoodieTableFileIndex extends HoodieTableFileIndexBase {
|
||||
return new Object[0];
|
||||
}
|
||||
|
||||
@Override
|
||||
public void logInfo(Function0<String> lazyStr) {
|
||||
LOG.info(lazyStr.apply());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void logWarning(Function0<String> lazyStr) {
|
||||
LOG.info(lazyStr.apply());
|
||||
}
|
||||
|
||||
static class NoopCache implements FileStatusCacheTrait {
|
||||
static class NoopCache implements FileStatusCache {
|
||||
@Override
|
||||
public scala.Option<FileStatus[]> get(Path path) {
|
||||
return scala.Option.empty();
|
||||
public Option<FileStatus[]> get(Path path) {
|
||||
return Option.empty();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@@ -46,14 +46,13 @@ import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.exception.HoodieIOException;
|
||||
import org.apache.hudi.hadoop.utils.HoodieHiveUtils;
|
||||
import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils;
|
||||
import scala.collection.JavaConverters;
|
||||
import scala.collection.Seq;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import java.io.IOException;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
@@ -79,24 +78,6 @@ public class HoodieCopyOnWriteTableInputFormat extends FileInputFormat<NullWrita
|
||||
|
||||
protected Configuration conf;
|
||||
|
||||
@Nonnull
|
||||
private static RealtimeFileStatus createRealtimeFileStatusUnchecked(HoodieBaseFile baseFile, Stream<HoodieLogFile> logFiles) {
|
||||
List<HoodieLogFile> sortedLogFiles = logFiles.sorted(HoodieLogFile.getLogFileComparator()).collect(Collectors.toList());
|
||||
FileStatus baseFileStatus = getFileStatusUnchecked(baseFile);
|
||||
try {
|
||||
RealtimeFileStatus rtFileStatus = new RealtimeFileStatus(baseFileStatus);
|
||||
rtFileStatus.setDeltaLogFiles(sortedLogFiles);
|
||||
rtFileStatus.setBaseFilePath(baseFile.getPath());
|
||||
if (baseFileStatus instanceof LocatedFileStatusWithBootstrapBaseFile || baseFileStatus instanceof FileStatusWithBootstrapBaseFile) {
|
||||
rtFileStatus.setBootStrapFileStatus(baseFileStatus);
|
||||
}
|
||||
|
||||
return rtFileStatus;
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException(String.format("Failed to init %s", RealtimeFileStatus.class.getSimpleName()), e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public final Configuration getConf() {
|
||||
return conf;
|
||||
@@ -265,25 +246,23 @@ public class HoodieCopyOnWriteTableInputFormat extends FileInputFormat<NullWrita
|
||||
engineContext,
|
||||
tableMetaClient,
|
||||
props,
|
||||
HoodieTableQueryType.QUERY_TYPE_SNAPSHOT,
|
||||
HoodieTableQueryType.SNAPSHOT,
|
||||
partitionPaths,
|
||||
queryCommitInstant,
|
||||
shouldIncludePendingCommits);
|
||||
|
||||
Map<String, Seq<FileSlice>> partitionedFileSlices =
|
||||
JavaConverters.mapAsJavaMapConverter(fileIndex.listFileSlices()).asJava();
|
||||
Map<String, List<FileSlice>> partitionedFileSlices = fileIndex.listFileSlices();
|
||||
|
||||
targetFiles.addAll(
|
||||
partitionedFileSlices.values()
|
||||
.stream()
|
||||
.flatMap(seq -> JavaConverters.seqAsJavaListConverter(seq).asJava().stream())
|
||||
.flatMap(Collection::stream)
|
||||
.map(fileSlice -> {
|
||||
Option<HoodieBaseFile> baseFileOpt = fileSlice.getBaseFile();
|
||||
Option<HoodieLogFile> latestLogFileOpt = fileSlice.getLatestLogFile();
|
||||
Stream<HoodieLogFile> logFiles = fileSlice.getLogFiles();
|
||||
|
||||
Option<HoodieInstant> latestCompletedInstantOpt =
|
||||
fromScala(fileIndex.latestCompletedInstant());
|
||||
Option<HoodieInstant> latestCompletedInstantOpt = fileIndex.getLatestCompletedInstant();
|
||||
|
||||
// Check if we're reading a MOR table
|
||||
if (includeLogFilesForSnapshotView()) {
|
||||
@@ -307,7 +286,7 @@ public class HoodieCopyOnWriteTableInputFormat extends FileInputFormat<NullWrita
|
||||
);
|
||||
}
|
||||
|
||||
// TODO cleanup
|
||||
// TODO(HUDI-3280) cleanup
|
||||
validate(targetFiles, listStatusForSnapshotModeLegacy(job, tableMetaClientMap, snapshotPaths));
|
||||
|
||||
return targetFiles;
|
||||
@@ -380,12 +359,4 @@ public class HoodieCopyOnWriteTableInputFormat extends FileInputFormat<NullWrita
|
||||
throw new HoodieIOException(String.format("Failed to init %s", RealtimeFileStatus.class.getSimpleName()), e);
|
||||
}
|
||||
}
|
||||
|
||||
private static Option<HoodieInstant> fromScala(scala.Option<HoodieInstant> opt) {
|
||||
if (opt.isDefined()) {
|
||||
return Option.of(opt.get());
|
||||
}
|
||||
|
||||
return Option.empty();
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user