1
0

[HUDI-3239] Convert BaseHoodieTableFileIndex to Java (#4669)

Converting BaseHoodieTableFileIndex to Java, removing Scala as a dependency from "hudi-common"
This commit is contained in:
Alexey Kudinkin
2022-02-09 15:42:08 -08:00
committed by GitHub
parent 973087f385
commit 464027ec37
15 changed files with 443 additions and 554 deletions

View File

@@ -20,8 +20,7 @@ package org.apache.hudi.hadoop;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.HoodieTableFileIndexBase;
import org.apache.hudi.FileStatusCacheTrait;
import org.apache.hudi.BaseHoodieTableFileIndex;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieTableQueryType;
@@ -29,15 +28,13 @@ import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.util.Option;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Function0;
import scala.collection.JavaConverters;
import java.util.List;
/**
* Implementation of {@link HoodieTableFileIndexBase} for Hive-based query engines
* Implementation of {@link BaseHoodieTableFileIndex} for Hive-based query engines
*/
public class HiveHoodieTableFileIndex extends HoodieTableFileIndexBase {
public class HiveHoodieTableFileIndex extends BaseHoodieTableFileIndex {
public static final Logger LOG = LoggerFactory.getLogger(HiveHoodieTableFileIndex.class);
@@ -53,16 +50,12 @@ public class HiveHoodieTableFileIndex extends HoodieTableFileIndexBase {
metaClient,
configProperties,
queryType,
JavaConverters.asScalaBufferConverter(queryPaths).asScala(),
toScalaOption(specifiedQueryInstant),
queryPaths,
specifiedQueryInstant,
shouldIncludePendingCommits,
new NoopCache());
}
/**
 * Adapts the given {@link Option} into a {@code scala.Option}.
 *
 * The wrapped value is unwrapped to a plain (possibly {@code null}) reference first and then
 * handed over to {@code scala.Option.apply}.
 *
 * @param opt option to convert
 * @return result of {@code scala.Option.apply} invoked on the unwrapped value
 */
private static scala.Option<String> toScalaOption(Option<String> opt) {
  String unwrapped = opt.orElse(null);
  return scala.Option.apply(unwrapped);
}
@Override
public Object[] parsePartitionColumnValues(String[] partitionColumns, String partitionPath) {
// NOTE: Parsing partition path into partition column values isn't required on Hive,
@@ -71,20 +64,10 @@ public class HiveHoodieTableFileIndex extends HoodieTableFileIndexBase {
return new Object[0];
}
/**
 * Evaluates the supplied lazy message and writes it to {@code LOG} at INFO level.
 *
 * @param lazyStr deferred message producer; invoked exactly once
 */
@Override
public void logInfo(Function0<String> lazyStr) {
  String message = lazyStr.apply();
  LOG.info(message);
}
/**
 * Evaluates the supplied lazy message and writes it to {@code LOG} at WARN level.
 *
 * NOTE: Warnings have to be emitted through {@code LOG.warn}; routing them through
 *       {@code LOG.info} would make them indistinguishable from regular informational
 *       output and hide them whenever the logger is configured at WARN level
 *
 * @param lazyStr deferred message producer; invoked exactly once
 */
@Override
public void logWarning(Function0<String> lazyStr) {
  LOG.warn(lazyStr.apply());
}
static class NoopCache implements FileStatusCacheTrait {
static class NoopCache implements FileStatusCache {
@Override
public scala.Option<FileStatus[]> get(Path path) {
return scala.Option.empty();
public Option<FileStatus[]> get(Path path) {
return Option.empty();
}
@Override

View File

@@ -46,14 +46,13 @@ import org.apache.hudi.common.util.Option;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.hadoop.utils.HoodieHiveUtils;
import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils;
import scala.collection.JavaConverters;
import scala.collection.Seq;
import javax.annotation.Nonnull;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Properties;
@@ -79,24 +78,6 @@ public class HoodieCopyOnWriteTableInputFormat extends FileInputFormat<NullWrita
protected Configuration conf;
/**
 * Builds a {@link RealtimeFileStatus} for the given base file and its log files, re-throwing
 * any {@link IOException} raised along the way as an unchecked {@link HoodieIOException}.
 *
 * @param baseFile base file the resulting status is created from
 * @param logFiles log files to attach; ordered with {@code HoodieLogFile.getLogFileComparator()}
 * @return file status carrying the ordered log files and the base file path
 */
@Nonnull
private static RealtimeFileStatus createRealtimeFileStatusUnchecked(HoodieBaseFile baseFile, Stream<HoodieLogFile> logFiles) {
  // The log files are ordered before the base file's status is resolved
  List<HoodieLogFile> orderedLogFiles =
      logFiles.sorted(HoodieLogFile.getLogFileComparator())
          .collect(Collectors.toList());
  FileStatus baseStatus = getFileStatusUnchecked(baseFile);

  try {
    RealtimeFileStatus realtimeStatus = new RealtimeFileStatus(baseStatus);
    realtimeStatus.setDeltaLogFiles(orderedLogFiles);
    realtimeStatus.setBaseFilePath(baseFile.getPath());

    // Statuses of either bootstrap flavor are additionally recorded as the bootstrap file status
    boolean isBootstrapStatus = baseStatus instanceof LocatedFileStatusWithBootstrapBaseFile
        || baseStatus instanceof FileStatusWithBootstrapBaseFile;
    if (isBootstrapStatus) {
      realtimeStatus.setBootStrapFileStatus(baseStatus);
    }

    return realtimeStatus;
  } catch (IOException ioe) {
    String targetName = RealtimeFileStatus.class.getSimpleName();
    throw new HoodieIOException(String.format("Failed to init %s", targetName), ioe);
  }
}
@Override
public final Configuration getConf() {
return conf;
@@ -265,25 +246,23 @@ public class HoodieCopyOnWriteTableInputFormat extends FileInputFormat<NullWrita
engineContext,
tableMetaClient,
props,
HoodieTableQueryType.QUERY_TYPE_SNAPSHOT,
HoodieTableQueryType.SNAPSHOT,
partitionPaths,
queryCommitInstant,
shouldIncludePendingCommits);
Map<String, Seq<FileSlice>> partitionedFileSlices =
JavaConverters.mapAsJavaMapConverter(fileIndex.listFileSlices()).asJava();
Map<String, List<FileSlice>> partitionedFileSlices = fileIndex.listFileSlices();
targetFiles.addAll(
partitionedFileSlices.values()
.stream()
.flatMap(seq -> JavaConverters.seqAsJavaListConverter(seq).asJava().stream())
.flatMap(Collection::stream)
.map(fileSlice -> {
Option<HoodieBaseFile> baseFileOpt = fileSlice.getBaseFile();
Option<HoodieLogFile> latestLogFileOpt = fileSlice.getLatestLogFile();
Stream<HoodieLogFile> logFiles = fileSlice.getLogFiles();
Option<HoodieInstant> latestCompletedInstantOpt =
fromScala(fileIndex.latestCompletedInstant());
Option<HoodieInstant> latestCompletedInstantOpt = fileIndex.getLatestCompletedInstant();
// Check if we're reading a MOR table
if (includeLogFilesForSnapshotView()) {
@@ -307,7 +286,7 @@ public class HoodieCopyOnWriteTableInputFormat extends FileInputFormat<NullWrita
);
}
// TODO cleanup
// TODO(HUDI-3280) cleanup
validate(targetFiles, listStatusForSnapshotModeLegacy(job, tableMetaClientMap, snapshotPaths));
return targetFiles;
@@ -380,12 +359,4 @@ public class HoodieCopyOnWriteTableInputFormat extends FileInputFormat<NullWrita
throw new HoodieIOException(String.format("Failed to init %s", RealtimeFileStatus.class.getSimpleName()), e);
}
}
/**
 * Adapts a {@code scala.Option} into an {@link Option}.
 *
 * @param opt Scala option to convert
 * @return {@link Option} wrapping the contained instant when {@code opt} is defined,
 *         an empty {@link Option} otherwise
 */
private static Option<HoodieInstant> fromScala(scala.Option<HoodieInstant> opt) {
  if (!opt.isDefined()) {
    return Option.empty();
  }

  HoodieInstant instant = opt.get();
  return Option.of(instant);
}
}