[HUDI-3191] Rebasing Hive's FileInputFormat onto AbstractHoodieTableFileIndex (#4531)

Alexey Kudinkin authored on 2022-01-18 14:54:51 -08:00, committed by GitHub
parent caeea946fb
commit 4bea758738
21 changed files with 623 additions and 78 deletions

hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/testutils/InputFormatTestUtil.java

@@ -18,10 +18,13 @@
package org.apache.hudi.hadoop.testutils;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.RawLocalFileSystem;
import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.model.HoodiePartitionMetadata;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.table.log.HoodieLogFormat;
@@ -68,8 +71,10 @@ public class InputFormatTestUtil {
      throws IOException {
    HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), basePath.toString(), HoodieTableType.COPY_ON_WRITE,
        baseFileFormat);
    java.nio.file.Path partitionPath = basePath.resolve(Paths.get("2016", "05", "01"));
    Files.createDirectories(partitionPath);
    setupPartition(basePath, partitionPath);
    return simulateInserts(partitionPath.toFile(), baseFileFormat.getFileExtension(), "fileId1", numberOfFiles,
        commitNumber);
  }
@@ -79,8 +84,10 @@ public class InputFormatTestUtil {
      throws IOException {
    HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), basePath.toString(), HoodieTableType.COPY_ON_WRITE,
        baseFileFormat);
    java.nio.file.Path partitionPath = basePath.resolve(Paths.get("2016", "05", finalLevelPartitionName));
    Files.createDirectories(partitionPath);
    setupPartition(basePath, partitionPath);
    return simulateInserts(partitionPath.toFile(), baseFileFormat.getFileExtension(), "fileId1" + finalLevelPartitionName, numberOfFiles,
        commitNumber);
  }
@@ -175,8 +182,12 @@ public class InputFormatTestUtil {
  public static File prepareParquetTable(java.nio.file.Path basePath, Schema schema, int numberOfFiles,
      int numberOfRecords, String commitNumber, HoodieTableType tableType) throws IOException {
    HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), basePath.toString(), tableType, HoodieFileFormat.PARQUET);
    java.nio.file.Path partitionPath = basePath.resolve(Paths.get("2016", "05", "01"));
    setupPartition(basePath, partitionPath);
    createData(schema, partitionPath, numberOfFiles, numberOfRecords, commitNumber);
    return partitionPath.toFile();
  }
@@ -188,8 +199,12 @@ public class InputFormatTestUtil {
  public static File prepareSimpleParquetTable(java.nio.file.Path basePath, Schema schema, int numberOfFiles,
      int numberOfRecords, String commitNumber, HoodieTableType tableType) throws Exception {
    HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), basePath.toString(), tableType, HoodieFileFormat.PARQUET);
    java.nio.file.Path partitionPath = basePath.resolve(Paths.get("2016", "05", "01"));
    setupPartition(basePath, partitionPath);
    createSimpleData(schema, partitionPath, numberOfFiles, numberOfRecords, commitNumber);
    return partitionPath.toFile();
  }
@@ -211,7 +226,10 @@ public class InputFormatTestUtil {
    HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), basePath.toString());
    for (int i = 0; i < numberPartitions; i++) {
      java.nio.file.Path partitionPath = basePath.resolve(Paths.get(2016 + i + "", "05", "01"));
      setupPartition(basePath, partitionPath);
      createData(schema, partitionPath, 1, numberOfRecordsPerPartition, commitNumber);
      result.add(partitionPath.toFile());
    }
    return result;
@@ -400,10 +418,27 @@ public class InputFormatTestUtil {
    jobConf.addResource(conf);
  }

  private static void setupPartition(java.nio.file.Path basePath, java.nio.file.Path partitionPath) throws IOException {
    Files.createDirectories(partitionPath);

    // Create partition metadata to properly set up the table's partition
    RawLocalFileSystem lfs = new RawLocalFileSystem();
    lfs.setConf(HoodieTestUtils.getDefaultHadoopConf());

    HoodiePartitionMetadata partitionMetadata =
        new HoodiePartitionMetadata(
            new LocalFileSystem(lfs),
            "0",
            new Path(basePath.toAbsolutePath().toString()),
            new Path(partitionPath.toAbsolutePath().toString())
        );

    partitionMetadata.trySave((int) (Math.random() * 1000));
  }

  public static void setInputPath(JobConf jobConf, String inputPath) {
    jobConf.set("mapreduce.input.fileinputformat.inputdir", inputPath);
    jobConf.set("map.input.dir", inputPath);
  }
}
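
For context, a minimal sketch (not part of this commit) of how a test might exercise the helpers above. prepareTable and setInputPath come from this file, with prepareTable's signature (basePath, baseFileFormat, numberOfFiles, commitNumber) inferred from the hunks; HoodiePartitionMetadata.hasPartitionMetadata is assumed here as the existing Hudi check for the .hoodie_partition_metadata marker that setupPartition now writes.

    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hudi.common.model.HoodieFileFormat;
    import org.apache.hudi.common.model.HoodiePartitionMetadata;
    import org.apache.hudi.common.testutils.HoodieTestUtils;
    import org.apache.hudi.hadoop.testutils.InputFormatTestUtil;
    import org.junit.jupiter.api.Assertions;
    import org.junit.jupiter.api.Test;
    import org.junit.jupiter.api.io.TempDir;

    import java.io.File;

    public class PartitionMetadataSketch {

      @Test
      void preparedPartitionIsRecognizedAsHudiPartition(@TempDir java.nio.file.Path tempDir) throws Exception {
        // Initializes the table, creates one partition dir (2016/05/01) with base files
        // named for commit "100", and -- with this change -- writes the
        // .hoodie_partition_metadata marker into the partition.
        File partitionDir = InputFormatTestUtil.prepareTable(tempDir, HoodieFileFormat.PARQUET, 1, "100");

        // The marker is what lets Hudi tell a table partition apart from a plain directory.
        Path partition = new Path(partitionDir.getAbsolutePath());
        FileSystem fs = partition.getFileSystem(HoodieTestUtils.getDefaultHadoopConf());
        Assertions.assertTrue(HoodiePartitionMetadata.hasPartitionMetadata(fs, partition));

        // Point the MapReduce job at the partition, as the input-format tests do.
        JobConf jobConf = new JobConf(HoodieTestUtils.getDefaultHadoopConf());
        InputFormatTestUtil.setInputPath(jobConf, partitionDir.getAbsolutePath());
      }
    }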