[HUDI-684] Introduced abstraction for writing and reading different types of base file formats. (#1687)
Notable changes:
1. HoodieFileWriter and HoodieFileReader abstractions for the writer/reader side of a base file format (a brief usage sketch follows this list)
2. HoodieDataBlock abstraction for creating format-specific data blocks for base file formats (e.g. Parquet has HoodieAvroDataBlock)
3. All hardcoded references to Parquet / Parquet-based classes have been abstracted into methods that accept a base file format
4. HiveSyncTool accepts the base file format as a CLI parameter
5. HoodieDeltaStreamer accepts the base file format as a CLI parameter
6. HoodieSparkSqlWriter accepts the base file format as a parameter
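
For illustration only, a minimal sketch (not code from this commit; the wrapper class below is hypothetical) of how the reader-side abstraction is intended to be used: callers obtain a HoodieFileReader for whatever format backs the base file instead of instantiating ParquetFileReader directly.

    import org.apache.avro.Schema;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hudi.io.storage.HoodieFileReader;
    import org.apache.hudi.io.storage.HoodieFileReaderFactory;

    import java.io.IOException;

    class BaseFileSchemaExample {
      // Resolve a reader for the base file's format (Parquet today) and ask it for the
      // Avro schema, instead of going through ParquetFileReader/AvroSchemaConverter.
      static Schema readBaseFileSchema(Configuration conf, Path baseFilePath) throws IOException {
        HoodieFileReader reader = HoodieFileReaderFactory.getFileReader(conf, baseFilePath);
        return reader.getSchema();
      }
    }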
@@ -18,6 +18,7 @@

package org.apache.hudi.hadoop;

import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieDefaultTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;

@@ -101,7 +102,8 @@ public class HoodieParquetInputFormat extends MapredParquetInputFormat implement
    setInputPaths(job, snapshotPaths.toArray(new Path[snapshotPaths.size()]));
    FileStatus[] fileStatuses = super.listStatus(job);
    Map<HoodieTableMetaClient, List<FileStatus>> groupedFileStatus =
        HoodieInputFormatUtils.groupFileStatusForSnapshotPaths(fileStatuses, tableMetaClientMap.values());
        HoodieInputFormatUtils.groupFileStatusForSnapshotPaths(fileStatuses,
            HoodieFileFormat.PARQUET.getFileExtension(), tableMetaClientMap.values());
    LOG.info("Found a total of " + groupedFileStatus.size() + " groups");
    for (Map.Entry<HoodieTableMetaClient, List<FileStatus>> entry : groupedFileStatus.entrySet()) {
      List<FileStatus> result = HoodieInputFormatUtils.filterFileStatusForSnapshotMode(job, entry.getKey(), entry.getValue());
@@ -24,7 +24,7 @@ import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.hadoop.HoodieParquetInputFormat;
import org.apache.hudi.hadoop.realtime.HoodieCombineRealtimeRecordReader;
import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat;
import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;

@@ -952,7 +952,11 @@ public class HoodieCombineHiveInputFormat<K extends WritableComparable, V extend
      ValidationUtils.checkArgument(split instanceof HoodieCombineRealtimeFileSplit, "Only "
          + HoodieCombineRealtimeFileSplit.class.getName() + " allowed, found " + split.getClass().getName());
      for (InputSplit inputSplit : ((HoodieCombineRealtimeFileSplit) split).getRealtimeFileSplits()) {
        recordReaders.add(new HoodieParquetRealtimeInputFormat().getRecordReader(inputSplit, job, reporter));
        if (split.getPaths().length == 0) {
          continue;
        }
        FileInputFormat inputFormat = HoodieInputFormatUtils.getInputFormat(split.getPath(0).toString(), true, job);
        recordReaders.add(inputFormat.getRecordReader(inputSplit, job, reporter));
      }
      return new HoodieCombineRealtimeRecordReader(job, split, recordReaders);
    }
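
As a rough illustration of the dispatch above (a hedged sketch, not part of this diff; the helper name openReader is made up and imports mirror those of HoodieCombineHiveInputFormat), any caller can now resolve the right record reader from a file path's extension rather than hardcoding HoodieParquetRealtimeInputFormat:

    // Hypothetical helper: pick the format-appropriate input format for a split, then open its reader.
    static RecordReader openReader(InputSplit inputSplit, String baseFilePath, JobConf job, Reporter reporter)
        throws IOException {
      // realtime = true asks for the merge-on-read (realtime) input format variant
      FileInputFormat inputFormat = HoodieInputFormatUtils.getInputFormat(baseFilePath, true, job);
      return inputFormat.getRecordReader(inputSplit, job, reporter);
    }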
@@ -32,8 +32,6 @@ import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.mapred.JobConf;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.parquet.avro.AvroSchemaConverter;
import org.apache.parquet.schema.MessageType;

import java.io.IOException;
import java.util.ArrayList;

@@ -50,7 +48,6 @@ public abstract class AbstractRealtimeRecordReader {

  protected final HoodieRealtimeFileSplit split;
  protected final JobConf jobConf;
  private final MessageType baseFileSchema;
  protected final boolean usesCustomPayload;
  // Schema handles
  private Schema readerSchema;

@@ -66,7 +63,6 @@ public abstract class AbstractRealtimeRecordReader {
    try {
      this.usesCustomPayload = usesCustomPayload();
      LOG.info("usesCustomPayload ==> " + this.usesCustomPayload);
      baseFileSchema = HoodieRealtimeRecordReaderUtils.readSchema(jobConf, split.getPath());
      init();
    } catch (IOException e) {
      throw new HoodieIOException("Could not create HoodieRealtimeRecordReader on path " + this.split.getPath(), e);

@@ -88,7 +84,7 @@ public abstract class AbstractRealtimeRecordReader {
    Schema schemaFromLogFile =
        LogReaderUtils.readLatestSchemaFromLogFiles(split.getBasePath(), split.getDeltaLogPaths(), jobConf);
    if (schemaFromLogFile == null) {
      writerSchema = new AvroSchemaConverter().convert(baseFileSchema);
      writerSchema = HoodieRealtimeRecordReaderUtils.readSchema(jobConf, split.getPath());
      LOG.debug("Writer Schema From Parquet => " + writerSchema.getFields());
    } else {
      writerSchema = schemaFromLogFile;
@@ -18,8 +18,10 @@

package org.apache.hudi.hadoop.utils;

import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.model.HoodiePartitionMetadata;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieDefaultTimeline;

@@ -30,11 +32,15 @@ import org.apache.hudi.common.table.view.TableFileSystemView;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.hadoop.HoodieParquetInputFormat;
import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat;
import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.log4j.LogManager;

@@ -61,6 +67,54 @@ public class HoodieInputFormatUtils {

  private static final Logger LOG = LogManager.getLogger(HoodieInputFormatUtils.class);

  public static FileInputFormat getInputFormat(HoodieFileFormat baseFileFormat, boolean realtime, Configuration conf) {
    switch (baseFileFormat) {
      case PARQUET:
        if (realtime) {
          HoodieParquetRealtimeInputFormat inputFormat = new HoodieParquetRealtimeInputFormat();
          inputFormat.setConf(conf);
          return inputFormat;
        } else {
          HoodieParquetInputFormat inputFormat = new HoodieParquetInputFormat();
          inputFormat.setConf(conf);
          return inputFormat;
        }
      default:
        throw new HoodieIOException("Hoodie InputFormat not implemented for base file format " + baseFileFormat);
    }
  }

  public static String getInputFormatClassName(HoodieFileFormat baseFileFormat, boolean realtime, Configuration conf) {
    FileInputFormat inputFormat = getInputFormat(baseFileFormat, realtime, conf);
    return inputFormat.getClass().getName();
  }

  public static String getOutputFormatClassName(HoodieFileFormat baseFileFormat) {
    switch (baseFileFormat) {
      case PARQUET:
        return MapredParquetOutputFormat.class.getName();
      default:
        throw new HoodieIOException("No OutputFormat for base file format " + baseFileFormat);
    }
  }

  public static String getSerDeClassName(HoodieFileFormat baseFileFormat) {
    switch (baseFileFormat) {
      case PARQUET:
        return ParquetHiveSerDe.class.getName();
      default:
        throw new HoodieIOException("No SerDe for base file format " + baseFileFormat);
    }
  }

  public static FileInputFormat getInputFormat(String path, boolean realtime, Configuration conf) {
    final String extension = FSUtils.getFileExtension(path.toString());
    if (extension.equals(HoodieFileFormat.PARQUET.getFileExtension())) {
      return getInputFormat(HoodieFileFormat.PARQUET, realtime, conf);
    }
    throw new HoodieIOException("Hoodie InputFormat not implemented for base file of type " + extension);
  }

  /**
   * Filter any specific instants that we do not want to process.
   * example timeline:

@@ -255,19 +309,20 @@ public class HoodieInputFormatUtils {
   * Takes in a list of filesStatus and a list of table metadatas. Groups the files status list
   * based on given table metadata.
   * @param fileStatuses
   * @param fileExtension
   * @param metaClientList
   * @return
   * @throws IOException
   */
  public static Map<HoodieTableMetaClient, List<FileStatus>> groupFileStatusForSnapshotPaths(
      FileStatus[] fileStatuses, Collection<HoodieTableMetaClient> metaClientList) {
      FileStatus[] fileStatuses, String fileExtension, Collection<HoodieTableMetaClient> metaClientList) {
    // This assumes the paths for different tables are grouped together
    Map<HoodieTableMetaClient, List<FileStatus>> grouped = new HashMap<>();
    HoodieTableMetaClient metadata = null;
    for (FileStatus status : fileStatuses) {
      Path inputPath = status.getPath();
      if (!inputPath.getName().endsWith(".parquet")) {
        //FIXME(vc): skip non parquet files for now. This wont be needed once log file name start
      if (!inputPath.getName().endsWith(fileExtension)) {
        //FIXME(vc): skip non data files for now. This wont be needed once log file name start
        // with "."
        continue;
      }
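
A hedged sketch of how a sync layer such as HiveSyncTool could use the helpers added above to pick Hive registration classes per base file format (identifiers like hadoopConf are assumptions, not code from this commit):

    // Assumed: hadoopConf is an org.apache.hadoop.conf.Configuration available to the caller.
    HoodieFileFormat format = HoodieFileFormat.PARQUET;
    String inputFormatClass = HoodieInputFormatUtils.getInputFormatClassName(format, false, hadoopConf); // snapshot (non-realtime)
    String outputFormatClass = HoodieInputFormatUtils.getOutputFormatClassName(format); // MapredParquetOutputFormat for PARQUET
    String serDeClass = HoodieInputFormatUtils.getSerDeClassName(format); // ParquetHiveSerDe for PARQUET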
@@ -22,7 +22,8 @@ import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.io.storage.HoodieFileReader;
import org.apache.hudi.io.storage.HoodieFileReaderFactory;
import org.apache.avro.LogicalTypes;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericArray;

@@ -40,8 +41,6 @@ import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.schema.MessageType;

import java.io.IOException;
import java.nio.ByteBuffer;

@@ -57,14 +56,14 @@ import java.util.stream.Collectors;
public class HoodieRealtimeRecordReaderUtils {

  /**
   * Reads the schema from the parquet file. This is different from ParquetUtils as it uses the twitter parquet to
   * support hive 1.1.0
   * Reads the schema from the base file.
   */
  public static MessageType readSchema(Configuration conf, Path parquetFilePath) {
  public static Schema readSchema(Configuration conf, Path filePath) {
    try {
      return ParquetFileReader.readFooter(conf, parquetFilePath).getFileMetaData().getSchema();
      HoodieFileReader storageReader = HoodieFileReaderFactory.getFileReader(conf, filePath);
      return storageReader.getSchema();
    } catch (IOException e) {
      throw new HoodieIOException("Failed to read footer for parquet " + parquetFilePath, e);
      throw new HoodieIOException("Failed to read schema from " + filePath, e);
    }
  }
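
For contrast, a small before/after sketch of the schema lookup (reconstructed from the lines above, illustrative only; conf and path are assumed to be in scope):

    // Old, Parquet-specific path: footer -> MessageType -> Avro conversion.
    // MessageType parquetSchema = ParquetFileReader.readFooter(conf, path).getFileMetaData().getSchema();
    // Schema avroSchema = new AvroSchemaConverter().convert(parquetSchema);

    // New, format-agnostic path: the HoodieFileReader already returns an Avro schema.
    Schema avroSchema = HoodieRealtimeRecordReaderUtils.readSchema(conf, path);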
@@ -21,6 +21,7 @@ package org.apache.hudi.hadoop;
import org.apache.hudi.avro.model.HoodieCompactionPlan;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;

@@ -59,6 +60,8 @@ public class TestHoodieParquetInputFormat {

  private HoodieParquetInputFormat inputFormat;
  private JobConf jobConf;
  private final HoodieFileFormat baseFileFormat = HoodieFileFormat.PARQUET;
  private final String baseFileExtension = baseFileFormat.getFileExtension();

  public static void ensureFilesInCommit(String msg, FileStatus[] files, String commit, int expected) {
    int count = 0;

@@ -145,7 +148,7 @@ public class TestHoodieParquetInputFormat {
  @Test
  public void testInputFormatLoad() throws IOException {
    // initial commit
    File partitionDir = InputFormatTestUtil.prepareTable(basePath, 10, "100");
    File partitionDir = InputFormatTestUtil.prepareTable(basePath, baseFileFormat, 10, "100");
    InputFormatTestUtil.commit(basePath, "100");

    // Add the paths

@@ -161,7 +164,7 @@ public class TestHoodieParquetInputFormat {
  @Test
  public void testInputFormatUpdates() throws IOException {
    // initial commit
    File partitionDir = InputFormatTestUtil.prepareTable(basePath, 10, "100");
    File partitionDir = InputFormatTestUtil.prepareTable(basePath, baseFileFormat, 10, "100");
    InputFormatTestUtil.commit(basePath, "100");

    // Add the paths

@@ -171,7 +174,7 @@ public class TestHoodieParquetInputFormat {
    assertEquals(10, files.length);

    // update files
    InputFormatTestUtil.simulateUpdates(partitionDir, "100", 5, "200", true);
    InputFormatTestUtil.simulateUpdates(partitionDir, baseFileExtension, "100", 5, "200", true);
    // Before the commit
    files = inputFormat.listStatus(jobConf);
    assertEquals(10, files.length);

@@ -188,7 +191,7 @@ public class TestHoodieParquetInputFormat {
  @Test
  public void testInputFormatWithCompaction() throws IOException {
    // initial commit
    File partitionDir = InputFormatTestUtil.prepareTable(basePath, 10, "100");
    File partitionDir = InputFormatTestUtil.prepareTable(basePath, baseFileFormat, 10, "100");
    InputFormatTestUtil.commit(basePath, "100");

    // Add the paths

@@ -204,7 +207,7 @@ public class TestHoodieParquetInputFormat {
    createCompactionFile(basePath, "125");

    // add inserts after compaction timestamp
    InputFormatTestUtil.simulateInserts(partitionDir, "fileId2", 5, "200");
    InputFormatTestUtil.simulateInserts(partitionDir, baseFileExtension, "fileId2", 5, "200");
    InputFormatTestUtil.commit(basePath, "200");

    // verify snapshot reads show all new inserts even though there is pending compaction

@@ -221,7 +224,7 @@ public class TestHoodieParquetInputFormat {
  @Test
  public void testIncrementalSimple() throws IOException {
    // initial commit
    File partitionDir = InputFormatTestUtil.prepareTable(basePath, 10, "100");
    File partitionDir = InputFormatTestUtil.prepareTable(basePath, baseFileFormat, 10, "100");
    createCommitFile(basePath, "100", "2016/05/01");

    // Add the paths

@@ -266,25 +269,25 @@ public class TestHoodieParquetInputFormat {
  @Test
  public void testIncrementalWithMultipleCommits() throws IOException {
    // initial commit
    File partitionDir = InputFormatTestUtil.prepareTable(basePath, 10, "100");
    File partitionDir = InputFormatTestUtil.prepareTable(basePath, baseFileFormat, 10, "100");
    createCommitFile(basePath, "100", "2016/05/01");

    // Add the paths
    FileInputFormat.setInputPaths(jobConf, partitionDir.getPath());
    // update files
    InputFormatTestUtil.simulateUpdates(partitionDir, "100", 5, "200", false);
    InputFormatTestUtil.simulateUpdates(partitionDir, baseFileExtension, "100", 5, "200", false);
    createCommitFile(basePath, "200", "2016/05/01");

    InputFormatTestUtil.simulateUpdates(partitionDir, "100", 4, "300", false);
    InputFormatTestUtil.simulateUpdates(partitionDir, baseFileExtension, "100", 4, "300", false);
    createCommitFile(basePath, "300", "2016/05/01");

    InputFormatTestUtil.simulateUpdates(partitionDir, "100", 3, "400", false);
    InputFormatTestUtil.simulateUpdates(partitionDir, baseFileExtension, "100", 3, "400", false);
    createCommitFile(basePath, "400", "2016/05/01");

    InputFormatTestUtil.simulateUpdates(partitionDir, "100", 2, "500", false);
    InputFormatTestUtil.simulateUpdates(partitionDir, baseFileExtension, "100", 2, "500", false);
    createCommitFile(basePath, "500", "2016/05/01");

    InputFormatTestUtil.simulateUpdates(partitionDir, "100", 1, "600", false);
    InputFormatTestUtil.simulateUpdates(partitionDir, baseFileExtension, "100", 1, "600", false);
    createCommitFile(basePath, "600", "2016/05/01");

    InputFormatTestUtil.setupIncremental(jobConf, "100", 1);

@@ -364,14 +367,14 @@ public class TestHoodieParquetInputFormat {
  @Test
  public void testIncrementalWithPendingCompaction() throws IOException {
    // initial commit
    File partitionDir = InputFormatTestUtil.prepareTable(basePath, 10, "100");
    File partitionDir = InputFormatTestUtil.prepareTable(basePath, baseFileFormat, 10, "100");
    createCommitFile(basePath, "100", "2016/05/01");

    // simulate compaction requested at 300
    File compactionFile = createCompactionFile(basePath, "300");

    // write inserts into new bucket
    InputFormatTestUtil.simulateInserts(partitionDir, "fileId2", 10, "400");
    InputFormatTestUtil.simulateInserts(partitionDir, baseFileExtension, "fileId2", 10, "400");
    createCommitFile(basePath, "400", "2016/05/01");

    // Add the paths
@@ -20,8 +20,10 @@ package org.apache.hudi.hadoop.testutils;

import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.table.log.HoodieLogFormat;
import org.apache.hudi.common.table.log.block.HoodieAvroDataBlock;
import org.apache.hudi.common.table.log.block.HoodieCommandBlock;

@@ -59,25 +61,29 @@ public class InputFormatTestUtil {

  private static String TEST_WRITE_TOKEN = "1-0-1";

  public static File prepareTable(java.nio.file.Path basePath, int numberOfFiles, String commitNumber)
  public static File prepareTable(java.nio.file.Path basePath, HoodieFileFormat baseFileFormat, int numberOfFiles,
      String commitNumber)
      throws IOException {
    HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), basePath.toString());
    HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), basePath.toString(), HoodieTableType.COPY_ON_WRITE,
        baseFileFormat);
    java.nio.file.Path partitionPath = basePath.resolve(Paths.get("2016", "05", "01"));
    Files.createDirectories(partitionPath);
    return simulateInserts(partitionPath.toFile(), "fileId1", numberOfFiles, commitNumber);
    return simulateInserts(partitionPath.toFile(), baseFileFormat.getFileExtension(), "fileId1", numberOfFiles,
        commitNumber);
  }

  public static File simulateInserts(File partitionPath, String fileId, int numberOfFiles, String commitNumber)
  public static File simulateInserts(File partitionPath, String baseFileExtension, String fileId, int numberOfFiles,
      String commitNumber)
      throws IOException {
    for (int i = 0; i < numberOfFiles; i++) {
      Files.createFile(partitionPath.toPath()
          .resolve(FSUtils.makeDataFileName(commitNumber, TEST_WRITE_TOKEN, fileId + i)));
          .resolve(FSUtils.makeDataFileName(commitNumber, TEST_WRITE_TOKEN, fileId + i, baseFileExtension)));
    }
    return partitionPath;
  }

  public static void simulateUpdates(File directory, final String originalCommit, int numberOfFilesUpdated,
      String newCommit, boolean randomize) throws IOException {
  public static void simulateUpdates(File directory, String baseFileExtension, final String originalCommit,
      int numberOfFilesUpdated, String newCommit, boolean randomize) throws IOException {
    List<File> dataFiles = Arrays.asList(Objects.requireNonNull(directory.listFiles((dir, name) -> {
      String commitTs = FSUtils.getCommitTime(name);
      return originalCommit.equals(commitTs);

@@ -88,7 +94,8 @@ public class InputFormatTestUtil {
    List<File> toUpdateList = dataFiles.subList(0, Math.min(numberOfFilesUpdated, dataFiles.size()));
    for (File file : toUpdateList) {
      String fileId = FSUtils.getFileId(file.getName());
      Files.createFile(directory.toPath().resolve(FSUtils.makeDataFileName(newCommit, TEST_WRITE_TOKEN, fileId)));
      Files.createFile(directory.toPath().resolve(FSUtils.makeDataFileName(newCommit, TEST_WRITE_TOKEN, fileId,
          baseFileExtension)));
    }
  }