This reverts commit 1bb0532563.

The revert switches RealtimeSplit and its implementations back from carrying List&lt;FileStatus&gt; deltaLogFileStatus to the original List&lt;String&gt; deltaLogPaths, touching the split classes, the input-format utilities, and the corresponding tests.
committed via GitHub · parent 4b5512e685 · commit e3fc74668f
@@ -82,7 +82,7 @@ public abstract class AbstractRealtimeRecordReader {
    * job conf.
    */
   private void init() throws IOException {
-    Schema schemaFromLogFile = LogReaderUtils.readLatestSchemaFromLogFiles(split.getBasePath(), split.getDeltaLogFileStatus(), jobConf);
+    Schema schemaFromLogFile = LogReaderUtils.readLatestSchemaFromLogFiles(split.getBasePath(), split.getDeltaLogPaths(), jobConf);
     if (schemaFromLogFile == null) {
       writerSchema = InputSplitUtils.getBaseFileSchema((FileSplit)split, jobConf);
       LOG.info("Writer Schema From Parquet => " + writerSchema.getFields());
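The hunk above restores getDeltaLogPaths() as the input to log-schema resolution. Below is a minimal, self-contained sketch of the control flow in init(): try the latest schema from the delta log files first, and fall back to the base Parquet file's schema when the logs yield none. readLatestSchemaFromLogs and readBaseFileSchema are illustrative stand-ins, not Hudi APIs.

import java.util.List;

public class SchemaFallbackSketch {

  static String resolveSchema(List<String> deltaLogPaths) {
    String fromLogs = readLatestSchemaFromLogs(deltaLogPaths); // may be null, like schemaFromLogFile above
    return fromLogs != null ? fromLogs : readBaseFileSchema(); // fall back to the base file schema
  }

  // Illustrative stand-in for LogReaderUtils.readLatestSchemaFromLogFiles(...)
  static String readLatestSchemaFromLogs(List<String> deltaLogPaths) {
    return deltaLogPaths.isEmpty() ? null : "schema-from-log:" + deltaLogPaths.get(deltaLogPaths.size() - 1);
  }

  // Illustrative stand-in for InputSplitUtils.getBaseFileSchema(...)
  static String readBaseFileSchema() {
    return "schema-from-parquet";
  }

  public static void main(String[] args) {
    System.out.println(resolveSchema(List.of()));              // falls back: schema-from-parquet
    System.out.println(resolveSchema(List.of("/tbl/1.log")));  // taken from the newest log
  }
}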
@@ -20,14 +20,12 @@ package org.apache.hudi.hadoop.realtime;
 
 import org.apache.hudi.common.util.Option;
 
-import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.mapred.FileSplit;
 
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
 import java.util.List;
-import java.util.stream.Collectors;
 
 /**
  * Filesplit that wraps the base split and a list of log files to merge deltas from.
@@ -35,7 +33,6 @@ import java.util.stream.Collectors;
 public class HoodieRealtimeFileSplit extends FileSplit implements RealtimeSplit {
 
   private List<String> deltaLogPaths;
-  private List<FileStatus> deltaLogFileStatus;
 
   private String maxCommitTime;
 
@@ -47,12 +44,11 @@ public class HoodieRealtimeFileSplit extends FileSplit implements RealtimeSplit
     super();
   }
 
-  public HoodieRealtimeFileSplit(FileSplit baseSplit, String basePath, List<FileStatus> deltaLogFileStatus, String maxCommitTime,
+  public HoodieRealtimeFileSplit(FileSplit baseSplit, String basePath, List<String> deltaLogPaths, String maxCommitTime,
                                  Option<HoodieVirtualKeyInfo> hoodieVirtualKeyInfo)
       throws IOException {
     super(baseSplit.getPath(), baseSplit.getStart(), baseSplit.getLength(), baseSplit.getLocations());
-    this.deltaLogFileStatus = deltaLogFileStatus;
-    this.deltaLogPaths = deltaLogFileStatus.stream().map(entry -> entry.getPath().toString()).collect(Collectors.toList());
+    this.deltaLogPaths = deltaLogPaths;
     this.maxCommitTime = maxCommitTime;
     this.basePath = basePath;
     this.hoodieVirtualKeyInfo = hoodieVirtualKeyInfo;
@@ -62,10 +58,6 @@ public class HoodieRealtimeFileSplit extends FileSplit implements RealtimeSplit
     return deltaLogPaths;
   }
 
-  public List<FileStatus> getDeltaLogFileStatus() {
-    return deltaLogFileStatus;
-  }
-
   public String getMaxCommitTime() {
     return maxCommitTime;
   }
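With the revert in place, callers build HoodieRealtimeFileSplit directly from path strings, exactly as the test changes further below do. A usage sketch, assuming hudi-hadoop-mr and the Hadoop mapred classes on the classpath; the paths and commit time are illustrative values.

import java.util.Collections;
import java.util.List;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit;

public class RealtimeSplitUsageSketch {
  public static void main(String[] args) throws Exception {
    String basePath = "/tmp/hudi_table"; // illustrative table location
    List<String> deltaLogPaths = Collections.singletonList(basePath + "/1.log");
    FileSplit baseFileSplit = new FileSplit(new Path(basePath + "/test.file"), 0, 100, new String[] {});

    // Restored constructor: log files are passed as plain path strings.
    HoodieRealtimeFileSplit split =
        new HoodieRealtimeFileSplit(baseFileSplit, basePath, deltaLogPaths, "10001", Option.empty());
    System.out.println(split.getDeltaLogPaths()); // [/tmp/hudi_table/1.log]
  }
}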
@@ -21,14 +21,12 @@ package org.apache.hudi.hadoop.realtime;
 import org.apache.hudi.common.util.Option;
 import org.apache.hudi.hadoop.BootstrapBaseFileSplit;
 
-import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.mapred.FileSplit;
 
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
 import java.util.List;
-import java.util.stream.Collectors;
 
 /**
  * Realtime File Split with external base file.
@@ -36,7 +34,6 @@ import java.util.stream.Collectors;
 public class RealtimeBootstrapBaseFileSplit extends BootstrapBaseFileSplit implements RealtimeSplit {
 
   private List<String> deltaLogPaths;
-  private List<FileStatus> deltaLogFileStatus;
 
   private String maxInstantTime;
 
@@ -46,12 +43,11 @@ public class RealtimeBootstrapBaseFileSplit extends BootstrapBaseFileSplit imple
     super();
   }
 
-  public RealtimeBootstrapBaseFileSplit(FileSplit baseSplit, String basePath, List<FileStatus> deltaLogFileStatus,
+  public RealtimeBootstrapBaseFileSplit(FileSplit baseSplit, String basePath, List<String> deltaLogPaths,
                                         String maxInstantTime, FileSplit externalFileSplit) throws IOException {
     super(baseSplit, externalFileSplit);
     this.maxInstantTime = maxInstantTime;
-    this.deltaLogFileStatus = deltaLogFileStatus;
-    this.deltaLogPaths = deltaLogFileStatus.stream().map(entry -> entry.getPath().toString()).collect(Collectors.toList());
+    this.deltaLogPaths = deltaLogPaths;
     this.basePath = basePath;
   }
 
@@ -72,11 +68,6 @@ public class RealtimeBootstrapBaseFileSplit extends BootstrapBaseFileSplit imple
     return deltaLogPaths;
   }
 
-  @Override
-  public List<FileStatus> getDeltaLogFileStatus() {
-    return deltaLogFileStatus;
-  }
-
   @Override
   public String getMaxCommitTime() {
     return maxInstantTime;
@@ -21,7 +21,6 @@ package org.apache.hudi.hadoop.realtime;
 import org.apache.hudi.common.util.Option;
 import org.apache.hudi.hadoop.InputSplitUtils;
 
-import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapred.InputSplitWithLocationInfo;
 
@@ -42,8 +41,6 @@ public interface RealtimeSplit extends InputSplitWithLocationInfo {
    */
   List<String> getDeltaLogPaths();
 
-  List<FileStatus> getDeltaLogFileStatus();
-
   /**
    * Return Max Instant Time.
    * @return
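After this hunk, RealtimeSplit exposes only path strings; no FileStatus accessor remains on the interface. A minimal sketch of the surviving contract, where RealtimeSplitView is a hypothetical stand-in for illustration, not the real interface (which also extends InputSplitWithLocationInfo):

import java.util.Collections;
import java.util.List;

public class RealtimeSplitContractSketch {
  // Hypothetical stand-in for the reverted RealtimeSplit surface.
  interface RealtimeSplitView {
    List<String> getDeltaLogPaths(); // delta log files to merge, as plain path strings
    String getMaxCommitTime();       // max instant time to read up to
  }

  public static void main(String[] args) {
    RealtimeSplitView view = new RealtimeSplitView() {
      public List<String> getDeltaLogPaths() { return Collections.singletonList("/tbl/.f0.log.1"); }
      public String getMaxCommitTime() { return "101"; }
    };
    System.out.println(view.getDeltaLogPaths() + " up to " + view.getMaxCommitTime());
  }
}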
@@ -43,7 +43,6 @@ import org.apache.hudi.hadoop.realtime.RealtimeBootstrapBaseFileSplit;
 import org.apache.hudi.hadoop.realtime.RealtimeSplit;
 
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
 import org.apache.hadoop.mapred.FileSplit;
@@ -131,8 +130,8 @@ public class HoodieRealtimeInputFormatUtils extends HoodieInputFormatUtils {
       List<FileSplit> dataFileSplits = groupedInputSplits.get(fileSlice.getFileId());
       dataFileSplits.forEach(split -> {
         try {
-          List<FileStatus> logFilePaths = fileSlice.getLogFiles().sorted(HoodieLogFile.getLogFileComparator())
-              .map(logFile -> logFile.getFileStatus()).collect(Collectors.toList());
+          List<String> logFilePaths = fileSlice.getLogFiles().sorted(HoodieLogFile.getLogFileComparator())
+              .map(logFile -> logFile.getPath().toString()).collect(Collectors.toList());
           if (split instanceof BootstrapBaseFileSplit) {
             BootstrapBaseFileSplit eSplit = (BootstrapBaseFileSplit) split;
             String[] hosts = split.getLocationInfo() != null ? Arrays.stream(split.getLocationInfo())
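The input-format hunk above goes back to projecting each log file to its path string after sorting. A self-contained sketch of that sort-then-map idiom, where LogFile and the comparator are stand-ins for HoodieLogFile and HoodieLogFile.getLogFileComparator():

import java.util.Comparator;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class LogPathProjectionSketch {
  static final class LogFile { // stand-in for org.apache.hudi.common.model.HoodieLogFile
    final String path;
    final int version;
    LogFile(String path, int version) { this.path = path; this.version = version; }
  }

  public static void main(String[] args) {
    List<String> logFilePaths = Stream.of(new LogFile("/tbl/.f0.log.2", 2), new LogFile("/tbl/.f0.log.1", 1))
        .sorted(Comparator.comparingInt((LogFile f) -> f.version)) // stand-in for the log-file comparator
        .map(f -> f.path)                                          // keep only the path string
        .collect(Collectors.toList());
    System.out.println(logFilePaths); // [/tbl/.f0.log.1, /tbl/.f0.log.2]
  }
}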
@@ -20,7 +20,6 @@ package org.apache.hudi.hadoop.realtime;
 
 import org.apache.hudi.common.util.Option;
 
-import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.FileSplit;
@@ -57,7 +56,6 @@ public class TestHoodieRealtimeFileSplit {
 
   private HoodieRealtimeFileSplit split;
   private String basePath;
-  private List<FileStatus> deltaLogFileStatus;
   private List<String> deltaLogPaths;
   private String fileSplitName;
   private FileSplit baseFileSplit;
@@ -66,13 +64,12 @@ public class TestHoodieRealtimeFileSplit {
   @BeforeEach
   public void setUp(@TempDir java.nio.file.Path tempDir) throws Exception {
     basePath = tempDir.toAbsolutePath().toString();
-    deltaLogFileStatus = Collections.singletonList(new FileStatus(0L, false, 0, 0L, 0, new Path(basePath + "/1.log")));
     deltaLogPaths = Collections.singletonList(basePath + "/1.log");
     fileSplitName = basePath + "/test.file";
     baseFileSplit = new FileSplit(new Path(fileSplitName), 0, 100, new String[] {});
     maxCommitTime = "10001";
 
-    split = new HoodieRealtimeFileSplit(baseFileSplit, basePath, deltaLogFileStatus, maxCommitTime, Option.empty());
+    split = new HoodieRealtimeFileSplit(baseFileSplit, basePath, deltaLogPaths, maxCommitTime, Option.empty());
   }
 
   @Test
@@ -41,7 +41,6 @@ import org.apache.hudi.hadoop.testutils.InputFormatTestUtil;
 import org.apache.avro.Schema;
 import org.apache.avro.Schema.Field;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
@@ -188,7 +187,7 @@ public class TestHoodieRealtimeRecordReader {
       HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit(
           new FileSplit(new Path(partitionDir + "/fileid0_1-0-1_" + baseInstant + ".parquet"), 0, 1, baseJobConf),
           basePath.toUri().toString(), fileSlice.getLogFiles().sorted(HoodieLogFile.getLogFileComparator())
-              .map(h -> new FileStatus(0L, false, 0, 0L, 0, h.getPath())).collect(Collectors.toList()),
+              .map(h -> h.getPath().toString()).collect(Collectors.toList()),
           instantTime, Option.empty());
 
       // create a RecordReader to be used by HoodieRealtimeRecordReader
@@ -257,10 +256,10 @@
     FileCreateUtils.createDeltaCommit(basePath.toString(), newCommitTime);
 
     // create a split with baseFile (parquet file written earlier) and new log file(s)
-    FileStatus logFileFileStatus = new FileStatus(0L, false, 0, 0L, 0, writer.getLogFile().getPath());
+    String logFilePath = writer.getLogFile().getPath().toString();
     HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit(
         new FileSplit(new Path(partitionDir + "/fileid0_1-0-1_" + instantTime + ".parquet"), 0, 1, baseJobConf),
-        basePath.toUri().toString(), Collections.singletonList(logFileFileStatus), newCommitTime, Option.empty());
+        basePath.toUri().toString(), Collections.singletonList(logFilePath), newCommitTime, Option.empty());
 
     // create a RecordReader to be used by HoodieRealtimeRecordReader
     RecordReader<NullWritable, ArrayWritable> reader = new MapredParquetInputFormat().getRecordReader(
@@ -337,10 +336,10 @@
     InputFormatTestUtil.deltaCommit(basePath, newCommitTime);
 
     // create a split with baseFile (parquet file written earlier) and new log file(s)
-    FileStatus logFileStatus = new FileStatus(0L, false, 0, 0L, 0, writer.getLogFile().getPath());
+    String logFilePath = writer.getLogFile().getPath().toString();
     HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit(
         new FileSplit(new Path(partitionDir + "/fileid0_1-0-1_" + instantTime + ".parquet"), 0, 1, baseJobConf),
-        basePath.toUri().toString(), Collections.singletonList(logFileStatus), newCommitTime, Option.empty());
+        basePath.toUri().toString(), Collections.singletonList(logFilePath), newCommitTime, Option.empty());
 
     // create a RecordReader to be used by HoodieRealtimeRecordReader
     RecordReader<NullWritable, ArrayWritable> reader = new MapredParquetInputFormat().getRecordReader(
@@ -450,7 +449,7 @@
   public void testSchemaEvolutionAndRollbackBlockInLastLogFile(ExternalSpillableMap.DiskMapType diskMapType,
                                                                boolean isCompressionEnabled) throws Exception {
     // initial commit
-    List<FileStatus> logFilePaths = new ArrayList<>();
+    List<String> logFilePaths = new ArrayList<>();
     Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getSimpleSchema());
     HoodieTestUtils.init(hadoopConf, basePath.toString(), HoodieTableType.MERGE_ON_READ);
     String instantTime = "100";
@@ -471,7 +470,7 @@
     InputFormatTestUtil.writeDataBlockToLogFile(partitionDir, fs, schema, "fileid0", instantTime, newCommitTime,
         numberOfLogRecords, 0, 1);
     long size = writer.getCurrentSize();
-    logFilePaths.add(new FileStatus(0L, false, 0, 0L, 0, writer.getLogFile().getPath()));
+    logFilePaths.add(writer.getLogFile().getPath().toString());
     writer.close();
     assertTrue(size > 0, "block - size should be > 0");
 
@@ -479,7 +478,7 @@
     newCommitTime = "102";
     writer = InputFormatTestUtil.writeRollbackBlockToLogFile(partitionDir, fs, schema, "fileid0", instantTime,
         newCommitTime, "101", 1);
-    logFilePaths.add(new FileStatus(0L, false, 0, 0L, 0, writer.getLogFile().getPath()));
+    logFilePaths.add(writer.getLogFile().getPath().toString());
     writer.close();
     InputFormatTestUtil.deltaCommit(basePath, newCommitTime);