[HUDI-583] Code Cleanup, remove redundant code, and other changes (#1237)
@@ -33,12 +33,12 @@ import org.apache.parquet.avro.AvroParquetWriter;
 import org.junit.rules.TemporaryFolder;
 
 import java.io.File;
-import java.io.FilenameFilter;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
+import java.util.Objects;
 import java.util.UUID;
 
 public class InputFormatTestUtil {
@@ -59,13 +59,10 @@ public class InputFormatTestUtil {
 
   public static void simulateUpdates(File directory, final String originalCommit, int numberOfFilesUpdated,
       String newCommit, boolean randomize) throws IOException {
-    List<File> dataFiles = Arrays.asList(directory.listFiles(new FilenameFilter() {
-      @Override
-      public boolean accept(File dir, String name) {
-        String commitTs = FSUtils.getCommitTime(name);
-        return originalCommit.equals(commitTs);
-      }
-    }));
+    List<File> dataFiles = Arrays.asList(Objects.requireNonNull(directory.listFiles((dir, name) -> {
+      String commitTs = FSUtils.getCommitTime(name);
+      return originalCommit.equals(commitTs);
+    })));
     if (randomize) {
       Collections.shuffle(dataFiles);
     }
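
The hunk above collapses an anonymous FilenameFilter into a lambda and wraps the File.listFiles result in Objects.requireNonNull. A minimal, self-contained sketch of that idiom follows; the class name ListFilesExample and the suffix filter are illustrative, not part of the Hudi test code.

import java.io.File;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;

public class ListFilesExample {

  // Hypothetical helper: list the files in a directory whose names end with the given suffix.
  static List<File> filesWithSuffix(File directory, String suffix) {
    // File.listFiles(FilenameFilter) accepts a lambda because FilenameFilter is a functional
    // interface; requireNonNull fails fast if listFiles returns null (not a directory, or I/O error).
    return Arrays.asList(Objects.requireNonNull(
        directory.listFiles((dir, name) -> name.endsWith(suffix))));
  }

  public static void main(String[] args) {
    filesWithSuffix(new File("."), ".java").forEach(f -> System.out.println(f.getName()));
  }
}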
@@ -183,16 +180,10 @@ public class InputFormatTestUtil {
 
   public static void simulateParquetUpdates(File directory, Schema schema, String originalCommit,
       int totalNumberOfRecords, int numberOfRecordsToUpdate, String newCommit) throws IOException {
-    File fileToUpdate = directory.listFiles(new FilenameFilter() {
-      @Override
-      public boolean accept(File dir, String name) {
-        return name.endsWith("parquet");
-      }
-    })[0];
+    File fileToUpdate = Objects.requireNonNull(directory.listFiles((dir, name) -> name.endsWith("parquet")))[0];
     String fileId = FSUtils.getFileId(fileToUpdate.getName());
     File dataFile = new File(directory, FSUtils.makeDataFileName(newCommit, TEST_WRITE_TOKEN, fileId));
-    AvroParquetWriter parquetWriter = new AvroParquetWriter(new Path(dataFile.getAbsolutePath()), schema);
-    try {
+    try (AvroParquetWriter parquetWriter = new AvroParquetWriter(new Path(dataFile.getAbsolutePath()), schema)) {
       for (GenericRecord record : generateAvroRecords(schema, totalNumberOfRecords, originalCommit, fileId)) {
         if (numberOfRecordsToUpdate > 0) {
           // update this record
@@ -203,8 +194,6 @@ public class InputFormatTestUtil {
         }
         parquetWriter.write(record);
       }
-    } finally {
-      parquetWriter.close();
     }
 
   }
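
The two hunks above also move the AvroParquetWriter into a try-with-resources block, which is why the explicit finally { parquetWriter.close(); } disappears. A small illustrative sketch of the idiom with a plain java.io writer (the file name and content are made up):

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;

public class TryWithResourcesExample {
  public static void main(String[] args) throws IOException {
    // The resource is closed automatically when the block exits, normally or via an
    // exception, doing what the removed finally block used to do by hand.
    try (BufferedWriter writer = new BufferedWriter(new FileWriter("example.txt"))) {
      writer.write("hello");
    }
  }
}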
@@ -157,7 +157,7 @@ public class InputPathHandlerTest {
   @Test
   public void testInputPathHandler() throws IOException {
     inputPathHandler = new InputPathHandler(dfs.getConf(), inputPaths.toArray(
-        new Path[inputPaths.size()]), incrementalTables);
+        new Path[0]), incrementalTables);
     List<Path> actualPaths = inputPathHandler.getGroupedIncrementalPaths().values().stream()
         .flatMap(List::stream).collect(Collectors.toList());
     assertTrue(actualComparesToExpected(actualPaths, incrementalPaths));
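
toArray(new Path[inputPaths.size()]) and toArray(new Path[0]) produce the same array contents; the zero-length-array form simply lets the collection size the result itself and is commonly reported to be at least as fast on modern JVMs. An illustrative sketch with strings (names and values are hypothetical):

import java.util.Arrays;
import java.util.List;

public class ToArrayExample {
  public static void main(String[] args) {
    List<String> inputPaths = Arrays.asList("/tmp/a", "/tmp/b");
    // The zero-length array only supplies the component type; the list allocates
    // a correctly sized array and copies its elements into it.
    String[] asArray = inputPaths.toArray(new String[0]);
    System.out.println(Arrays.toString(asArray));
  }
}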
@@ -122,7 +122,7 @@ public class TestHoodieParquetInputFormat {
       throws IOException {
     List<HoodieWriteStat> writeStats = HoodieTestUtils.generateFakeHoodieWriteStat(1);
     HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata();
-    writeStats.stream().forEach(stat -> commitMetadata.addWriteStat(partitionPath, stat));
+    writeStats.forEach(stat -> commitMetadata.addWriteStat(partitionPath, stat));
     File file = new File(basePath.getRoot().toString() + "/.hoodie/", commitNumber + ".commit");
     file.createNewFile();
     FileOutputStream fileOutputStream = new FileOutputStream(file);
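
The change above drops the intermediate Stream: Iterable.forEach applies the same consumer directly to the collection. A minimal sketch (the list contents are illustrative):

import java.util.Arrays;
import java.util.List;

public class ForEachExample {
  public static void main(String[] args) {
    List<String> writeStats = Arrays.asList("stat-1", "stat-2");
    // Same side effect as writeStats.stream().forEach(...), without building a Stream.
    writeStats.forEach(System.out::println);
  }
}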
@@ -221,8 +221,8 @@ public class TestHoodieParquetInputFormat {
     String defaultmode = String.format(HoodieHiveUtil.HOODIE_CONSUME_MODE_PATTERN, "db3.first_trips");
     conf.set(defaultmode, HoodieHiveUtil.DEFAULT_SCAN_MODE);
     List<String> actualincrTables = HoodieHiveUtil.getIncrementalTableNames(Job.getInstance(conf));
-    for (int i = 0; i < expectedincrTables.length; i++) {
-      assertTrue(actualincrTables.contains(expectedincrTables[i]));
+    for (String expectedincrTable : expectedincrTables) {
+      assertTrue(actualincrTables.contains(expectedincrTable));
     }
   }
 
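
The indexed loop is replaced with an enhanced for loop because the index is only used to read the current element. A trivial sketch under the same assumption (array contents are made up):

public class EnhancedForExample {
  public static void main(String[] args) {
    String[] expectedIncrTables = {"db1.table1", "db2.table2"};
    // Equivalent to for (int i = 0; i < expectedIncrTables.length; i++) { ... },
    // minus the bookkeeping of an index variable that is never needed on its own.
    for (String expectedIncrTable : expectedIncrTables) {
      System.out.println(expectedIncrTable);
    }
  }
}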
@@ -68,7 +68,7 @@ import org.junit.rules.TemporaryFolder;
 import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Arrays;
+import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
@@ -167,9 +167,9 @@ public class TestHoodieRealtimeRecordReader {
 
   private void setHiveColumnNameProps(List<Schema.Field> fields, JobConf jobConf, boolean isPartitioned) {
     String names = fields.stream().map(Field::name).collect(Collectors.joining(","));
-    String postions = fields.stream().map(f -> String.valueOf(f.pos())).collect(Collectors.joining(","));
+    String positions = fields.stream().map(f -> String.valueOf(f.pos())).collect(Collectors.joining(","));
     jobConf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, names);
-    jobConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, postions);
+    jobConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, positions);
 
     String hiveOrderedColumnNames = fields.stream().filter(field -> !field.name().equalsIgnoreCase(PARTITION_COLUMN))
         .map(Field::name).collect(Collectors.joining(","));
@@ -286,7 +286,7 @@ public class TestHoodieRealtimeRecordReader {
     String logFilePath = writer.getLogFile().getPath().toString();
     HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit(
         new FileSplit(new Path(partitionDir + "/fileid0_1-0-1_" + commitTime + ".parquet"), 0, 1, jobConf),
-        basePath.getRoot().getPath(), Arrays.asList(logFilePath), newCommitTime);
+        basePath.getRoot().getPath(), Collections.singletonList(logFilePath), newCommitTime);
 
     // create a RecordReader to be used by HoodieRealtimeRecordReader
     RecordReader<NullWritable, ArrayWritable> reader = new MapredParquetInputFormat().getRecordReader(
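
This hunk (and the identical one below) swaps Arrays.asList(logFilePath) for Collections.singletonList(logFilePath) where exactly one element is passed. A short sketch showing the two are interchangeable for reads (the path value is illustrative):

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

public class SingletonListExample {
  public static void main(String[] args) {
    String logFilePath = "/tmp/fileid0.log.1"; // illustrative value
    // Both are fixed-size, single-element lists; singletonList states the intent explicitly
    // and avoids the varargs array that Arrays.asList allocates.
    List<String> viaArrays = Arrays.asList(logFilePath);
    List<String> viaSingleton = Collections.singletonList(logFilePath);
    System.out.println(viaArrays.equals(viaSingleton)); // prints true
  }
}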
@@ -361,7 +361,7 @@ public class TestHoodieRealtimeRecordReader {
     String logFilePath = writer.getLogFile().getPath().toString();
     HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit(
         new FileSplit(new Path(partitionDir + "/fileid0_1-0-1_" + commitTime + ".parquet"), 0, 1, jobConf),
-        basePath.getRoot().getPath(), Arrays.asList(logFilePath), newCommitTime);
+        basePath.getRoot().getPath(), Collections.singletonList(logFilePath), newCommitTime);
 
     // create a RecordReader to be used by HoodieRealtimeRecordReader
     RecordReader<NullWritable, ArrayWritable> reader = new MapredParquetInputFormat().getRecordReader(