1
0

[HUDI-583] Code Cleanup, remove redundant code, and other changes (#1237)

This commit is contained in:
Suneel Marthi
2020-02-02 11:03:44 +01:00
committed by GitHub
parent f27c7a16c6
commit 5b7bb142dc
69 changed files with 447 additions and 582 deletions

View File

@@ -33,12 +33,12 @@ import org.apache.parquet.avro.AvroParquetWriter;
import org.junit.rules.TemporaryFolder;
import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.UUID;
public class InputFormatTestUtil {
@@ -59,13 +59,10 @@ public class InputFormatTestUtil {
public static void simulateUpdates(File directory, final String originalCommit, int numberOfFilesUpdated,
String newCommit, boolean randomize) throws IOException {
List<File> dataFiles = Arrays.asList(directory.listFiles(new FilenameFilter() {
@Override
public boolean accept(File dir, String name) {
String commitTs = FSUtils.getCommitTime(name);
return originalCommit.equals(commitTs);
}
}));
List<File> dataFiles = Arrays.asList(Objects.requireNonNull(directory.listFiles((dir, name) -> {
String commitTs = FSUtils.getCommitTime(name);
return originalCommit.equals(commitTs);
})));
if (randomize) {
Collections.shuffle(dataFiles);
}
@@ -183,16 +180,10 @@ public class InputFormatTestUtil {
public static void simulateParquetUpdates(File directory, Schema schema, String originalCommit,
int totalNumberOfRecords, int numberOfRecordsToUpdate, String newCommit) throws IOException {
File fileToUpdate = directory.listFiles(new FilenameFilter() {
@Override
public boolean accept(File dir, String name) {
return name.endsWith("parquet");
}
})[0];
File fileToUpdate = Objects.requireNonNull(directory.listFiles((dir, name) -> name.endsWith("parquet")))[0];
String fileId = FSUtils.getFileId(fileToUpdate.getName());
File dataFile = new File(directory, FSUtils.makeDataFileName(newCommit, TEST_WRITE_TOKEN, fileId));
AvroParquetWriter parquetWriter = new AvroParquetWriter(new Path(dataFile.getAbsolutePath()), schema);
try {
try (AvroParquetWriter parquetWriter = new AvroParquetWriter(new Path(dataFile.getAbsolutePath()), schema)) {
for (GenericRecord record : generateAvroRecords(schema, totalNumberOfRecords, originalCommit, fileId)) {
if (numberOfRecordsToUpdate > 0) {
// update this record
@@ -203,8 +194,6 @@ public class InputFormatTestUtil {
}
parquetWriter.write(record);
}
} finally {
parquetWriter.close();
}
}

View File

@@ -157,7 +157,7 @@ public class InputPathHandlerTest {
@Test
public void testInputPathHandler() throws IOException {
inputPathHandler = new InputPathHandler(dfs.getConf(), inputPaths.toArray(
new Path[inputPaths.size()]), incrementalTables);
new Path[0]), incrementalTables);
List<Path> actualPaths = inputPathHandler.getGroupedIncrementalPaths().values().stream()
.flatMap(List::stream).collect(Collectors.toList());
assertTrue(actualComparesToExpected(actualPaths, incrementalPaths));

View File

@@ -122,7 +122,7 @@ public class TestHoodieParquetInputFormat {
throws IOException {
List<HoodieWriteStat> writeStats = HoodieTestUtils.generateFakeHoodieWriteStat(1);
HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata();
writeStats.stream().forEach(stat -> commitMetadata.addWriteStat(partitionPath, stat));
writeStats.forEach(stat -> commitMetadata.addWriteStat(partitionPath, stat));
File file = new File(basePath.getRoot().toString() + "/.hoodie/", commitNumber + ".commit");
file.createNewFile();
FileOutputStream fileOutputStream = new FileOutputStream(file);
@@ -221,8 +221,8 @@ public class TestHoodieParquetInputFormat {
String defaultmode = String.format(HoodieHiveUtil.HOODIE_CONSUME_MODE_PATTERN, "db3.first_trips");
conf.set(defaultmode, HoodieHiveUtil.DEFAULT_SCAN_MODE);
List<String> actualincrTables = HoodieHiveUtil.getIncrementalTableNames(Job.getInstance(conf));
for (int i = 0; i < expectedincrTables.length; i++) {
assertTrue(actualincrTables.contains(expectedincrTables[i]));
for (String expectedincrTable : expectedincrTables) {
assertTrue(actualincrTables.contains(expectedincrTable));
}
}

View File

@@ -68,7 +68,7 @@ import org.junit.rules.TemporaryFolder;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
@@ -167,9 +167,9 @@ public class TestHoodieRealtimeRecordReader {
private void setHiveColumnNameProps(List<Schema.Field> fields, JobConf jobConf, boolean isPartitioned) {
String names = fields.stream().map(Field::name).collect(Collectors.joining(","));
String postions = fields.stream().map(f -> String.valueOf(f.pos())).collect(Collectors.joining(","));
String positions = fields.stream().map(f -> String.valueOf(f.pos())).collect(Collectors.joining(","));
jobConf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, names);
jobConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, postions);
jobConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, positions);
String hiveOrderedColumnNames = fields.stream().filter(field -> !field.name().equalsIgnoreCase(PARTITION_COLUMN))
.map(Field::name).collect(Collectors.joining(","));
@@ -286,7 +286,7 @@ public class TestHoodieRealtimeRecordReader {
String logFilePath = writer.getLogFile().getPath().toString();
HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit(
new FileSplit(new Path(partitionDir + "/fileid0_1-0-1_" + commitTime + ".parquet"), 0, 1, jobConf),
basePath.getRoot().getPath(), Arrays.asList(logFilePath), newCommitTime);
basePath.getRoot().getPath(), Collections.singletonList(logFilePath), newCommitTime);
// create a RecordReader to be used by HoodieRealtimeRecordReader
RecordReader<NullWritable, ArrayWritable> reader = new MapredParquetInputFormat().getRecordReader(
@@ -361,7 +361,7 @@ public class TestHoodieRealtimeRecordReader {
String logFilePath = writer.getLogFile().getPath().toString();
HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit(
new FileSplit(new Path(partitionDir + "/fileid0_1-0-1_" + commitTime + ".parquet"), 0, 1, jobConf),
basePath.getRoot().getPath(), Arrays.asList(logFilePath), newCommitTime);
basePath.getRoot().getPath(), Collections.singletonList(logFilePath), newCommitTime);
// create a RecordReader to be used by HoodieRealtimeRecordReader
RecordReader<NullWritable, ArrayWritable> reader = new MapredParquetInputFormat().getRecordReader(