[HUDI-1055] Remove hardcoded parquet in tests (#2740)

* Remove hardcoded parquet in tests
* Use DataFileUtils.getInstance
* Rename DataFileUtils to BaseFileUtils (pattern sketched below)
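
For context, the pattern the bullets above describe, as a minimal hypothetical sketch (not part of this diff): derive the base-file extension from `HoodieTableConfig.DEFAULT_BASE_FILE_FORMAT` (or from `metaClient.getTableConfig().getBaseFileFormat()`) instead of hard-coding `.parquet`, and call format-specific readers through a `BaseFileUtils` instance instead of static `ParquetUtils` helpers. Only the class and method names come from this commit; the `BaseFileUtils` package, the `getInstance(HoodieFileFormat)` overload, and the `Set<String>` return type of `readRowKeys` are assumptions.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.util.BaseFileUtils; // assumed package for the renamed utils

import java.util.Set;

public class BaseFileFormatSketch {
  public static void main(String[] args) {
    // Instead of hard-coding ".parquet", derive the extension from the default base file format.
    String extension = HoodieTableConfig.DEFAULT_BASE_FILE_FORMAT.getFileExtension();
    String baseFileName = "file0001_1-0-1_20190528120000" + extension;
    System.out.println(baseFileName); // ".parquet" by default, but no longer hard-coded in the test

    // Instead of static ParquetUtils calls, resolve a format-specific reader via BaseFileUtils.
    // The getInstance(HoodieFileFormat) overload and the Set<String> return type are assumptions.
    BaseFileUtils utils = BaseFileUtils.getInstance(HoodieFileFormat.PARQUET);
    Set<String> rowKeys = utils.readRowKeys(new Configuration(), new Path("/tmp/table/file0001" + extension));
    rowKeys.forEach(System.out::println);
  }
}
```

With that in place, each test picks up whatever base file format the table is configured with, which is what the hunks below apply file by file.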

Co-authored-by: Vinoth Chandar <vinoth@apache.org>
Authored by TeRS-K on 2021-05-11 13:01:45 -04:00, committed by GitHub
parent ac72470e10
commit be9db2c4f5
42 changed files with 359 additions and 218 deletions


@@ -26,6 +26,7 @@ import org.apache.hudi.common.bootstrap.index.BootstrapIndex.IndexWriter;
import org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex;
import org.apache.hudi.common.model.BootstrapFileMapping;
import org.apache.hudi.common.model.HoodieFileGroupId;
+import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.testutils.HoodieCommonTestHarness;
import org.apache.hudi.common.util.collection.Pair;
@@ -62,7 +63,7 @@ public class TestBootstrapIndex extends HoodieCommonTestHarness {
private static final String[] PARTITIONS = {"2020/03/18", "2020/03/19", "2020/03/20", "2020/03/21"};
private static final Set<String> PARTITION_SET = Arrays.stream(PARTITIONS).collect(Collectors.toSet());
-private static final String BOOTSTRAP_BASE_PATH = "/tmp/source/parquet_tables/table1";
+private static final String BOOTSTRAP_BASE_PATH = "/tmp/source/data_tables/table1";
@BeforeEach
public void init() throws IOException {
@@ -168,7 +169,7 @@ public class TestBootstrapIndex extends HoodieCommonTestHarness {
return Arrays.stream(partitions).map(partition -> {
return Pair.of(partition, IntStream.range(0, numEntriesPerPartition).mapToObj(idx -> {
String hudiFileId = UUID.randomUUID().toString();
-String sourceFileName = idx + ".parquet";
+String sourceFileName = idx + HoodieTableConfig.DEFAULT_BASE_FILE_FORMAT.getFileExtension();
HoodieFileStatus sourceFileStatus = HoodieFileStatus.newBuilder()
.setPath(HoodiePath.newBuilder().setUri(sourceBasePath + "/" + partition + "/" + sourceFileName).build())
.setLength(256 * 1024 * 1024L)


@@ -21,6 +21,7 @@ package org.apache.hudi.common.fs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.model.HoodieLogFile;
+import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.testutils.HoodieCommonTestHarness;
import org.apache.hudi.common.testutils.HoodieTestUtils;
@@ -56,6 +57,7 @@ public class TestFSUtils extends HoodieCommonTestHarness {
private final long minCleanToKeep = 10;
private static String TEST_WRITE_TOKEN = "1-0-1";
+private static final String BASE_FILE_EXTENSION = HoodieTableConfig.DEFAULT_BASE_FILE_FORMAT.getFileExtension();
@Rule
public final EnvironmentVariables environmentVariables = new EnvironmentVariables();
@@ -69,14 +71,14 @@ public class TestFSUtils extends HoodieCommonTestHarness {
public void testMakeDataFileName() {
String instantTime = COMMIT_FORMATTER.format(new Date());
String fileName = UUID.randomUUID().toString();
-assertEquals(FSUtils.makeDataFileName(instantTime, TEST_WRITE_TOKEN, fileName), fileName + "_" + TEST_WRITE_TOKEN + "_" + instantTime + ".parquet");
+assertEquals(FSUtils.makeDataFileName(instantTime, TEST_WRITE_TOKEN, fileName), fileName + "_" + TEST_WRITE_TOKEN + "_" + instantTime + BASE_FILE_EXTENSION);
}
@Test
public void testMaskFileName() {
String instantTime = COMMIT_FORMATTER.format(new Date());
int taskPartitionId = 2;
-assertEquals(FSUtils.maskWithoutFileId(instantTime, taskPartitionId), "*_" + taskPartitionId + "_" + instantTime + ".parquet");
+assertEquals(FSUtils.maskWithoutFileId(instantTime, taskPartitionId), "*_" + taskPartitionId + "_" + instantTime + BASE_FILE_EXTENSION);
}
@Test
@@ -100,9 +102,12 @@ public class TestFSUtils extends HoodieCommonTestHarness {
});
// Files inside partitions and marker directories
-List<String> files = Arrays.asList("2016/04/15/1_1-0-1_20190528120000.parquet",
-"2016/05/16/2_1-0-1_20190528120000.parquet", ".hoodie/.temp/2/2016/05/16/2_1-0-1_20190528120000.parquet",
-".hoodie/.temp/2/2016/04/15/1_1-0-1_20190528120000.parquet");
+List<String> files = Stream.of("2016/04/15/1_1-0-1_20190528120000",
+"2016/05/16/2_1-0-1_20190528120000",
+".hoodie/.temp/2/2016/05/16/2_1-0-1_20190528120000",
+".hoodie/.temp/2/2016/04/15/1_1-0-1_20190528120000")
+.map(fileName -> fileName + BASE_FILE_EXTENSION)
+.collect(Collectors.toList());
files.forEach(f -> {
try {


@@ -18,6 +18,7 @@
package org.apache.hudi.common.model;
+import org.apache.hudi.common.table.HoodieTableConfig;
import org.junit.jupiter.api.Test;
import java.util.ArrayList;
@@ -33,7 +34,7 @@ public class TestHoodieDeltaWriteStat {
@Test
public void testBaseFileAndLogFiles() {
HoodieDeltaWriteStat writeStat = new HoodieDeltaWriteStat();
-String baseFile = "file1.parquet";
+String baseFile = "file1" + HoodieTableConfig.DEFAULT_BASE_FILE_FORMAT.getFileExtension();
String logFile1 = ".log1.log";
String logFile2 = ".log2.log";


@@ -284,7 +284,7 @@ public class TestTimelineUtils extends HoodieCommonTestHarness {
HoodieWriteStat stat = new HoodieWriteStat();
stat.setFileId(i + "");
stat.setPartitionPath(Paths.get(basePath, partition).toString());
-stat.setPath(commitTs + "." + i + ".parquet");
+stat.setPath(commitTs + "." + i + metaClient.getTableConfig().getBaseFileFormat().getFileExtension());
commit.addWriteStat(partition, stat);
}
for (Map.Entry<String, String> extraEntries : extraMetadata.entrySet()) {
@@ -303,7 +303,7 @@ public class TestTimelineUtils extends HoodieCommonTestHarness {
HoodieWriteStat stat = new HoodieWriteStat();
stat.setFileId(i + "");
stat.setPartitionPath(Paths.get(basePath, newFilePartition).toString());
-stat.setPath(commitTs + "." + i + ".parquet");
+stat.setPath(commitTs + "." + i + metaClient.getTableConfig().getBaseFileFormat().getFileExtension());
commit.addWriteStat(newFilePartition, stat);
}
Map<String, List<String>> partitionToReplaceFileIds = new HashMap<>();


@@ -306,7 +306,7 @@ public class TestHoodieTableFileSystemView extends HoodieCommonTestHarness {
String partitionPath = "2016/05/01";
new File(basePath + "/" + partitionPath).mkdirs();
String fileId = UUID.randomUUID().toString();
-String srcName = "part_0000.parquet";
+String srcName = "part_0000" + metaClient.getTableConfig().getBaseFileFormat().getFileExtension();
HoodieFileStatus srcFileStatus = HoodieFileStatus.newBuilder()
.setPath(HoodiePath.newBuilder().setUri(BOOTSTRAP_SOURCE_PATH + partitionPath + "/" + srcName).build())
.setLength(256 * 1024 * 1024L)


@@ -22,6 +22,7 @@ import org.apache.hudi.common.model.CompactionOperation;
import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.model.HoodieFileGroup;
+import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.testutils.MockHoodieTimeline;
@@ -66,7 +67,8 @@ public class TestPriorityBasedFileSystemView {
public void setUp() {
fsView = new PriorityBasedFileSystemView(primary, secondary);
testBaseFileStream = Stream.of(new HoodieBaseFile("test"));
-testFileSliceStream = Stream.of(new FileSlice("2020-01-01", "20:20", "file0001.parquet"));
+testFileSliceStream = Stream.of(new FileSlice("2020-01-01", "20:20",
+"file0001" + HoodieTableConfig.DEFAULT_BASE_FILE_FORMAT.getFileExtension()));
}
private void resetMocks() {


@@ -29,6 +29,7 @@ import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.model.HoodiePartitionMetadata;
import org.apache.hudi.common.model.HoodieReplaceCommitMetadata;
import org.apache.hudi.common.model.IOType;
+import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
@@ -54,9 +55,10 @@ import static org.apache.hudi.common.table.timeline.TimelineMetadataUtils.serial
public class FileCreateUtils {
private static final String WRITE_TOKEN = "1-0-1";
+private static final String BASE_FILE_EXTENSION = HoodieTableConfig.DEFAULT_BASE_FILE_FORMAT.getFileExtension();
public static String baseFileName(String instantTime, String fileId) {
-return baseFileName(instantTime, fileId, HoodieFileFormat.PARQUET.getFileExtension());
+return baseFileName(instantTime, fileId, BASE_FILE_EXTENSION);
}
public static String baseFileName(String instantTime, String fileId, String fileExtension) {
@@ -72,7 +74,7 @@ public class FileCreateUtils {
}
public static String markerFileName(String instantTime, String fileId, IOType ioType) {
-return markerFileName(instantTime, fileId, ioType, HoodieFileFormat.PARQUET.getFileExtension());
+return markerFileName(instantTime, fileId, ioType, BASE_FILE_EXTENSION);
}
public static String markerFileName(String instantTime, String fileId, IOType ioType, String fileExtension) {


@@ -29,6 +29,7 @@ import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.model.HoodieReplaceCommitMetadata;
import org.apache.hudi.common.model.IOType;
+import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
@@ -437,7 +438,7 @@ public class HoodieTestTable {
}
public FileStatus[] listAllBaseFiles() throws IOException {
-return listAllBaseFiles(HoodieFileFormat.PARQUET.getFileExtension());
+return listAllBaseFiles(HoodieTableConfig.DEFAULT_BASE_FILE_FORMAT.getFileExtension());
}
public FileStatus[] listAllBaseFiles(String fileExtension) throws IOException {


@@ -97,6 +97,8 @@ public class TestCompactionUtils extends HoodieCommonTestHarness {
@Test
public void testBuildFromFileSlice() {
+String extension = metaClient.getTableConfig().getBaseFileFormat().getFileExtension();
// Empty File-Slice with no data and log files
FileSlice emptyFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "empty1");
HoodieCompactionOperation op =
@@ -106,7 +108,7 @@ public class TestCompactionUtils extends HoodieCommonTestHarness {
// File Slice with data-file but no log files
FileSlice noLogFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "noLog1");
noLogFileSlice.setBaseFile(new DummyHoodieBaseFile("/tmp/noLog_1_000.parquet"));
noLogFileSlice.setBaseFile(new DummyHoodieBaseFile("/tmp/noLog_1_000" + extension));
op = CompactionUtils.buildFromFileSlice(DEFAULT_PARTITION_PATHS[0], noLogFileSlice, Option.of(metricsCaptureFn));
testFileSliceCompactionOpEquality(noLogFileSlice, op, DEFAULT_PARTITION_PATHS[0],
LATEST_COMPACTION_METADATA_VERSION);
@@ -122,7 +124,7 @@ public class TestCompactionUtils extends HoodieCommonTestHarness {
// File Slice with data-file and log files present
FileSlice fileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "noData1");
fileSlice.setBaseFile(new DummyHoodieBaseFile("/tmp/noLog_1_000.parquet"));
fileSlice.setBaseFile(new DummyHoodieBaseFile("/tmp/noLog_1_000" + extension));
fileSlice.addLogFile(
new HoodieLogFile(new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 1, TEST_WRITE_TOKEN))));
fileSlice.addLogFile(
@@ -135,16 +137,18 @@ public class TestCompactionUtils extends HoodieCommonTestHarness {
* Generate input for compaction plan tests.
*/
private Pair<List<Pair<String, FileSlice>>, HoodieCompactionPlan> buildCompactionPlan() {
+String extension = metaClient.getTableConfig().getBaseFileFormat().getFileExtension();
Path fullPartitionPath = new Path(new Path(metaClient.getBasePath()), DEFAULT_PARTITION_PATHS[0]);
FileSlice emptyFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "empty1");
FileSlice fileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "noData1");
-fileSlice.setBaseFile(new DummyHoodieBaseFile(fullPartitionPath.toString() + "/data1_1_000.parquet"));
+fileSlice.setBaseFile(new DummyHoodieBaseFile(fullPartitionPath.toString() + "/data1_1_000" + extension));
fileSlice.addLogFile(new HoodieLogFile(
new Path(fullPartitionPath, new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 1, TEST_WRITE_TOKEN)))));
fileSlice.addLogFile(new HoodieLogFile(
new Path(fullPartitionPath, new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 2, TEST_WRITE_TOKEN)))));
FileSlice noLogFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "noLog1");
-noLogFileSlice.setBaseFile(new DummyHoodieBaseFile(fullPartitionPath.toString() + "/noLog_1_000.parquet"));
+noLogFileSlice.setBaseFile(new DummyHoodieBaseFile(fullPartitionPath.toString() + "/noLog_1_000" + extension));
FileSlice noDataFileSlice = new FileSlice(DEFAULT_PARTITION_PATHS[0], "000", "noData1");
noDataFileSlice.addLogFile(new HoodieLogFile(
new Path(fullPartitionPath, new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 1, TEST_WRITE_TOKEN)))));


@@ -58,6 +58,8 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
*/
public class TestParquetUtils extends HoodieCommonTestHarness {
+private ParquetUtils parquetUtils = new ParquetUtils();
public static List<Arguments> bloomFilterTypeCodes() {
return Arrays.asList(
Arguments.of(BloomFilterTypeCode.SIMPLE.name()),
@@ -83,13 +85,13 @@ public class TestParquetUtils extends HoodieCommonTestHarness {
// Read and verify
List<String> rowKeysInFile = new ArrayList<>(
-ParquetUtils.readRowKeysFromParquet(HoodieTestUtils.getDefaultHadoopConf(), new Path(filePath)));
+parquetUtils.readRowKeys(HoodieTestUtils.getDefaultHadoopConf(), new Path(filePath)));
Collections.sort(rowKeysInFile);
Collections.sort(rowKeys);
assertEquals(rowKeys, rowKeysInFile, "Did not read back the expected list of keys");
BloomFilter filterInFile =
-ParquetUtils.readBloomFilterFromParquetMetadata(HoodieTestUtils.getDefaultHadoopConf(), new Path(filePath));
+parquetUtils.readBloomFilterFromMetadata(HoodieTestUtils.getDefaultHadoopConf(), new Path(filePath));
for (String rowKey : rowKeys) {
assertTrue(filterInFile.mightContain(rowKey), "key should be found in bloom filter");
}
@@ -113,7 +115,7 @@ public class TestParquetUtils extends HoodieCommonTestHarness {
// Read and verify
Set<String> filtered =
-ParquetUtils.filterParquetRowKeys(HoodieTestUtils.getDefaultHadoopConf(), new Path(filePath), filter);
+parquetUtils.filterRowKeys(HoodieTestUtils.getDefaultHadoopConf(), new Path(filePath), filter);
assertEquals(filter.size(), filtered.size(), "Filtered count does not match");
@@ -140,7 +142,7 @@ public class TestParquetUtils extends HoodieCommonTestHarness {
// Read and verify
List<HoodieKey> fetchedRows =
-ParquetUtils.fetchRecordKeyPartitionPathFromParquet(HoodieTestUtils.getDefaultHadoopConf(), new Path(filePath));
+parquetUtils.fetchRecordKeyPartitionPath(HoodieTestUtils.getDefaultHadoopConf(), new Path(filePath));
assertEquals(rowKeys.size(), fetchedRows.size(), "Total count does not match");
for (HoodieKey entry : fetchedRows) {
@@ -157,7 +159,7 @@ public class TestParquetUtils extends HoodieCommonTestHarness {
}
writeParquetFile(BloomFilterTypeCode.SIMPLE.name(), filePath, rowKeys);
-assertEquals(123, ParquetUtils.getRowCount(HoodieTestUtils.getDefaultHadoopConf(), new Path(filePath)));
+assertEquals(123, parquetUtils.getRowCount(HoodieTestUtils.getDefaultHadoopConf(), new Path(filePath)));
}
private void writeParquetFile(String typeCode, String filePath, List<String> rowKeys) throws Exception {


@@ -21,6 +21,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.model.HoodiePartitionMetadata;
+import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
@@ -36,6 +37,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
public final class TestTablePathUtils {
+private static final String BASE_FILE_EXTENSION = HoodieTableConfig.DEFAULT_BASE_FILE_FORMAT.getFileExtension();
@TempDir
static File tempDir;
@@ -73,9 +75,9 @@ public final class TestTablePathUtils {
partitionMetadata2.trySave(2);
// Create files
-URI filePathURI1 = Paths.get(partitionPathURI1.getPath(), "data1.parquet").toUri();
+URI filePathURI1 = Paths.get(partitionPathURI1.getPath(), "data1" + BASE_FILE_EXTENSION).toUri();
filePath1 = new Path(filePathURI1);
-URI filePathURI2 = Paths.get(partitionPathURI2.getPath(), "data2.parquet").toUri();
+URI filePathURI2 = Paths.get(partitionPathURI2.getPath(), "data2" + BASE_FILE_EXTENSION).toUri();
filePath2 = new Path(filePathURI2);
assertTrue(new File(filePathURI1).createNewFile());