1
0

Import from Hoodie private repo: Part 1

This commit is contained in:
Prasanna Rajaperumal
2016-12-16 14:03:59 -08:00
commit 0512da094b
56 changed files with 8868 additions and 0 deletions

View File

@@ -0,0 +1,45 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common;
import org.junit.Test;
import java.io.*;
public class
TestBloomFilter {
@Test
public void testAddKey() {
BloomFilter filter = new BloomFilter(100, 0.0000001);
filter.add("key1");
assert (filter.mightContain("key1"));
}
@Test
public void testSerialize() throws IOException, ClassNotFoundException {
BloomFilter filter = new BloomFilter(1000, 0.0000001);
filter.add("key1");
filter.add("key2");
String filterStr = filter.serializeToString();
// Rebuild
BloomFilter newFilter = new BloomFilter(filterStr);
assert (newFilter.mightContain("key1"));
assert (newFilter.mightContain("key2"));
}
}

View File

@@ -0,0 +1,96 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.model;
import com.uber.hoodie.common.util.FSUtils;
import org.junit.rules.TemporaryFolder;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Properties;
import java.util.UUID;
/**
 * Static helpers for tests that need a Hoodie dataset laid out on the local file system:
 * the metadata folder with its properties file, commit / inflight marker files and empty
 * data files.
 */
public class HoodieTestUtils {
    public static final String RAW_TRIPS_TEST_NAME = "raw_trips";

    /** Returns the path of {@code fileName} inside the metadata folder under {@code basePath}. */
    private static String metaFilePath(String basePath, String fileName) {
        return basePath + "/" + HoodieTableMetadata.METAFOLDER_NAME + "/" + fileName;
    }

    /**
     * Creates the metadata folder under {@code basePath} and writes a {@code hoodie.properties}
     * file into it containing the table name ({@link #RAW_TRIPS_TEST_NAME}) and the default
     * table type.
     *
     * @param basePath root directory of the test dataset
     * @throws IOException if the properties file cannot be written
     */
    public static final void initializeHoodieDirectory(String basePath) throws IOException {
        new File(basePath + "/" + HoodieTableMetadata.METAFOLDER_NAME).mkdirs();
        Properties properties = new Properties();
        properties.setProperty(HoodieTableMetadata.HOODIE_TABLE_NAME_PROP_NAME, RAW_TRIPS_TEST_NAME);
        properties.setProperty(HoodieTableMetadata.HOODIE_TABLE_TYPE_PROP_NAME,
            HoodieTableMetadata.DEFAULT_TABLE_TYPE.name());
        // Write into the same folder that was just created (previously a hard-coded "/.hoodie/"),
        // and let try-with-resources close the writer so a failure in close() cannot mask a
        // failure in store().
        try (FileWriter fileWriter =
                 new FileWriter(new File(metaFilePath(basePath, "hoodie.properties")))) {
            properties.store(fileWriter, "");
        }
    }

    /**
     * Creates a fresh temporary directory, initializes it as a Hoodie dataset and returns its
     * absolute path. The directory is not deleted by this class; callers own the cleanup.
     *
     * @return absolute path of the new base directory
     * @throws IOException if the directory or its metadata cannot be created
     */
    public static final String initializeTempHoodieBasePath() throws IOException {
        // Create a temp folder as the base path
        TemporaryFolder folder = new TemporaryFolder();
        folder.create();
        String basePath = folder.getRoot().getAbsolutePath();
        HoodieTestUtils.initializeHoodieDirectory(basePath);
        return basePath;
    }

    /** Returns the current wall-clock time formatted as {@code yyyyMMddHHmmss}. */
    public static final String getNewCommitTime() {
        return new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
    }

    /**
     * Creates one empty commit file in the metadata folder for each given commit time, named by
     * {@code FSUtils.makeCommitFileName}.
     */
    public static final void createCommitFiles(String basePath, String... commitTimes) throws IOException {
        for (String commitTime : commitTimes) {
            new File(metaFilePath(basePath, FSUtils.makeCommitFileName(commitTime))).createNewFile();
        }
    }

    /**
     * Creates one empty inflight file in the metadata folder for each given commit time, named by
     * {@code FSUtils.makeInflightCommitFileName}.
     */
    public static final void createInflightCommitFiles(String basePath, String... commitTimes) throws IOException {
        for (String commitTime : commitTimes) {
            new File(metaFilePath(basePath, FSUtils.makeInflightCommitFileName(commitTime))).createNewFile();
        }
    }

    /**
     * Creates an empty data file under a randomly generated file id.
     *
     * @return the generated file id
     */
    public static final String createNewDataFile(String basePath, String partitionPath, String commitTime) throws IOException {
        String fileID = UUID.randomUUID().toString();
        return createDataFile(basePath, partitionPath, commitTime, fileID);
    }

    /**
     * Creates the partition directory if needed and an empty data file in it, named by
     * {@code FSUtils.makeDataFileName(commitTime, 1, fileID)}.
     *
     * @return the {@code fileID} that was passed in
     */
    public static final String createDataFile(String basePath, String partitionPath, String commitTime, String fileID) throws IOException {
        String folderPath = basePath + "/" + partitionPath + "/";
        new File(folderPath).mkdirs();
        new File(folderPath + FSUtils.makeDataFileName(commitTime, 1, fileID)).createNewFile();
        return fileID;
    }

    /** Returns whether the data file that {@link #createDataFile} would create exists. */
    public static final boolean doesDataFileExist(String basePath, String partitionPath, String commitTime, String fileID) throws IOException {
        return new File(basePath + "/" + partitionPath + "/" + FSUtils.makeDataFileName(commitTime, 1, fileID)).exists();
    }

    /** Returns whether {@code commitTime + COMMIT_FILE_SUFFIX} exists in the metadata folder. */
    public static final boolean doesCommitExist(String basePath, String commitTime) {
        return new File(metaFilePath(basePath, commitTime + HoodieTableMetadata.COMMIT_FILE_SUFFIX)).exists();
    }

    /** Returns whether {@code commitTime + INFLIGHT_FILE_SUFFIX} exists in the metadata folder. */
    public static final boolean doesInflightExist(String basePath, String commitTime) {
        return new File(metaFilePath(basePath, commitTime + HoodieTableMetadata.INFLIGHT_FILE_SUFFIX)).exists();
    }
}

View File

@@ -0,0 +1,48 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.model;
import org.junit.Test;
import java.util.Arrays;
import static org.junit.Assert.*;
/**
 * Unit test for {@link HoodieCommits}: membership, range queries, {@code lastCommit} lookups and
 * the static commit-time comparison helpers.
 */
public class TestHoodieCommits {

    /**
     * Builds a {@link HoodieCommits} from an unordered list of commit times and checks its
     * query methods. All {@code assertEquals} calls pass the expected value first so that a
     * failure message labels expected and actual correctly.
     */
    @Test
    public void testHoodieCommits() throws Exception {
        HoodieCommits commits = new HoodieCommits(Arrays.asList("001", "005", "004", "002"));

        // Membership
        assertFalse(commits.contains("003"));
        assertTrue(commits.contains("002"));

        // Range queries around "003", which is not itself a commit
        assertEquals(Arrays.asList("004", "005"), commits.findCommitsAfter("003", 2));
        assertEquals(Arrays.asList("001", "002", "004"), commits.findCommitsInRange("000", "004"));

        // lastCommit(n): offset 0 matches lastCommit(), offset 3 is "001", offset 4 yields null
        assertEquals(commits.lastCommit(), commits.lastCommit(0));
        assertEquals("001", commits.lastCommit(3));
        assertNull(commits.lastCommit(4));

        assertEquals("001", commits.max("001", "000"));

        // Static comparison helpers
        assertFalse(HoodieCommits.isCommit1After("001", "002"));
        assertFalse(HoodieCommits.isCommit1After("001", "001"));
        assertTrue(HoodieCommits.isCommit1After("003", "002"));
        assertTrue(HoodieCommits.isCommit1BeforeOrOn("003", "003"));
    }
}

View File

@@ -0,0 +1,242 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.model;
import com.google.common.collect.Sets;
import com.uber.hoodie.common.util.FSUtils;
import com.uber.hoodie.exception.HoodieIOException;
import com.uber.hoodie.exception.HoodieRecordMissingException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.After;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.*;
import static org.junit.Assert.*;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
/**
 * Tests for {@link HoodieTableMetadata} (plus a few {@link FSUtils} / {@link HoodieCommits}
 * helpers) against a temporary dataset directory on the local file system.
 *
 * <p>Each test gets a fresh base path from {@link HoodieTestUtils#initializeTempHoodieBasePath()};
 * the whole directory tree is removed again in {@link #cleanup()}.
 */
public class TestHoodieTableMetadata {
    private String basePath = null;
    private HoodieTableMetadata metadata = null;

    @Rule
    public final ExpectedException exception = ExpectedException.none();

    /** Creates a new temp dataset and loads its (empty) metadata. */
    @Before
    public void init() throws Exception {
        basePath = HoodieTestUtils.initializeTempHoodieBasePath();
        metadata = new HoodieTableMetadata(FSUtils.getFs(), basePath, "testTable");
    }

    /** Commit and inflight files placed in the metadata folder show up after a metadata reload. */
    @Test
    public void testScanCommitTs() throws Exception {
        // Empty commit dir
        assertTrue(metadata.getAllCommits().isEmpty());

        // Create some commit files
        new File(basePath + "/.hoodie/20160504123032.commit").createNewFile();
        new File(basePath + "/.hoodie/20160503122032.commit").createNewFile();
        metadata = new HoodieTableMetadata(FSUtils.getFs(), basePath, "testTable");
        List<String> list = metadata.getAllCommits().getCommitList();
        assertEquals(2, list.size());
        assertTrue(list.contains("20160504123032"));
        assertTrue(list.contains("20160503122032"));

        // Check the .inflight files
        assertTrue(metadata.getAllInflightCommits().isEmpty());
        new File(basePath + "/.hoodie/20160505123032.inflight").createNewFile();
        new File(basePath + "/.hoodie/20160506122032.inflight").createNewFile();
        metadata = new HoodieTableMetadata(FSUtils.getFs(), basePath, "testTable");
        list = metadata.getAllInflightCommits();
        assertEquals(2, list.size());
        assertTrue(list.contains("20160505123032"));
        assertTrue(list.contains("20160506122032"));
    }

    /**
     * Looks up the data file name for a mocked record as data files and commit files are added.
     *
     * <p>NOTE(review): {@code exception.expect(...)} makes the test succeed as soon as the next
     * call throws {@link HoodieIOException}; when that happens none of the statements after it
     * are executed, so the later scenarios are effectively unverified. Consider splitting them
     * into separate tests and confirming their expectations against the implementation.
     */
    @Test
    public void testGetLastValidFileNameForRecord() throws Exception {
        FileSystem fs = FSUtils.getFs();
        String partitionPath = "2016/05/01";
        new File(basePath + "/" + partitionPath).mkdirs();
        String fileId = UUID.randomUUID().toString();
        HoodieRecord record = mock(HoodieRecord.class);
        when(record.getPartitionPath()).thenReturn(partitionPath);
        when(record.getCurrentLocation()).thenReturn(new HoodieRecordLocation("001", fileId));

        // First, no commit for this record
        exception.expect(HoodieIOException.class);
        metadata.getFilenameForRecord(fs, record);

        // Only one commit, but is not safe
        String commitTime1 = "20160501123212";
        String fileName1 = FSUtils.makeDataFileName(commitTime1, 1, fileId);
        new File(basePath + "/" + partitionPath + "/" + fileName1).createNewFile();
        assertNull(metadata.getFilenameForRecord(fs, record));

        // Make this commit safe
        new File(basePath + "/.hoodie/" + commitTime1 + ".commit").createNewFile();
        metadata = new HoodieTableMetadata(fs, basePath, "testTable");
        assertEquals(fileName1, metadata.getFilenameForRecord(fs, record));

        // Do another commit, but not safe
        String commitTime2 = "20160502123012";
        String fileName2 = FSUtils.makeDataFileName(commitTime2, 1, fileId);
        new File(basePath + "/" + partitionPath + "/" + fileName2).createNewFile();
        assertEquals(fileName1, metadata.getFilenameForRecord(fs, record));

        // Make it safe
        new File(basePath + "/.hoodie/" + commitTime2 + ".commit").createNewFile();
        metadata = new HoodieTableMetadata(fs, basePath, "testTable");
        assertEquals(fileName2, metadata.getFilenameForRecord(fs, record));
    }

    /** Partition directories created under the base path are listed by their relative path. */
    @Test
    public void testGetAllPartitionPaths() throws IOException {
        FileSystem fs = FSUtils.getFs();

        // Empty
        List<String> partitions = FSUtils.getAllPartitionPaths(fs, basePath);
        assertEquals(0, partitions.size());

        // Add some dirs
        new File(basePath + "/2016/04/01").mkdirs();
        new File(basePath + "/2015/04/01").mkdirs();
        partitions = FSUtils.getAllPartitionPaths(fs, basePath);
        assertEquals(2, partitions.size());
        assertTrue(partitions.contains("2016/04/01"));
        assertTrue(partitions.contains("2015/04/01"));
    }

    /**
     * All versions of each file id in a partition are returned, and for fileId1 the last list
     * entry carries the earliest commit time while the entry before it carries the latest.
     */
    @Test
    public void testGetFileVersionsInPartition() throws IOException {
        // Put some files in the partition
        String fullPartitionPath = basePath + "/2016/05/01/";
        new File(fullPartitionPath).mkdirs();

        String commitTime1 = "20160501123032";
        String commitTime2 = "20160502123032";
        String commitTime3 = "20160503123032";
        String commitTime4 = "20160504123032";
        HoodieTestUtils.createCommitFiles(basePath, commitTime1, commitTime2, commitTime3, commitTime4);

        String fileId1 = UUID.randomUUID().toString();
        String fileId2 = UUID.randomUUID().toString();
        String fileId3 = UUID.randomUUID().toString();

        new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId1)).createNewFile();
        new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId1)).createNewFile();
        new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId2)).createNewFile();
        new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime2, 1, fileId2)).createNewFile();
        new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId2)).createNewFile();
        new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId3)).createNewFile();
        new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId3)).createNewFile();

        metadata = new HoodieTableMetadata(FSUtils.getFs(), basePath, "testTable");
        Map<String, List<FileStatus>> fileVersions =
            metadata.getAllVersionsInPartition(FSUtils.getFs(), "2016/05/01");
        assertEquals(2, fileVersions.get(fileId1).size());
        assertEquals(3, fileVersions.get(fileId2).size());
        assertEquals(2, fileVersions.get(fileId3).size());

        List<FileStatus> versionsOfFile1 = fileVersions.get(fileId1);
        String commitTs = FSUtils.getCommitTime(
            versionsOfFile1.get(versionsOfFile1.size() - 1).getPath().getName());
        assertEquals(commitTime1, commitTs);
        commitTs = FSUtils.getCommitTime(
            versionsOfFile1.get(versionsOfFile1.size() - 2).getPath().getName());
        assertEquals(commitTime4, commitTs);
    }

    /**
     * For a given maximum commit time, exactly one file per file id is returned: the newest
     * version whose commit time does not exceed that maximum.
     */
    @Test
    public void testGetOnlyLatestVersionFiles() throws Exception {
        // Put some files in the partition
        String fullPartitionPath = basePath + "/2016/05/01/";
        new File(fullPartitionPath).mkdirs();

        String commitTime1 = "20160501123032";
        String commitTime2 = "20160502123032";
        String commitTime3 = "20160503123032";
        String commitTime4 = "20160504123032";
        String fileId1 = UUID.randomUUID().toString();
        String fileId2 = UUID.randomUUID().toString();
        String fileId3 = UUID.randomUUID().toString();

        new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId1)).createNewFile();
        new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId1)).createNewFile();
        new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId2)).createNewFile();
        new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime2, 1, fileId2)).createNewFile();
        new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId2)).createNewFile();
        new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId3)).createNewFile();
        new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId3)).createNewFile();

        new File(basePath + "/.hoodie/" + commitTime1 + ".commit").createNewFile();
        new File(basePath + "/.hoodie/" + commitTime2 + ".commit").createNewFile();
        new File(basePath + "/.hoodie/" + commitTime3 + ".commit").createNewFile();
        new File(basePath + "/.hoodie/" + commitTime4 + ".commit").createNewFile();

        // Now we list the entire partition
        FileSystem fs = FSUtils.getFs();
        FileStatus[] statuses = fs.listStatus(new Path(fullPartitionPath));
        assertEquals(7, statuses.length);

        metadata = new HoodieTableMetadata(fs, basePath, "testTable");
        FileStatus[] statuses1 = metadata
            .getLatestVersionInPartition(fs, "2016/05/01", commitTime4);
        assertEquals(3, statuses1.length);
        Set<String> filenames = Sets.newHashSet();
        for (FileStatus status : statuses1) {
            filenames.add(status.getPath().getName());
        }
        assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime4, 1, fileId1)));
        assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, 1, fileId2)));
        assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime4, 1, fileId3)));

        // Reset the max commit time
        FileStatus[] statuses2 = metadata
            .getLatestVersionInPartition(fs, "2016/05/01", commitTime3);
        assertEquals(3, statuses2.length);
        filenames = Sets.newHashSet();
        for (FileStatus status : statuses2) {
            filenames.add(status.getPath().getName());
        }
        assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime1, 1, fileId1)));
        assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, 1, fileId2)));
        assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, 1, fileId3)));
    }

    /** Commit-time strings in {@code yyyyMMddHHmmss} form compare chronologically. */
    @Test
    public void testCommitTimeComparison() {
        String commitTime1 = "20160504123032";
        String commitTime2 = "20151231203159";
        assertTrue(HoodieCommits.isCommit1After(commitTime1, commitTime2));
        assertTrue(HoodieCommits.isCommit1BeforeOrOn(commitTime1, commitTime1));
        assertTrue(HoodieCommits.isCommit1BeforeOrOn(commitTime2, commitTime1));
    }

    /**
     * Removes the temp dataset created in {@link #init()}. A plain {@code File.delete()} cannot
     * remove a non-empty directory, and the base path always contains the metadata folder, so
     * the tree has to be deleted bottom-up.
     */
    @After
    public void cleanup() {
        if (basePath != null) {
            deleteRecursively(new File(basePath));
        }
    }

    /** Best-effort recursive delete of a file or directory tree; failures are ignored. */
    private static void deleteRecursively(File file) {
        File[] children = file.listFiles(); // null for plain files and unreadable directories
        if (children != null) {
            for (File child : children) {
                deleteRecursively(child);
            }
        }
        file.delete();
    }
}

View File

@@ -0,0 +1,70 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.util;
import org.junit.Test;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.UUID;
import static org.junit.Assert.assertTrue;
/**
 * Tests for the file-name helpers in {@link FSUtils}: building data/commit file names and
 * extracting the commit time and file id back out of them.
 */
public class TestFSUtils {

    /** Task partition id used by every test that builds a data file name. */
    private static final int TASK_PARTITION_ID = 2;

    /** Current time formatted as a {@code yyyyMMddHHmmss} commit timestamp. */
    private static String currentCommitTime() {
        return new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
    }

    /** Data file names have the form {@code <fileId>_<taskPartitionId>_<commitTime>.parquet}. */
    @Test
    public void testMakeDataFileName() {
        final String commitTs = currentCommitTime();
        final String fileId = UUID.randomUUID().toString();
        final String expected = fileId + "_" + TASK_PARTITION_ID + "_" + commitTs + ".parquet";
        final String actual = FSUtils.makeDataFileName(commitTs, TASK_PARTITION_ID, fileId);
        assertTrue(actual.equals(expected));
    }

    /** The mask has the data file name form with {@code *} in place of the file id. */
    @Test
    public void testMaskFileName() {
        final String commitTs = currentCommitTime();
        final String expected = "*_" + TASK_PARTITION_ID + "_" + commitTs + ".parquet";
        final String actual = FSUtils.maskWithoutFileId(commitTs, TASK_PARTITION_ID);
        assertTrue(actual.equals(expected));
    }

    /** The commit time can be recovered from a generated data file name. */
    @Test
    public void testGetCommitTime() {
        final String commitTs = currentCommitTime();
        final String fileId = UUID.randomUUID().toString();
        final String dataFileName = FSUtils.makeDataFileName(commitTs, TASK_PARTITION_ID, fileId);
        final String extracted = FSUtils.getCommitTime(dataFileName);
        assertTrue(extracted.equals(commitTs));
    }

    /** The commit time can be recovered from a generated commit file name. */
    @Test
    public void testGetCommitFromCommitFile() {
        final String commitTs = currentCommitTime();
        final String extracted = FSUtils.getCommitFromCommitFile(FSUtils.makeCommitFileName(commitTs));
        assertTrue(extracted.equals(commitTs));
    }

    /** The file id can be recovered from a generated data file name. */
    @Test
    public void testGetFileNameWithoutMeta() {
        final String commitTs = currentCommitTime();
        final String fileId = UUID.randomUUID().toString();
        final String dataFileName = FSUtils.makeDataFileName(commitTs, TASK_PARTITION_ID, fileId);
        final String extracted = FSUtils.getFileId(dataFileName);
        assertTrue(extracted.equals(fileId));
    }
}

View File

@@ -0,0 +1,35 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.util;
import org.junit.Test;
import static org.junit.Assert.*;
/**
 * Tests for {@link NumericUtils#humanReadableByteCount}.
 */
public class TestNumericUtils {

    /**
     * Checks the formatted output for byte counts from zero up to the largest {@code long}.
     *
     * <p>Uses {@code assertEquals(expected, actual)} so that a failure reports the string that
     * was actually produced (and a {@code null} result fails the assertion instead of raising a
     * {@link NullPointerException}).
     */
    @Test
    public void testHumanReadableByteCount() {
        assertEquals("0.0 B", NumericUtils.humanReadableByteCount(0));
        assertEquals("27.0 B", NumericUtils.humanReadableByteCount(27));
        assertEquals("1023.0 B", NumericUtils.humanReadableByteCount(1023));
        // 1024 is the first value reported in KB
        assertEquals("1.0 KB", NumericUtils.humanReadableByteCount(1024));
        assertEquals("108.0 KB", NumericUtils.humanReadableByteCount(110592));
        assertEquals("27.0 GB", NumericUtils.humanReadableByteCount(28991029248L));
        assertEquals("1.7 TB", NumericUtils.humanReadableByteCount(1855425871872L));
        // Long.MAX_VALUE
        assertEquals("8.0 EB", NumericUtils.humanReadableByteCount(9223372036854775807L));
    }
}

View File

@@ -0,0 +1,92 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.util;
import com.uber.hoodie.avro.HoodieAvroWriteSupport;
import com.uber.hoodie.common.BloomFilter;
import com.uber.hoodie.common.model.HoodieRecord;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.avro.AvroSchemaConverter;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.junit.Before;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.UUID;
import static org.junit.Assert.*;
public class TestParquetUtils {
private String basePath;
@Before
public void setup() throws IOException {
// Create a temp folder as the base path
TemporaryFolder folder = new TemporaryFolder();
folder.create();
basePath = folder.getRoot().getAbsolutePath();
}
@Test
public void testHoodieWriteSupport() throws Exception {
List<String> rowKeys = new ArrayList<>();
for (int i = 0; i < 1000; i++) {
rowKeys.add(UUID.randomUUID().toString());
}
// Write out a parquet file
Schema schema = HoodieAvroUtils.getRecordKeySchema();
BloomFilter filter = new BloomFilter(1000, 0.0001);
HoodieAvroWriteSupport writeSupport = new HoodieAvroWriteSupport(new AvroSchemaConverter().convert(schema), schema, filter);
String filePath = basePath + "/test.parquet";
ParquetWriter writer = new ParquetWriter(new Path(filePath),
writeSupport, CompressionCodecName.GZIP, 120 * 1024 * 1024, ParquetWriter.DEFAULT_PAGE_SIZE);
for (String rowKey : rowKeys) {
GenericRecord rec = new GenericData.Record(schema);
rec.put(HoodieRecord.RECORD_KEY_METADATA_FIELD, rowKey);
writer.write(rec);
filter.add(rowKey);
}
writer.close();
// Read and verify
List<String> rowKeysInFile = new ArrayList<>(ParquetUtils.readRowKeysFromParquet(new Path(filePath)));
Collections.sort(rowKeysInFile);
Collections.sort(rowKeys);
assertEquals("Did not read back the expected list of keys", rowKeys, rowKeysInFile);
BloomFilter filterInFile = ParquetUtils.readBloomFilterFromParquetMetadata(new Path(filePath));
for (String rowKey : rowKeys) {
assertTrue("key should be found in bloom filter", filterInFile.mightContain(rowKey));
}
}
}