Import from Hoodie private repo: Part 1
This commit is contained in:
@@ -0,0 +1,70 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.common.util;
|
||||
|
||||
import org.junit.Test;

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.UUID;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
||||
public class TestFSUtils {
|
||||
|
||||
@Test
|
||||
public void testMakeDataFileName() {
|
||||
String commitTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
|
||||
int taskPartitionId = 2;
|
||||
String fileName = UUID.randomUUID().toString();
|
||||
assertTrue(FSUtils.makeDataFileName(commitTime, taskPartitionId, fileName)
|
||||
.equals(fileName + "_" + taskPartitionId + "_" + commitTime + ".parquet"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMaskFileName() {
|
||||
String commitTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
|
||||
int taskPartitionId = 2;
|
||||
assertTrue(FSUtils.maskWithoutFileId(commitTime, taskPartitionId)
|
||||
.equals("*_" + taskPartitionId + "_" + commitTime + ".parquet"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetCommitTime() {
|
||||
String commitTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
|
||||
int taskPartitionId = 2;
|
||||
String fileName = UUID.randomUUID().toString();
|
||||
String fullFileName = FSUtils.makeDataFileName(commitTime, taskPartitionId, fileName);
|
||||
assertTrue(FSUtils.getCommitTime(fullFileName).equals(commitTime));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetCommitFromCommitFile() {
|
||||
String commitTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
|
||||
String commitFileName = FSUtils.makeCommitFileName(commitTime);
|
||||
assertTrue(FSUtils.getCommitFromCommitFile(commitFileName).equals(commitTime));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetFileNameWithoutMeta() {
|
||||
String commitTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
|
||||
int taskPartitionId = 2;
|
||||
String fileName = UUID.randomUUID().toString();
|
||||
String fullFileName = FSUtils.makeDataFileName(commitTime, taskPartitionId, fileName);
|
||||
assertTrue(FSUtils.getFileId(fullFileName).equals(fileName));
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,35 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.common.util;
|
||||
|
||||
import org.junit.Test;
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
public class TestNumericUtils {
|
||||
@Test
|
||||
public void testHumanReadableByteCount() {
|
||||
assertTrue(NumericUtils.humanReadableByteCount(0).equals("0.0 B"));
|
||||
assertTrue(NumericUtils.humanReadableByteCount(27).equals("27.0 B"));
|
||||
assertTrue(NumericUtils.humanReadableByteCount(1023).equals("1023.0 B"));
|
||||
assertTrue(NumericUtils.humanReadableByteCount(1024).equals("1.0 KB"));
|
||||
assertTrue(NumericUtils.humanReadableByteCount(110592).equals("108.0 KB"));
|
||||
assertTrue(NumericUtils.humanReadableByteCount(28991029248L).equals("27.0 GB"));
|
||||
assertTrue(NumericUtils.humanReadableByteCount(1855425871872L).equals("1.7 TB"));
|
||||
assertTrue(NumericUtils.humanReadableByteCount(9223372036854775807L).equals("8.0 EB"));
|
||||
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,92 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.common.util;
|
||||
|
||||
import com.uber.hoodie.avro.HoodieAvroWriteSupport;
|
||||
import com.uber.hoodie.common.BloomFilter;
|
||||
import com.uber.hoodie.common.model.HoodieRecord;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericData;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.parquet.avro.AvroSchemaConverter;
|
||||
import org.apache.parquet.hadoop.ParquetWriter;
|
||||
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.junit.rules.TemporaryFolder;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
public class TestParquetUtils {
|
||||
|
||||
|
||||
private String basePath;
|
||||
|
||||
@Before
|
||||
public void setup() throws IOException {
|
||||
// Create a temp folder as the base path
|
||||
TemporaryFolder folder = new TemporaryFolder();
|
||||
folder.create();
|
||||
basePath = folder.getRoot().getAbsolutePath();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHoodieWriteSupport() throws Exception {
|
||||
|
||||
List<String> rowKeys = new ArrayList<>();
|
||||
for (int i = 0; i < 1000; i++) {
|
||||
rowKeys.add(UUID.randomUUID().toString());
|
||||
}
|
||||
|
||||
// Write out a parquet file
|
||||
Schema schema = HoodieAvroUtils.getRecordKeySchema();
|
||||
BloomFilter filter = new BloomFilter(1000, 0.0001);
|
||||
HoodieAvroWriteSupport writeSupport = new HoodieAvroWriteSupport(new AvroSchemaConverter().convert(schema), schema, filter);
|
||||
|
||||
|
||||
String filePath = basePath + "/test.parquet";
|
||||
ParquetWriter writer = new ParquetWriter(new Path(filePath),
|
||||
writeSupport, CompressionCodecName.GZIP, 120 * 1024 * 1024, ParquetWriter.DEFAULT_PAGE_SIZE);
|
||||
for (String rowKey : rowKeys) {
|
||||
GenericRecord rec = new GenericData.Record(schema);
|
||||
rec.put(HoodieRecord.RECORD_KEY_METADATA_FIELD, rowKey);
|
||||
writer.write(rec);
|
||||
filter.add(rowKey);
|
||||
}
|
||||
writer.close();
|
||||
|
||||
|
||||
// Read and verify
|
||||
List<String> rowKeysInFile = new ArrayList<>(ParquetUtils.readRowKeysFromParquet(new Path(filePath)));
|
||||
Collections.sort(rowKeysInFile);
|
||||
Collections.sort(rowKeys);
|
||||
|
||||
assertEquals("Did not read back the expected list of keys", rowKeys, rowKeysInFile);
|
||||
BloomFilter filterInFile = ParquetUtils.readBloomFilterFromParquetMetadata(new Path(filePath));
|
||||
for (String rowKey : rowKeys) {
|
||||
assertTrue("key should be found in bloom filter", filterInFile.mightContain(rowKey));
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user