|
|
|
|
@@ -18,17 +18,6 @@
|
|
|
|
|
|
|
|
|
|
package org.apache.hudi.io.storage;
|
|
|
|
|
|
|
|
|
|
import org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex;
|
|
|
|
|
import org.apache.hudi.common.engine.TaskContextSupplier;
|
|
|
|
|
import org.apache.hudi.common.fs.FSUtils;
|
|
|
|
|
import org.apache.hudi.common.model.EmptyHoodieRecordPayload;
|
|
|
|
|
import org.apache.hudi.common.model.HoodieAvroRecord;
|
|
|
|
|
import org.apache.hudi.common.model.HoodieKey;
|
|
|
|
|
import org.apache.hudi.common.model.HoodieRecord;
|
|
|
|
|
import org.apache.hudi.common.util.FileIOUtils;
|
|
|
|
|
import org.apache.hudi.config.HoodieIndexConfig;
|
|
|
|
|
import org.apache.hudi.config.HoodieWriteConfig;
|
|
|
|
|
|
|
|
|
|
import org.apache.avro.Schema;
|
|
|
|
|
import org.apache.avro.generic.GenericData;
|
|
|
|
|
import org.apache.avro.generic.GenericRecord;
|
|
|
|
|
@@ -39,7 +28,17 @@ import org.apache.hadoop.fs.Path;
|
|
|
|
|
import org.apache.hadoop.hbase.CellComparatorImpl;
|
|
|
|
|
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
|
|
|
|
|
import org.apache.hadoop.hbase.io.hfile.HFile;
|
|
|
|
|
import org.apache.hadoop.hbase.util.Pair;
|
|
|
|
|
import org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex;
|
|
|
|
|
import org.apache.hudi.common.engine.TaskContextSupplier;
|
|
|
|
|
import org.apache.hudi.common.fs.FSUtils;
|
|
|
|
|
import org.apache.hudi.common.model.EmptyHoodieRecordPayload;
|
|
|
|
|
import org.apache.hudi.common.model.HoodieAvroRecord;
|
|
|
|
|
import org.apache.hudi.common.model.HoodieKey;
|
|
|
|
|
import org.apache.hudi.common.model.HoodieRecord;
|
|
|
|
|
import org.apache.hudi.common.util.FileIOUtils;
|
|
|
|
|
import org.apache.hudi.common.util.Option;
|
|
|
|
|
import org.apache.hudi.config.HoodieIndexConfig;
|
|
|
|
|
import org.apache.hudi.config.HoodieWriteConfig;
|
|
|
|
|
import org.junit.jupiter.api.Test;
|
|
|
|
|
import org.junit.jupiter.params.ParameterizedTest;
|
|
|
|
|
import org.junit.jupiter.params.provider.Arguments;
|
|
|
|
|
@@ -51,21 +50,25 @@ import java.io.IOException;
|
|
|
|
|
import java.util.ArrayList;
|
|
|
|
|
import java.util.Arrays;
|
|
|
|
|
import java.util.Collections;
|
|
|
|
|
import java.util.HashMap;
|
|
|
|
|
import java.util.HashSet;
|
|
|
|
|
import java.util.Iterator;
|
|
|
|
|
import java.util.List;
|
|
|
|
|
import java.util.Map;
|
|
|
|
|
import java.util.Set;
|
|
|
|
|
import java.util.Spliterator;
|
|
|
|
|
import java.util.Spliterators;
|
|
|
|
|
import java.util.TreeMap;
|
|
|
|
|
import java.util.function.Supplier;
|
|
|
|
|
import java.util.stream.Collectors;
|
|
|
|
|
import java.util.stream.IntStream;
|
|
|
|
|
import java.util.stream.Stream;
|
|
|
|
|
import java.util.stream.StreamSupport;
|
|
|
|
|
|
|
|
|
|
import static org.apache.hudi.common.testutils.FileSystemTestUtils.RANDOM;
|
|
|
|
|
import static org.apache.hudi.common.testutils.SchemaTestUtil.getSchemaFromResource;
|
|
|
|
|
import static org.apache.hudi.common.util.CollectionUtils.toStream;
|
|
|
|
|
import static org.apache.hudi.io.storage.HoodieHFileConfig.HFILE_COMPARATOR;
|
|
|
|
|
import static org.apache.hudi.io.storage.HoodieHFileReader.KEY_SCHEMA;
|
|
|
|
|
import static org.apache.hudi.io.storage.HoodieHFileReader.SCHEMA_KEY;
|
|
|
|
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
|
|
|
|
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
|
|
|
|
import static org.junit.jupiter.api.Assertions.assertNull;
|
|
|
|
|
@@ -124,7 +127,7 @@ public class TestHoodieHFileReaderWriter extends TestHoodieReaderWriterBase {
|
|
|
|
|
FileSystem fs = getFilePath().getFileSystem(conf);
|
|
|
|
|
HFile.Reader hfileReader = HoodieHFileUtils.createHFileReader(fs, getFilePath(), new CacheConfig(conf), conf);
|
|
|
|
|
assertEquals(getSchemaFromResource(TestHoodieHFileReaderWriter.class, schemaPath),
|
|
|
|
|
new Schema.Parser().parse(new String(hfileReader.getHFileInfo().get(KEY_SCHEMA.getBytes()))));
|
|
|
|
|
new Schema.Parser().parse(new String(hfileReader.getHFileInfo().get(SCHEMA_KEY.getBytes()))));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private static Stream<Arguments> populateMetaFieldsAndTestAvroWithMeta() {
|
|
|
|
|
@@ -142,7 +145,7 @@ public class TestHoodieHFileReaderWriter extends TestHoodieReaderWriterBase {
|
|
|
|
|
Schema avroSchema = getSchemaFromResource(TestHoodieOrcReaderWriter.class, "/exampleSchemaWithMetaFields.avsc");
|
|
|
|
|
HoodieFileWriter<GenericRecord> writer = createWriter(avroSchema, populateMetaFields);
|
|
|
|
|
List<String> keys = new ArrayList<>();
|
|
|
|
|
Map<String, GenericRecord> recordMap = new HashMap<>();
|
|
|
|
|
Map<String, GenericRecord> recordMap = new TreeMap<>();
|
|
|
|
|
for (int i = 0; i < 100; i++) {
|
|
|
|
|
GenericRecord record = new GenericData.Record(avroSchema);
|
|
|
|
|
String key = String.format("%s%04d", "key", i);
|
|
|
|
|
@@ -163,24 +166,30 @@ public class TestHoodieHFileReaderWriter extends TestHoodieReaderWriterBase {
|
|
|
|
|
|
|
|
|
|
Configuration conf = new Configuration();
|
|
|
|
|
HoodieHFileReader hoodieHFileReader = (HoodieHFileReader) createReader(conf);
|
|
|
|
|
List<Pair<String, IndexedRecord>> records = hoodieHFileReader.readAllRecords();
|
|
|
|
|
records.forEach(entry -> assertEquals(entry.getSecond(), recordMap.get(entry.getFirst())));
|
|
|
|
|
List<IndexedRecord> records = HoodieHFileReader.readAllRecords(hoodieHFileReader);
|
|
|
|
|
assertEquals(new ArrayList<>(recordMap.values()), records);
|
|
|
|
|
|
|
|
|
|
hoodieHFileReader.close();
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < 2; i++) {
|
|
|
|
|
int randomRowstoFetch = 5 + RANDOM.nextInt(10);
|
|
|
|
|
Set<String> rowsToFetch = getRandomKeys(randomRowstoFetch, keys);
|
|
|
|
|
|
|
|
|
|
List<String> rowsList = new ArrayList<>(rowsToFetch);
|
|
|
|
|
Collections.sort(rowsList);
|
|
|
|
|
hoodieHFileReader = (HoodieHFileReader) createReader(conf);
|
|
|
|
|
List<Pair<String, GenericRecord>> result = hoodieHFileReader.readRecords(rowsList);
|
|
|
|
|
assertEquals(result.size(), randomRowstoFetch);
|
|
|
|
|
|
|
|
|
|
List<GenericRecord> expectedRecords = rowsList.stream().map(recordMap::get).collect(Collectors.toList());
|
|
|
|
|
|
|
|
|
|
hoodieHFileReader = (HoodieHFileReader<GenericRecord>) createReader(conf);
|
|
|
|
|
List<GenericRecord> result = HoodieHFileReader.readRecords(hoodieHFileReader, rowsList);
|
|
|
|
|
|
|
|
|
|
assertEquals(expectedRecords, result);
|
|
|
|
|
|
|
|
|
|
result.forEach(entry -> {
|
|
|
|
|
assertEquals(entry.getSecond(), recordMap.get(entry.getFirst()));
|
|
|
|
|
if (populateMetaFields && testAvroWithMeta) {
|
|
|
|
|
assertNotNull(entry.getSecond().get(HoodieRecord.RECORD_KEY_METADATA_FIELD));
|
|
|
|
|
assertNotNull(entry.get(HoodieRecord.RECORD_KEY_METADATA_FIELD));
|
|
|
|
|
} else {
|
|
|
|
|
assertNull(entry.getSecond().get(HoodieRecord.RECORD_KEY_METADATA_FIELD));
|
|
|
|
|
assertNull(entry.get(HoodieRecord.RECORD_KEY_METADATA_FIELD));
|
|
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
hoodieHFileReader.close();
|
|
|
|
|
@@ -202,7 +211,7 @@ public class TestHoodieHFileReaderWriter extends TestHoodieReaderWriterBase {
|
|
|
|
|
fs.open(getFilePath()), (int) fs.getFileStatus(getFilePath()).getLen());
|
|
|
|
|
// Reading byte array in HFile format, without actual file path
|
|
|
|
|
HoodieHFileReader<GenericRecord> hfileReader =
|
|
|
|
|
new HoodieHFileReader<>(fs, new Path(DUMMY_BASE_PATH), content);
|
|
|
|
|
new HoodieHFileReader<>(fs, new Path(DUMMY_BASE_PATH), content, Option.empty());
|
|
|
|
|
Schema avroSchema = getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchema.avsc");
|
|
|
|
|
assertEquals(NUM_RECORDS, hfileReader.getTotalRecords());
|
|
|
|
|
verifySimpleRecords(hfileReader.getRecordIterator(avroSchema));
|
|
|
|
|
@@ -217,7 +226,7 @@ public class TestHoodieHFileReaderWriter extends TestHoodieReaderWriterBase {
|
|
|
|
|
IntStream.concat(IntStream.range(40, NUM_RECORDS * 2), IntStream.range(10, 20))
|
|
|
|
|
.mapToObj(i -> "key" + String.format("%02d", i)).collect(Collectors.toList());
|
|
|
|
|
Schema avroSchema = getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchema.avsc");
|
|
|
|
|
Iterator<GenericRecord> iterator = hfileReader.getRecordIterator(keys, avroSchema);
|
|
|
|
|
Iterator<GenericRecord> iterator = hfileReader.getRecordsByKeysIterator(keys, avroSchema);
|
|
|
|
|
|
|
|
|
|
List<Integer> expectedIds =
|
|
|
|
|
IntStream.concat(IntStream.range(40, NUM_RECORDS), IntStream.range(10, 20))
|
|
|
|
|
@@ -233,6 +242,59 @@ public class TestHoodieHFileReaderWriter extends TestHoodieReaderWriterBase {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testReaderGetRecordIteratorByKeyPrefixes() throws Exception {
|
|
|
|
|
writeFileWithSimpleSchema();
|
|
|
|
|
HoodieHFileReader<GenericRecord> hfileReader =
|
|
|
|
|
(HoodieHFileReader<GenericRecord>) createReader(new Configuration());
|
|
|
|
|
|
|
|
|
|
Schema avroSchema = getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchema.avsc");
|
|
|
|
|
|
|
|
|
|
List<String> keyPrefixes = Collections.singletonList("key");
|
|
|
|
|
Iterator<GenericRecord> iterator =
|
|
|
|
|
hfileReader.getRecordsByKeyPrefixIterator(keyPrefixes, avroSchema);
|
|
|
|
|
|
|
|
|
|
List<GenericRecord> recordsByPrefix = toStream(iterator).collect(Collectors.toList());
|
|
|
|
|
|
|
|
|
|
List<GenericRecord> allRecords = toStream(hfileReader.getRecordIterator()).collect(Collectors.toList());
|
|
|
|
|
|
|
|
|
|
assertEquals(allRecords, recordsByPrefix);
|
|
|
|
|
|
|
|
|
|
// filter for "key1" : entries from key10 to key19 should be matched
|
|
|
|
|
List<GenericRecord> expectedKey1s = allRecords.stream().filter(entry -> (entry.get("_row_key").toString()).contains("key1")).collect(Collectors.toList());
|
|
|
|
|
iterator =
|
|
|
|
|
hfileReader.getRecordsByKeyPrefixIterator(Collections.singletonList("key1"), avroSchema);
|
|
|
|
|
recordsByPrefix =
|
|
|
|
|
StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false)
|
|
|
|
|
.collect(Collectors.toList());
|
|
|
|
|
assertEquals(expectedKey1s, recordsByPrefix);
|
|
|
|
|
|
|
|
|
|
// exact match
|
|
|
|
|
List<GenericRecord> expectedKey25 = allRecords.stream().filter(entry -> (entry.get("_row_key").toString()).contains("key25")).collect(Collectors.toList());
|
|
|
|
|
iterator =
|
|
|
|
|
hfileReader.getRecordsByKeyPrefixIterator(Collections.singletonList("key25"), avroSchema);
|
|
|
|
|
recordsByPrefix =
|
|
|
|
|
StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false)
|
|
|
|
|
.collect(Collectors.toList());
|
|
|
|
|
assertEquals(expectedKey25, recordsByPrefix);
|
|
|
|
|
|
|
|
|
|
// no match. key prefix is beyond entries in file.
|
|
|
|
|
iterator =
|
|
|
|
|
hfileReader.getRecordsByKeyPrefixIterator(Collections.singletonList("key99"), avroSchema);
|
|
|
|
|
recordsByPrefix =
|
|
|
|
|
StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false)
|
|
|
|
|
.collect(Collectors.toList());
|
|
|
|
|
assertEquals(Collections.emptyList(), recordsByPrefix);
|
|
|
|
|
|
|
|
|
|
// no match. but keyPrefix is in between the entries found in file.
|
|
|
|
|
iterator =
|
|
|
|
|
hfileReader.getRecordsByKeyPrefixIterator(Collections.singletonList("key1234"), avroSchema);
|
|
|
|
|
recordsByPrefix =
|
|
|
|
|
StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false)
|
|
|
|
|
.collect(Collectors.toList());
|
|
|
|
|
assertEquals(Collections.emptyList(), recordsByPrefix);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@ParameterizedTest
|
|
|
|
|
@ValueSource(strings = {
|
|
|
|
|
"/hudi_0_9_hbase_1_2_3", "/hudi_0_10_hbase_1_2_3", "/hudi_0_11_hbase_2_4_9"})
|
|
|
|
|
@@ -253,7 +315,7 @@ public class TestHoodieHFileReaderWriter extends TestHoodieReaderWriterBase {
|
|
|
|
|
HoodieHFileUtils.createHFileReader(fs, new Path(DUMMY_BASE_PATH), content),
|
|
|
|
|
hfilePrefix, true, HFILE_COMPARATOR.getClass(), NUM_RECORDS_FIXTURE);
|
|
|
|
|
HoodieHFileReader<GenericRecord> hfileReader =
|
|
|
|
|
new HoodieHFileReader<>(fs, new Path(DUMMY_BASE_PATH), content);
|
|
|
|
|
new HoodieHFileReader<>(fs, new Path(DUMMY_BASE_PATH), content, Option.empty());
|
|
|
|
|
Schema avroSchema = getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchema.avsc");
|
|
|
|
|
assertEquals(NUM_RECORDS_FIXTURE, hfileReader.getTotalRecords());
|
|
|
|
|
verifySimpleRecords(hfileReader.getRecordIterator(avroSchema));
|
|
|
|
|
@@ -261,7 +323,7 @@ public class TestHoodieHFileReaderWriter extends TestHoodieReaderWriterBase {
|
|
|
|
|
content = readHFileFromResources(complexHFile);
|
|
|
|
|
verifyHFileReader(HoodieHFileUtils.createHFileReader(fs, new Path(DUMMY_BASE_PATH), content),
|
|
|
|
|
hfilePrefix, true, HFILE_COMPARATOR.getClass(), NUM_RECORDS_FIXTURE);
|
|
|
|
|
hfileReader = new HoodieHFileReader<>(fs, new Path(DUMMY_BASE_PATH), content);
|
|
|
|
|
hfileReader = new HoodieHFileReader<>(fs, new Path(DUMMY_BASE_PATH), content, Option.empty());
|
|
|
|
|
avroSchema = getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchemaWithUDT.avsc");
|
|
|
|
|
assertEquals(NUM_RECORDS_FIXTURE, hfileReader.getTotalRecords());
|
|
|
|
|
verifySimpleRecords(hfileReader.getRecordIterator(avroSchema));
|
|
|
|
|
|