1
0

[HUDI-2029] Implement compression for DiskBasedMap in Spillable Map (#3128)

This commit is contained in:
rmahindra123
2021-07-14 19:57:38 -07:00
committed by GitHub
parent 75040ee9e5
commit d024439764
7 changed files with 167 additions and 54 deletions

View File

@@ -305,6 +305,11 @@ public class HoodieWriteConfig extends HoodieConfig {
+ "By default, we use a persistent hashmap based loosely on bitcask, that offers O(1) inserts, lookups. "
+ "Change this to `ROCKS_DB` to prefer using rocksDB, for handling the spill.");
public static final ConfigProperty<Boolean> DISK_MAP_BITCASK_COMPRESSION_ENABLED = ConfigProperty
.key("hoodie.diskmap.bitcask.compression.enabled")
.defaultValue(true)
.withDocumentation("Turn on compression for BITCASK disk map used by the External Spillable Map");
public static final ConfigProperty<Integer> CLIENT_HEARTBEAT_INTERVAL_IN_MS_PROP = ConfigProperty
.key("hoodie.client.heartbeat.interval_in_ms")
.defaultValue(60 * 1000)
@@ -582,6 +587,10 @@ public class HoodieWriteConfig extends HoodieConfig {
return ExternalSpillableMap.DiskMapType.valueOf(getString(SPILLABLE_DISK_MAP_TYPE).toUpperCase(Locale.ROOT));
}
/**
 * Returns whether compression is enabled for the BITCASK disk map used by the
 * external spillable map, per {@code hoodie.diskmap.bitcask.compression.enabled}.
 */
public boolean isBitCaskDiskMapCompressionEnabled() {
return getBoolean(DISK_MAP_BITCASK_COMPRESSION_ENABLED);
}
public EngineType getEngineType() {
return engineType;
}
@@ -1541,6 +1550,11 @@ public class HoodieWriteConfig extends HoodieConfig {
return this;
}
/**
 * Enables or disables compression for the BITCASK disk map used by the external spillable map.
 *
 * @param bitcaskDiskMapCompressionEnabled true to compress values spilled to disk
 * @return this builder, for chaining
 */
public Builder withBitcaskDiskMapCompressionEnabled(boolean bitcaskDiskMapCompressionEnabled) {
writeConfig.setValue(DISK_MAP_BITCASK_COMPRESSION_ENABLED, String.valueOf(bitcaskDiskMapCompressionEnabled));
return this;
}
public Builder withHeartbeatIntervalInMs(Integer heartbeatIntervalInMs) {
writeConfig.setValue(CLIENT_HEARTBEAT_INTERVAL_IN_MS_PROP, String.valueOf(heartbeatIntervalInMs));
return this;

View File

@@ -200,7 +200,7 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload, I, K, O> extends H
LOG.info("MaxMemoryPerPartitionMerge => " + memoryForMerge);
this.keyToNewRecords = new ExternalSpillableMap<>(memoryForMerge, config.getSpillableMapBasePath(),
new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(tableSchema),
config.getSpillableDiskMapType());
config.getSpillableDiskMapType(), config.isBitCaskDiskMapCompressionEnabled());
} catch (IOException io) {
throw new HoodieIOException("Cannot instantiate an ExternalSpillableMap", io);
}

View File

@@ -29,9 +29,12 @@ import org.apache.hudi.exception.HoodieNotSupportedException;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.io.Serializable;
import java.net.InetAddress;
@@ -47,6 +50,9 @@ import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Stream;
import java.util.zip.Deflater;
import java.util.zip.DeflaterOutputStream;
import java.util.zip.InflaterInputStream;
/**
 * This class provides a disk spillable only map implementation. All of the data is currently written to one file,
@@ -59,27 +65,33 @@ public final class BitCaskDiskMap<T extends Serializable, R extends Serializable
public static final int BUFFER_SIZE = 128 * 1024; // 128 KB
private static final Logger LOG = LogManager.getLogger(BitCaskDiskMap.class);
// Caches a per-thread compression/decompression handler to avoid creating a new instance for every operation
private static final ThreadLocal<CompressionHandler> DISK_COMPRESSION_REF =
ThreadLocal.withInitial(CompressionHandler::new);
// Stores the key and corresponding value's latest metadata spilled to disk
private final Map<T, ValueMetadata> valueMetadataMap;
// Enables compression for all values stored in the disk map
private final boolean isCompressionEnabled;
// Write only file
private File writeOnlyFile;
private final File writeOnlyFile;
// Write only OutputStream to be able to ONLY append to the file
private SizeAwareDataOutputStream writeOnlyFileHandle;
private final SizeAwareDataOutputStream writeOnlyFileHandle;
// FileOutputStream for the file handle to be able to force fsync
// since FileOutputStream's flush() does not force flush to disk
private FileOutputStream fileOutputStream;
private final FileOutputStream fileOutputStream;
// Current position in the file
private AtomicLong filePosition;
private final AtomicLong filePosition;
// FilePath to store the spilled data
private String filePath;
private final String filePath;
// Thread-safe random access file
private ThreadLocal<BufferedRandomAccessFile> randomAccessFile = new ThreadLocal<>();
private Queue<BufferedRandomAccessFile> openedAccessFiles = new ConcurrentLinkedQueue<>();
private final ThreadLocal<BufferedRandomAccessFile> randomAccessFile = new ThreadLocal<>();
private final Queue<BufferedRandomAccessFile> openedAccessFiles = new ConcurrentLinkedQueue<>();
private transient Thread shutdownThread = null;
public BitCaskDiskMap(String baseFilePath) throws IOException {
public BitCaskDiskMap(String baseFilePath, boolean isCompressionEnabled) throws IOException {
this.valueMetadataMap = new ConcurrentHashMap<>();
this.isCompressionEnabled = isCompressionEnabled;
this.writeOnlyFile = new File(baseFilePath, UUID.randomUUID().toString());
this.filePath = writeOnlyFile.getPath();
initFile(writeOnlyFile);
@@ -88,6 +100,10 @@ public final class BitCaskDiskMap<T extends Serializable, R extends Serializable
this.filePosition = new AtomicLong(0L);
}
public BitCaskDiskMap(String baseFilePath) throws IOException {
this(baseFilePath, false);
}
/**
 * RandomAccessFile is not thread-safe. This API opens a new file handle per thread and returns.
*
@@ -147,7 +163,7 @@ public final class BitCaskDiskMap<T extends Serializable, R extends Serializable
*/
@Override
public Iterator<R> iterator() {
return new LazyFileIterable(filePath, valueMetadataMap).iterator();
return new LazyFileIterable(filePath, valueMetadataMap, isCompressionEnabled).iterator();
}
/**
@@ -188,13 +204,16 @@ public final class BitCaskDiskMap<T extends Serializable, R extends Serializable
}
private R get(ValueMetadata entry) {
return get(entry, getRandomAccessFile());
return get(entry, getRandomAccessFile(), isCompressionEnabled);
}
public static <R> R get(ValueMetadata entry, RandomAccessFile file) {
public static <R> R get(ValueMetadata entry, RandomAccessFile file, boolean isCompressionEnabled) {
try {
return SerializationUtils
.deserialize(SpillableMapUtils.readBytesFromDisk(file, entry.getOffsetOfValue(), entry.getSizeOfValue()));
byte[] bytesFromDisk = SpillableMapUtils.readBytesFromDisk(file, entry.getOffsetOfValue(), entry.getSizeOfValue());
if (isCompressionEnabled) {
return SerializationUtils.deserialize(DISK_COMPRESSION_REF.get().decompressBytes(bytesFromDisk));
}
return SerializationUtils.deserialize(bytesFromDisk);
} catch (IOException e) {
throw new HoodieIOException("Unable to readFromDisk Hoodie Record from disk", e);
}
@@ -202,7 +221,8 @@ public final class BitCaskDiskMap<T extends Serializable, R extends Serializable
private synchronized R put(T key, R value, boolean flush) {
try {
byte[] val = SerializationUtils.serialize(value);
byte[] val = isCompressionEnabled ? DISK_COMPRESSION_REF.get().compressBytes(SerializationUtils.serialize(value)) :
SerializationUtils.serialize(value);
Integer valueSize = val.length;
Long timestamp = System.currentTimeMillis();
this.valueMetadataMap.put(key,
@@ -293,7 +313,7 @@ public final class BitCaskDiskMap<T extends Serializable, R extends Serializable
@Override
public Stream<R> valueStream() {
final BufferedRandomAccessFile file = getRandomAccessFile();
return valueMetadataMap.values().stream().sorted().sequential().map(valueMetaData -> (R) get(valueMetaData, file));
return valueMetadataMap.values().stream().sorted().sequential().map(valueMetaData -> (R) get(valueMetaData, file, isCompressionEnabled));
}
@Override
@@ -399,4 +419,47 @@ public final class BitCaskDiskMap<T extends Serializable, R extends Serializable
return Long.compare(this.offsetOfValue, o.offsetOfValue);
}
}
/**
 * Handles DEFLATE compression/decompression of value bytes spilled to the disk map.
 *
 * <p>Instances are thread-confined (obtained via the {@code ThreadLocal}
 * {@code DISK_COMPRESSION_REF}), which is what makes reusing the internal
 * {@link ByteArrayOutputStream} buffers across calls safe.
 */
private static class CompressionHandler implements Serializable {

  private static final int DISK_COMPRESSION_INITIAL_BUFFER_SIZE = 1048576;
  private static final int DECOMPRESS_INTERMEDIATE_BUFFER_SIZE = 8192;

  // Reused output buffers to avoid re-allocating 1 MB streams for every operation.
  private final ByteArrayOutputStream compressBaos;
  private final ByteArrayOutputStream decompressBaos;
  private final byte[] decompressIntermediateBuffer;

  CompressionHandler() {
    compressBaos = new ByteArrayOutputStream(DISK_COMPRESSION_INITIAL_BUFFER_SIZE);
    decompressBaos = new ByteArrayOutputStream(DISK_COMPRESSION_INITIAL_BUFFER_SIZE);
    decompressIntermediateBuffer = new byte[DECOMPRESS_INTERMEDIATE_BUFFER_SIZE];
  }

  /**
   * Compresses {@code value} with best-compression DEFLATE.
   *
   * @param value raw serialized bytes
   * @return the compressed bytes
   * @throws IOException if writing to the in-memory stream fails
   */
  private byte[] compressBytes(final byte[] value) throws IOException {
    compressBaos.reset();
    Deflater deflater = new Deflater(Deflater.BEST_COMPRESSION);
    try (DeflaterOutputStream dos = new DeflaterOutputStream(compressBaos, deflater)) {
      dos.write(value);
      // close() (via try-with-resources) finishes the DEFLATE stream so all
      // compressed data is flushed into compressBaos before toByteArray().
    } finally {
      // Release the Deflater's native memory eagerly instead of waiting for GC.
      deflater.end();
    }
    return compressBaos.toByteArray();
  }

  /**
   * Decompresses DEFLATE-encoded bytes produced by {@link #compressBytes(byte[])}.
   *
   * @param bytes compressed bytes read back from disk
   * @return the original uncompressed bytes
   * @throws IOException declared for API symmetry; I/O failures are rethrown as HoodieIOException
   */
  private byte[] decompressBytes(final byte[] bytes) throws IOException {
    decompressBaos.reset();
    // try-with-resources closes the stream, which also releases the internal
    // Inflater's native memory; the previous code leaked it until finalization.
    try (InputStream in = new InflaterInputStream(new ByteArrayInputStream(bytes))) {
      int len;
      while ((len = in.read(decompressIntermediateBuffer)) > 0) {
        decompressBaos.write(decompressIntermediateBuffer, 0, len);
      }
      return decompressBaos.toByteArray();
    } catch (IOException e) {
      throw new HoodieIOException("IOException while decompressing bytes", e);
    }
  }
}
}

View File

@@ -72,6 +72,8 @@ public class ExternalSpillableMap<T extends Serializable, R extends Serializable
private final SizeEstimator<R> valueSizeEstimator;
// Type of the disk map
private final DiskMapType diskMapType;
// Enables compression of values stored on disk
private final boolean isCompressionEnabled;
// current space occupied by this map in-memory
private Long currentInMemoryMapSize;
// An estimate of the size of each payload written to this map
@@ -88,6 +90,11 @@ public class ExternalSpillableMap<T extends Serializable, R extends Serializable
public ExternalSpillableMap(Long maxInMemorySizeInBytes, String baseFilePath, SizeEstimator<T> keySizeEstimator,
SizeEstimator<R> valueSizeEstimator, DiskMapType diskMapType) throws IOException {
this(maxInMemorySizeInBytes, baseFilePath, keySizeEstimator, valueSizeEstimator, diskMapType, false);
}
public ExternalSpillableMap(Long maxInMemorySizeInBytes, String baseFilePath, SizeEstimator<T> keySizeEstimator,
SizeEstimator<R> valueSizeEstimator, DiskMapType diskMapType, boolean isCompressionEnabled) throws IOException {
this.inMemoryMap = new HashMap<>();
this.baseFilePath = baseFilePath;
this.maxInMemorySizeInBytes = (long) Math.floor(maxInMemorySizeInBytes * sizingFactorForInMemoryMap);
@@ -95,6 +102,7 @@ public class ExternalSpillableMap<T extends Serializable, R extends Serializable
this.keySizeEstimator = keySizeEstimator;
this.valueSizeEstimator = valueSizeEstimator;
this.diskMapType = diskMapType;
this.isCompressionEnabled = isCompressionEnabled;
}
private DiskMap<T, R> getDiskBasedMap() {
@@ -108,7 +116,7 @@ public class ExternalSpillableMap<T extends Serializable, R extends Serializable
break;
case BITCASK:
default:
diskBasedMap = new BitCaskDiskMap<>(baseFilePath);
diskBasedMap = new BitCaskDiskMap<>(baseFilePath, isCompressionEnabled);
}
} catch (IOException e) {
throw new HoodieIOException(e.getMessage(), e);

View File

@@ -37,12 +37,19 @@ public class LazyFileIterable<T, R> implements Iterable<R> {
private final String filePath;
// Stores the key and corresponding value's latest metadata spilled to disk
private final Map<T, BitCaskDiskMap.ValueMetadata> inMemoryMetadataOfSpilledData;
// Whether compression was enabled for the values when they were inserted into the file/map
private final boolean isCompressionEnabled;
private transient Thread shutdownThread = null;
/**
 * Creates an iterable over spilled values with compression disabled.
 *
 * @param filePath path of the spill file to read values from
 * @param map      key to value-metadata mapping for the spilled data
 */
public LazyFileIterable(String filePath, Map<T, BitCaskDiskMap.ValueMetadata> map) {
this(filePath, map, false);
}
/**
 * Creates an iterable over spilled values.
 *
 * @param filePath             path of the spill file to read values from
 * @param map                  key to value-metadata mapping for the spilled data
 * @param isCompressionEnabled whether the values were compressed when written, so reads decompress
 */
public LazyFileIterable(String filePath, Map<T, BitCaskDiskMap.ValueMetadata> map, boolean isCompressionEnabled) {
this.filePath = filePath;
this.inMemoryMetadataOfSpilledData = map;
this.isCompressionEnabled = isCompressionEnabled;
}
@Override
@@ -91,7 +98,7 @@ public class LazyFileIterable<T, R> implements Iterable<R> {
throw new IllegalStateException("next() called on EOF'ed stream. File :" + filePath);
}
Map.Entry<T, BitCaskDiskMap.ValueMetadata> entry = this.metadataIterator.next();
return BitCaskDiskMap.get(entry.getValue(), readOnlyFileHandle);
return BitCaskDiskMap.get(entry.getValue(), readOnlyFileHandle, isCompressionEnabled);
}
@Override

View File

@@ -38,6 +38,8 @@ import org.apache.avro.generic.IndexedRecord;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import java.io.IOException;
import java.io.UncheckedIOException;
@@ -66,27 +68,33 @@ public class TestBitCaskDiskMap extends HoodieCommonTestHarness {
initPath();
}
@Test
public void testSimpleInsert() throws IOException, URISyntaxException {
BitCaskDiskMap records = new BitCaskDiskMap<>(basePath);
@ParameterizedTest
@ValueSource(booleans = {false, true})
public void testSimpleInsert(boolean isCompressionEnabled) throws IOException, URISyntaxException {
BitCaskDiskMap records = new BitCaskDiskMap<>(basePath, isCompressionEnabled);
List<IndexedRecord> iRecords = SchemaTestUtil.generateHoodieTestRecords(0, 100);
((GenericRecord) iRecords.get(0)).get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString();
List<String> recordKeys = SpillableMapTestUtils.upsertRecords(iRecords, records);
Map<String, IndexedRecord> originalRecords = iRecords.stream()
.collect(Collectors.toMap(k -> ((GenericRecord) k).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(), v -> v));
// make sure records have spilled to disk
assertTrue(records.sizeOfFileOnDiskInBytes() > 0);
Iterator<HoodieRecord<? extends HoodieRecordPayload>> itr = records.iterator();
List<HoodieRecord> oRecords = new ArrayList<>();
while (itr.hasNext()) {
HoodieRecord<? extends HoodieRecordPayload> rec = itr.next();
oRecords.add(rec);
assert recordKeys.contains(rec.getRecordKey());
IndexedRecord originalRecord = originalRecords.get(rec.getRecordKey());
HoodieAvroPayload payload = (HoodieAvroPayload) rec.getData();
Option<IndexedRecord> value = payload.getInsertValue(HoodieAvroUtils.addMetadataFields(getSimpleSchema()));
assertEquals(originalRecord, value.get());
}
}
@Test
public void testSimpleInsertWithoutHoodieMetadata() throws IOException, URISyntaxException {
BitCaskDiskMap records = new BitCaskDiskMap<>(basePath);
@ParameterizedTest
@ValueSource(booleans = {false, true})
public void testSimpleInsertWithoutHoodieMetadata(boolean isCompressionEnabled) throws IOException, URISyntaxException {
BitCaskDiskMap records = new BitCaskDiskMap<>(basePath, isCompressionEnabled);
List<HoodieRecord> hoodieRecords = SchemaTestUtil.generateHoodieTestRecordsWithoutHoodieMetadata(0, 1000);
Set<String> recordKeys = new HashSet<>();
// insert generated records into the map
@@ -105,11 +113,12 @@ public class TestBitCaskDiskMap extends HoodieCommonTestHarness {
}
}
@Test
public void testSimpleUpsert() throws IOException, URISyntaxException {
@ParameterizedTest
@ValueSource(booleans = {false, true})
public void testSimpleUpsert(boolean isCompressionEnabled) throws IOException, URISyntaxException {
Schema schema = HoodieAvroUtils.addMetadataFields(getSimpleSchema());
BitCaskDiskMap records = new BitCaskDiskMap<>(basePath);
BitCaskDiskMap records = new BitCaskDiskMap<>(basePath, isCompressionEnabled);
List<IndexedRecord> iRecords = SchemaTestUtil.generateHoodieTestRecords(0, 100);
// perform some inserts
@@ -187,9 +196,10 @@ public class TestBitCaskDiskMap extends HoodieCommonTestHarness {
assertTrue(payloadSize > 0);
}
@Test
public void testPutAll() throws IOException, URISyntaxException {
BitCaskDiskMap<String, HoodieRecord> records = new BitCaskDiskMap<>(basePath);
@ParameterizedTest
@ValueSource(booleans = {false, true})
public void testPutAll(boolean isCompressionEnabled) throws IOException, URISyntaxException {
BitCaskDiskMap<String, HoodieRecord> records = new BitCaskDiskMap<>(basePath, isCompressionEnabled);
List<IndexedRecord> iRecords = SchemaTestUtil.generateHoodieTestRecords(0, 100);
Map<String, HoodieRecord> recordMap = new HashMap<>();
iRecords.forEach(r -> {

View File

@@ -39,7 +39,8 @@ import org.junit.jupiter.api.MethodOrderer.Alphanumeric;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestMethodOrder;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.EnumSource;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import java.io.IOException;
import java.io.UncheckedIOException;
@@ -48,11 +49,13 @@ import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.params.provider.Arguments.arguments;
/**
* Tests external spillable map {@link ExternalSpillableMap}.
@@ -69,13 +72,13 @@ public class TestExternalSpillableMap extends HoodieCommonTestHarness {
}
@ParameterizedTest
@EnumSource(ExternalSpillableMap.DiskMapType.class)
public void simpleInsertTest(ExternalSpillableMap.DiskMapType diskMapType) throws IOException, URISyntaxException {
@MethodSource("testArguments")
public void simpleInsertTest(ExternalSpillableMap.DiskMapType diskMapType, boolean isCompressionEnabled) throws IOException, URISyntaxException {
Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getSimpleSchema());
ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> records =
new ExternalSpillableMap<>(16L, basePath, new DefaultSizeEstimator(),
new HoodieRecordSizeEstimator(schema), diskMapType); // 16B
new HoodieRecordSizeEstimator(schema), diskMapType, isCompressionEnabled); // 16B
List<IndexedRecord> iRecords = SchemaTestUtil.generateHoodieTestRecords(0, 100);
List<String> recordKeys = SpillableMapTestUtils.upsertRecords(iRecords, records);
@@ -102,14 +105,13 @@ public class TestExternalSpillableMap extends HoodieCommonTestHarness {
}
@ParameterizedTest
@EnumSource(ExternalSpillableMap.DiskMapType.class)
public void testSimpleUpsert(ExternalSpillableMap.DiskMapType diskMapType) throws IOException, URISyntaxException {
@MethodSource("testArguments")
public void testSimpleUpsert(ExternalSpillableMap.DiskMapType diskMapType, boolean isCompressionEnabled) throws IOException, URISyntaxException {
Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getSimpleSchema());
ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> records =
new ExternalSpillableMap<>(16L, basePath, new DefaultSizeEstimator(),
new HoodieRecordSizeEstimator(schema), diskMapType); // 16B
new HoodieRecordSizeEstimator(schema), diskMapType, isCompressionEnabled); // 16B
List<IndexedRecord> iRecords = SchemaTestUtil.generateHoodieTestRecords(0, 100);
List<String> recordKeys = SpillableMapTestUtils.upsertRecords(iRecords, records);
@@ -140,15 +142,15 @@ public class TestExternalSpillableMap extends HoodieCommonTestHarness {
}
@ParameterizedTest
@EnumSource(ExternalSpillableMap.DiskMapType.class)
public void testAllMapOperations(ExternalSpillableMap.DiskMapType diskMapType) throws IOException, URISyntaxException {
@MethodSource("testArguments")
public void testAllMapOperations(ExternalSpillableMap.DiskMapType diskMapType, boolean isCompressionEnabled) throws IOException, URISyntaxException {
Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getSimpleSchema());
String payloadClazz = HoodieAvroPayload.class.getName();
ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> records =
new ExternalSpillableMap<>(16L, basePath, new DefaultSizeEstimator(),
new HoodieRecordSizeEstimator(schema), diskMapType); // 16B
new HoodieRecordSizeEstimator(schema), diskMapType, isCompressionEnabled); // 16B
List<IndexedRecord> iRecords = SchemaTestUtil.generateHoodieTestRecords(0, 100);
// insert a bunch of records so that values spill to disk too
@@ -198,13 +200,13 @@ public class TestExternalSpillableMap extends HoodieCommonTestHarness {
}
@ParameterizedTest
@EnumSource(ExternalSpillableMap.DiskMapType.class)
public void simpleTestWithException(ExternalSpillableMap.DiskMapType diskMapType) throws IOException, URISyntaxException {
@MethodSource("testArguments")
public void simpleTestWithException(ExternalSpillableMap.DiskMapType diskMapType, boolean isCompressionEnabled) throws IOException, URISyntaxException {
Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getSimpleSchema());
ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> records = new ExternalSpillableMap<>(16L,
failureOutputPath, new DefaultSizeEstimator(),
new HoodieRecordSizeEstimator(schema), diskMapType); // 16B
new HoodieRecordSizeEstimator(schema), diskMapType, isCompressionEnabled); // 16B
List<IndexedRecord> iRecords = SchemaTestUtil.generateHoodieTestRecords(0, 100);
List<String> recordKeys = SpillableMapTestUtils.upsertRecords(iRecords, records);
@@ -218,14 +220,14 @@ public class TestExternalSpillableMap extends HoodieCommonTestHarness {
}
@ParameterizedTest
@EnumSource(ExternalSpillableMap.DiskMapType.class)
public void testDataCorrectnessWithUpsertsToDataInMapAndOnDisk(ExternalSpillableMap.DiskMapType diskMapType) throws IOException, URISyntaxException {
@MethodSource("testArguments")
public void testDataCorrectnessWithUpsertsToDataInMapAndOnDisk(ExternalSpillableMap.DiskMapType diskMapType, boolean isCompressionEnabled) throws IOException, URISyntaxException {
Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getSimpleSchema());
ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> records =
new ExternalSpillableMap<>(16L, basePath, new DefaultSizeEstimator(),
new HoodieRecordSizeEstimator(schema), diskMapType); // 16B
new HoodieRecordSizeEstimator(schema), diskMapType, isCompressionEnabled); // 16B
List<String> recordKeys = new ArrayList<>();
// Ensure we spill to disk
@@ -271,14 +273,14 @@ public class TestExternalSpillableMap extends HoodieCommonTestHarness {
}
@ParameterizedTest
@EnumSource(ExternalSpillableMap.DiskMapType.class)
public void testDataCorrectnessWithoutHoodieMetadata(ExternalSpillableMap.DiskMapType diskMapType) throws IOException, URISyntaxException {
@MethodSource("testArguments")
public void testDataCorrectnessWithoutHoodieMetadata(ExternalSpillableMap.DiskMapType diskMapType, boolean isCompressionEnabled) throws IOException, URISyntaxException {
Schema schema = SchemaTestUtil.getSimpleSchema();
ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> records =
new ExternalSpillableMap<>(16L, basePath, new DefaultSizeEstimator(),
new HoodieRecordSizeEstimator(schema), diskMapType); // 16B
new HoodieRecordSizeEstimator(schema), diskMapType, isCompressionEnabled); // 16B
List<String> recordKeys = new ArrayList<>();
// Ensure we spill to disk
@@ -338,4 +340,13 @@ public class TestExternalSpillableMap extends HoodieCommonTestHarness {
// TODO : come up with a performance eval test for spillableMap
@Test
public void testLargeInsertUpsert() {}
}
/**
 * Argument sets for the parameterized tests: every disk map type with compression
 * disabled, plus BITCASK with compression enabled — compression only applies to
 * the BITCASK disk map, so other types are not paired with {@code true}.
 */
private static Stream<Arguments> testArguments() {
return Stream.of(
arguments(ExternalSpillableMap.DiskMapType.BITCASK, false),
arguments(ExternalSpillableMap.DiskMapType.ROCKS_DB, false),
arguments(ExternalSpillableMap.DiskMapType.UNKNOWN, false),
arguments(ExternalSpillableMap.DiskMapType.BITCASK, true)
);
}
}