Making ExternalSpillableMap generic for any datatype
- Introduced the concept of converters to serialize/deserialize (serde) generic data types for SpillableMap - Fixed/added configs in Hoodie configs - Changed HoodieMergeHandle to start using SpillableMap
This commit is contained in:
committed by
vinoth chandar
parent
fa787ab5ab
commit
987f5d6b96
@@ -16,7 +16,6 @@
|
||||
|
||||
package com.uber.hoodie.common.table.log;
|
||||
|
||||
import com.google.common.collect.Maps;
|
||||
import com.uber.hoodie.common.model.HoodieKey;
|
||||
import com.uber.hoodie.common.model.HoodieLogFile;
|
||||
import com.uber.hoodie.common.model.HoodieRecord;
|
||||
@@ -29,8 +28,9 @@ import com.uber.hoodie.common.table.log.block.HoodieDeleteBlock;
|
||||
import com.uber.hoodie.common.table.log.block.HoodieLogBlock;
|
||||
import com.uber.hoodie.common.util.SpillableMapUtils;
|
||||
import com.uber.hoodie.common.util.collection.ExternalSpillableMap;
|
||||
import com.uber.hoodie.common.util.collection.converter.StringConverter;
|
||||
import com.uber.hoodie.common.util.collection.converter.HoodieRecordConverter;
|
||||
import com.uber.hoodie.exception.HoodieIOException;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.avro.generic.IndexedRecord;
|
||||
@@ -48,6 +48,7 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static com.uber.hoodie.common.table.log.block.HoodieLogBlock.HeaderMetadataType.INSTANT_TIME;
|
||||
import static com.uber.hoodie.common.table.log.block.HoodieLogBlock.HoodieLogBlockType.CORRUPT_BLOCK;
|
||||
@@ -102,8 +103,8 @@ public class HoodieCompactedLogRecordScanner implements
|
||||
|
||||
try {
|
||||
// Store merged records for all versions for this log file, set the in-memory footprint to maxInMemoryMapSize
|
||||
this.records = new ExternalSpillableMap<>(maxMemorySizeInBytes, readerSchema,
|
||||
payloadClassFQN, Optional.empty());
|
||||
this.records = new ExternalSpillableMap<>(maxMemorySizeInBytes, Optional.empty(),
|
||||
new StringConverter(), new HoodieRecordConverter(readerSchema, payloadClassFQN));
|
||||
// iterate over the paths
|
||||
HoodieLogFormatReader logFormatReaderWrapper =
|
||||
new HoodieLogFormatReader(fs,
|
||||
|
||||
@@ -19,11 +19,10 @@ import com.uber.hoodie.common.model.HoodieKey;
|
||||
import com.uber.hoodie.common.model.HoodieRecord;
|
||||
import com.uber.hoodie.common.model.HoodieRecordPayload;
|
||||
import com.uber.hoodie.common.util.collection.DiskBasedMap;
|
||||
import com.uber.hoodie.common.util.collection.converter.Converter;
|
||||
import com.uber.hoodie.common.util.collection.io.storage.SizeAwareDataOutputStream;
|
||||
import com.uber.hoodie.exception.HoodieCorruptedDataException;
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.htrace.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.RandomAccessFile;
|
||||
@@ -32,28 +31,23 @@ import java.util.zip.CRC32;
|
||||
|
||||
public class SpillableMapUtils {
|
||||
|
||||
public static ObjectMapper objectMapper = new ObjectMapper();
|
||||
/**
|
||||
* Using the schema and payload class, read and convert the bytes on disk to a HoodieRecord
|
||||
*
|
||||
* @param file
|
||||
* @param schema
|
||||
* @param payloadClazz
|
||||
* @param valuePosition
|
||||
* @param valueLength
|
||||
* @param <R>
|
||||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
public static <R> R readFromDisk(RandomAccessFile file, Schema schema, String payloadClazz,
|
||||
long valuePosition, int valueLength) throws IOException {
|
||||
|
||||
public static byte[] readBytesFromDisk(RandomAccessFile file, long valuePosition, int valueLength) throws IOException {
|
||||
DiskBasedMap.FileEntry fileEntry = readInternal(file, valuePosition, valueLength);
|
||||
return (R) convertToHoodieRecordPayload(HoodieAvroUtils.bytesToAvro(fileEntry.getValue(), schema),
|
||||
payloadClazz);
|
||||
return fileEntry.getValue();
|
||||
}
|
||||
|
||||
/**
|
||||
* |crc|timestamp|sizeOfKey|SizeOfValue|key|value|
|
||||
*
|
||||
* @param file
|
||||
* @param valuePosition
|
||||
* @param valueLength
|
||||
@@ -66,15 +60,15 @@ public class SpillableMapUtils {
|
||||
long timestamp = file.readLong();
|
||||
int keySize = file.readInt();
|
||||
int valueSize = file.readInt();
|
||||
byte [] key = new byte[keySize];
|
||||
byte[] key = new byte[keySize];
|
||||
file.read(key, 0, keySize);
|
||||
byte [] value = new byte[valueSize];
|
||||
if(!(valueSize == valueLength)) {
|
||||
byte[] value = new byte[valueSize];
|
||||
if (!(valueSize == valueLength)) {
|
||||
throw new HoodieCorruptedDataException("unequal size of payload written to external file, data may be corrupted");
|
||||
}
|
||||
file.read(value, 0, valueSize);
|
||||
long crcOfReadValue = generateChecksum(value);
|
||||
if(!(crc == crcOfReadValue)) {
|
||||
if (!(crc == crcOfReadValue)) {
|
||||
throw new HoodieCorruptedDataException("checksum of payload written to external disk does not match, " +
|
||||
"data may be corrupted");
|
||||
}
|
||||
@@ -83,7 +77,7 @@ public class SpillableMapUtils {
|
||||
|
||||
/**
|
||||
* Write Value and other metadata necessary to disk. Each entry has the following sequence of data
|
||||
*
|
||||
* <p>
|
||||
* |crc|timestamp|sizeOfKey|SizeOfValue|key|value|
|
||||
*
|
||||
* @param outputStream
|
||||
@@ -108,10 +102,11 @@ public class SpillableMapUtils {
|
||||
|
||||
/**
|
||||
* Generate a checksum for a given set of bytes
|
||||
*
|
||||
* @param data
|
||||
* @return
|
||||
*/
|
||||
public static long generateChecksum(byte [] data) {
|
||||
public static long generateChecksum(byte[] data) {
|
||||
CRC32 crc = new CRC32();
|
||||
crc.update(data);
|
||||
return crc.getValue();
|
||||
@@ -120,20 +115,19 @@ public class SpillableMapUtils {
|
||||
/**
|
||||
* Compute a bytes representation of the payload by serializing the contents
|
||||
* This is used to estimate the size of the payload (either in memory or when written to disk)
|
||||
*
|
||||
* @param <R>
|
||||
* @param value
|
||||
* @param schema
|
||||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
public static <R> int computePayloadSize(R value, Schema schema) throws IOException {
|
||||
HoodieRecord payload = (HoodieRecord) value;
|
||||
byte [] val = HoodieAvroUtils.avroToBytes((GenericRecord) payload.getData().getInsertValue(schema).get());
|
||||
return val.length;
|
||||
public static <R> long computePayloadSize(R value, Converter<R> valueConverter) throws IOException {
|
||||
return valueConverter.sizeEstimate(value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility method to convert bytes to HoodieRecord using schema and payload class
|
||||
*
|
||||
* @param rec
|
||||
* @param payloadClazz
|
||||
* @param <R>
|
||||
|
||||
@@ -16,15 +16,14 @@
|
||||
|
||||
package com.uber.hoodie.common.util.collection;
|
||||
|
||||
import com.uber.hoodie.common.model.HoodieRecord;
|
||||
import com.uber.hoodie.common.util.HoodieAvroUtils;
|
||||
import com.uber.hoodie.common.util.SpillableMapUtils;
|
||||
import com.uber.hoodie.common.util.collection.converter.Converter;
|
||||
import com.uber.hoodie.common.util.collection.io.storage.SizeAwareDataOutputStream;
|
||||
import com.uber.hoodie.exception.HoodieException;
|
||||
import com.uber.hoodie.exception.HoodieIOException;
|
||||
import com.uber.hoodie.exception.HoodieNotSupportedException;
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
@@ -48,13 +47,11 @@ import java.util.concurrent.atomic.AtomicLong;
|
||||
* 1) An in-memory map that tracks the key-> latest ValueMetadata.
|
||||
* 2) Current position in the file
|
||||
* NOTE : Only String.class type supported for Key
|
||||
* @param <T>
|
||||
* @param <R>
|
||||
*/
|
||||
final public class DiskBasedMap<T,R> implements Map<T,R> {
|
||||
final public class DiskBasedMap<T, R> implements Map<T, R> {
|
||||
|
||||
// Stores the key and corresponding value's latest metadata spilled to disk
|
||||
final private Map<T, ValueMetadata> inMemoryMetadataOfSpilledData;
|
||||
final private Map<T, ValueMetadata> valueMetadataMap;
|
||||
// Read-only file handle, used to seek to random positions and read values from disk
|
||||
private RandomAccessFile readOnlyFileHandle;
|
||||
// Write only OutputStream to be able to ONLY append to the file
|
||||
@@ -64,16 +61,20 @@ final public class DiskBasedMap<T,R> implements Map<T,R> {
|
||||
private FileOutputStream fileOutputStream;
|
||||
// Current position in the file
|
||||
private AtomicLong filePosition;
|
||||
// Schema used to de-serialize payload written to disk
|
||||
private Schema schema;
|
||||
// Class used to de-serialize/realize payload written to disk
|
||||
private String payloadClazz;
|
||||
// FilePath to store the spilled data
|
||||
private String filePath;
|
||||
// Default file path prefix to put the spillable file
|
||||
private static String DEFAULT_BASE_FILE_PATH = "/tmp/";
|
||||
// Key converter to convert key type to bytes
|
||||
final private Converter<T> keyConverter;
|
||||
// Value converter to convert value type to bytes
|
||||
final private Converter<R> valueConverter;
|
||||
|
||||
private static Logger log = LogManager.getLogger(DiskBasedMap.class);
|
||||
|
||||
|
||||
public final class ValueMetadata {
|
||||
|
||||
// FilePath to store the spilled data
|
||||
private String filePath;
|
||||
// Size (numberOfBytes) of the value written to disk
|
||||
@@ -108,6 +109,7 @@ final public class DiskBasedMap<T,R> implements Map<T,R> {
|
||||
}
|
||||
|
||||
public static final class FileEntry {
|
||||
|
||||
// Checksum of the value written to disk, compared on every read from disk to detect corruption
|
||||
private Long crc;
|
||||
// Size (numberOfBytes) of the key written to disk
|
||||
@@ -115,13 +117,14 @@ final public class DiskBasedMap<T,R> implements Map<T,R> {
|
||||
// Size (numberOfBytes) of the value written to disk
|
||||
private Integer sizeOfValue;
|
||||
// Actual key
|
||||
private byte [] key;
|
||||
private byte[] key;
|
||||
// Actual value
|
||||
private byte [] value;
|
||||
private byte[] value;
|
||||
// Current timestamp when the value was written to disk
|
||||
private Long timestamp;
|
||||
|
||||
public FileEntry(long crc, int sizeOfKey, int sizeOfValue, byte [] key, byte [] value, long timestamp) {
|
||||
public FileEntry(long crc, int sizeOfKey, int sizeOfValue, byte[] key, byte[] value,
|
||||
long timestamp) {
|
||||
this.crc = crc;
|
||||
this.sizeOfKey = sizeOfKey;
|
||||
this.sizeOfValue = sizeOfValue;
|
||||
@@ -155,10 +158,11 @@ final public class DiskBasedMap<T,R> implements Map<T,R> {
|
||||
}
|
||||
}
|
||||
|
||||
protected DiskBasedMap(Schema schema, String payloadClazz, Optional<String> baseFilePath) throws IOException {
|
||||
this.inMemoryMetadataOfSpilledData = new HashMap<>();
|
||||
protected DiskBasedMap(Optional<String> baseFilePath,
|
||||
Converter<T> keyConverter, Converter<R> valueConverter) throws IOException {
|
||||
this.valueMetadataMap = new HashMap<>();
|
||||
|
||||
if(!baseFilePath.isPresent()) {
|
||||
if (!baseFilePath.isPresent()) {
|
||||
baseFilePath = Optional.of(DEFAULT_BASE_FILE_PATH);
|
||||
}
|
||||
this.filePath = baseFilePath.get() + UUID.randomUUID().toString();
|
||||
@@ -168,16 +172,18 @@ final public class DiskBasedMap<T,R> implements Map<T,R> {
|
||||
this.fileOutputStream = new FileOutputStream(writeOnlyFileHandle, true);
|
||||
this.writeOnlyFileHandle = new SizeAwareDataOutputStream(fileOutputStream);
|
||||
this.filePosition = new AtomicLong(0L);
|
||||
this.schema = schema;
|
||||
this.payloadClazz = payloadClazz;
|
||||
this.keyConverter = keyConverter;
|
||||
this.valueConverter = valueConverter;
|
||||
}
|
||||
|
||||
private void initFile(File writeOnlyFileHandle) throws IOException {
|
||||
// delete the file if it exists
|
||||
if(writeOnlyFileHandle.exists()) {
|
||||
if (writeOnlyFileHandle.exists()) {
|
||||
writeOnlyFileHandle.delete();
|
||||
}
|
||||
writeOnlyFileHandle.createNewFile();
|
||||
|
||||
log.info("Spilling to file location " + writeOnlyFileHandle.getAbsolutePath());
|
||||
// Open file in read-only mode
|
||||
readOnlyFileHandle = new RandomAccessFile(filePath, "r");
|
||||
readOnlyFileHandle.seek(0);
|
||||
@@ -194,12 +200,12 @@ final public class DiskBasedMap<T,R> implements Map<T,R> {
|
||||
Runtime.getRuntime().addShutdownHook(new Thread() {
|
||||
public void run() {
|
||||
try {
|
||||
if(writeOnlyFileHandle != null) {
|
||||
if (writeOnlyFileHandle != null) {
|
||||
writeOnlyFileHandle.flush();
|
||||
fileOutputStream.getChannel().force(false);
|
||||
writeOnlyFileHandle.close();
|
||||
}
|
||||
} catch(Exception e) {
|
||||
} catch (Exception e) {
|
||||
// fail silently for any sort of exception
|
||||
}
|
||||
}
|
||||
@@ -208,16 +214,14 @@ final public class DiskBasedMap<T,R> implements Map<T,R> {
|
||||
|
||||
/**
|
||||
* Custom iterator to iterate over values written to disk
|
||||
* @return
|
||||
*/
|
||||
public Iterator<R> iterator() {
|
||||
return new LazyFileIterable(readOnlyFileHandle,
|
||||
inMemoryMetadataOfSpilledData, schema, payloadClazz).iterator();
|
||||
valueMetadataMap, valueConverter).iterator();
|
||||
}
|
||||
|
||||
/**
|
||||
* Number of bytes spilled to disk
|
||||
* @return
|
||||
*/
|
||||
public long sizeOfFileOnDiskInBytes() {
|
||||
return filePosition.get();
|
||||
@@ -225,17 +229,17 @@ final public class DiskBasedMap<T,R> implements Map<T,R> {
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return inMemoryMetadataOfSpilledData.size();
|
||||
return valueMetadataMap.size();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isEmpty() {
|
||||
return inMemoryMetadataOfSpilledData.isEmpty();
|
||||
return valueMetadataMap.isEmpty();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean containsKey(Object key) {
|
||||
return inMemoryMetadataOfSpilledData.containsKey(key);
|
||||
return valueMetadataMap.containsKey(key);
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -245,34 +249,31 @@ final public class DiskBasedMap<T,R> implements Map<T,R> {
|
||||
|
||||
@Override
|
||||
public R get(Object key) {
|
||||
ValueMetadata entry = inMemoryMetadataOfSpilledData.get(key);
|
||||
if(entry == null) {
|
||||
ValueMetadata entry = valueMetadataMap.get(key);
|
||||
if (entry == null) {
|
||||
return null;
|
||||
}
|
||||
try {
|
||||
return SpillableMapUtils.readFromDisk(readOnlyFileHandle, schema,
|
||||
payloadClazz, entry.getOffsetOfValue(), entry.getSizeOfValue());
|
||||
} catch(IOException e) {
|
||||
return this.valueConverter.getData(SpillableMapUtils.readBytesFromDisk(readOnlyFileHandle,
|
||||
entry.getOffsetOfValue(), entry.getSizeOfValue()));
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Unable to readFromDisk Hoodie Record from disk", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public R put(T key, R value) {
|
||||
//TODO (na) : check value instanceof HoodieRecordPayload, now assume every payload is HoodieRecord
|
||||
HoodieRecord payload = (HoodieRecord) value;
|
||||
try {
|
||||
byte [] val = HoodieAvroUtils.avroToBytes((GenericRecord) payload.getData().getInsertValue(this.schema).get());
|
||||
byte[] val = this.valueConverter.getBytes(value);
|
||||
Integer valueSize = val.length;
|
||||
Long timestamp = new Date().getTime();
|
||||
this.inMemoryMetadataOfSpilledData.put(key, new DiskBasedMap.ValueMetadata(this.filePath, valueSize,
|
||||
filePosition.get(), timestamp));
|
||||
// TODO(na) : Test serializer performance for generic types
|
||||
String serializedKey = SpillableMapUtils.objectMapper.writeValueAsString(key);
|
||||
this.valueMetadataMap.put(key,
|
||||
new DiskBasedMap.ValueMetadata(this.filePath, valueSize, filePosition.get(), timestamp));
|
||||
byte[] serializedKey = keyConverter.getBytes(key);
|
||||
filePosition.set(SpillableMapUtils.spillToDisk(writeOnlyFileHandle,
|
||||
new FileEntry(SpillableMapUtils.generateChecksum(val),
|
||||
serializedKey.getBytes().length, valueSize, serializedKey.getBytes(), val, timestamp)));
|
||||
} catch(IOException io) {
|
||||
serializedKey.length, valueSize, serializedKey, val, timestamp)));
|
||||
} catch (IOException io) {
|
||||
throw new HoodieIOException("Unable to store data in Disk Based map", io);
|
||||
}
|
||||
return value;
|
||||
@@ -281,33 +282,33 @@ final public class DiskBasedMap<T,R> implements Map<T,R> {
|
||||
@Override
|
||||
public R remove(Object key) {
|
||||
R value = get(key);
|
||||
inMemoryMetadataOfSpilledData.remove(key);
|
||||
valueMetadataMap.remove(key);
|
||||
return value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void putAll(Map<? extends T, ? extends R> m) {
|
||||
for(Map.Entry<? extends T, ? extends R> entry: m.entrySet()) {
|
||||
for (Map.Entry<? extends T, ? extends R> entry : m.entrySet()) {
|
||||
put(entry.getKey(), entry.getValue());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void clear() {
|
||||
inMemoryMetadataOfSpilledData.clear();
|
||||
valueMetadataMap.clear();
|
||||
// close input/output streams
|
||||
try {
|
||||
writeOnlyFileHandle.flush();
|
||||
writeOnlyFileHandle.close();
|
||||
new File(filePath).delete();
|
||||
} catch(IOException e) {
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("unable to clear map or delete file on disk", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<T> keySet() {
|
||||
return inMemoryMetadataOfSpilledData.keySet();
|
||||
return valueMetadataMap.keySet();
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -318,7 +319,7 @@ final public class DiskBasedMap<T,R> implements Map<T,R> {
|
||||
@Override
|
||||
public Set<Entry<T, R>> entrySet() {
|
||||
Set<Entry<T, R>> entrySet = new HashSet<>();
|
||||
for(T key: inMemoryMetadataOfSpilledData.keySet()) {
|
||||
for (T key : valueMetadataMap.keySet()) {
|
||||
entrySet.add(new AbstractMap.SimpleEntry<>(key, get(key)));
|
||||
}
|
||||
return entrySet;
|
||||
|
||||
@@ -16,10 +16,10 @@
|
||||
|
||||
package com.uber.hoodie.common.util.collection;
|
||||
|
||||
import com.uber.hoodie.common.util.SpillableMapUtils;
|
||||
import com.uber.hoodie.exception.HoodieIOException;
|
||||
import com.twitter.common.objectsize.ObjectSizeCalculator;
|
||||
import com.uber.hoodie.common.util.collection.converter.Converter;
|
||||
import com.uber.hoodie.exception.HoodieException;
|
||||
import com.uber.hoodie.exception.HoodieNotSupportedException;
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
@@ -33,56 +33,54 @@ import java.util.Optional;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* An external map that spills content to disk when there is insufficient space for it
|
||||
* to grow.
|
||||
*
|
||||
* This map holds 2 types of data structures :
|
||||
*
|
||||
* (1) Key-Value pairs in a in-memory map
|
||||
* (2) Key-ValueMetadata pairs in an in-memory map which keeps a marker to the values spilled to disk
|
||||
*
|
||||
* NOTE : Values are only appended to disk. If a remove() is called, the entry is marked removed from the in-memory
|
||||
* key-valueMetadata map but it's values will be lying around in the temp file on disk until the file is cleaned.
|
||||
*
|
||||
* The setting of the spill threshold faces the following trade-off: If the spill threshold is
|
||||
* too high, the in-memory map may occupy more memory than is available, resulting in OOM.
|
||||
* However, if the spill threshold is too low, we spill frequently and incur unnecessary disk
|
||||
* writes.
|
||||
* @param <T>
|
||||
* @param <R>
|
||||
* An external map that spills content to disk when there is insufficient space for it to grow. <p>
|
||||
* This map holds 2 types of data structures : <p> (1) Key-Value pairs in an in-memory map (2)
|
||||
* Key-ValueMetadata pairs in an in-memory map which keeps a marker to the values spilled to disk
|
||||
* <p> NOTE : Values are only appended to disk. If a remove() is called, the entry is marked removed
|
||||
* from the in-memory key-valueMetadata map but its values will be lying around in the temp file on
|
||||
* disk until the file is cleaned. <p> The setting of the spill threshold faces the following
|
||||
* trade-off: If the spill threshold is too high, the in-memory map may occupy more memory than is
|
||||
* available, resulting in OOM. However, if the spill threshold is too low, we spill frequently and
|
||||
* incur unnecessary disk writes.
|
||||
*/
|
||||
public class ExternalSpillableMap<T,R> implements Map<T,R> {
|
||||
public class ExternalSpillableMap<T, R> implements Map<T, R> {
|
||||
|
||||
// Find the actual estimated payload size after inserting N records
|
||||
final private static int NUMBER_OF_RECORDS_TO_ESTIMATE_PAYLOAD_SIZE = 100;
|
||||
// maximum space allowed in-memory for this map
|
||||
final private long maxInMemorySizeInBytes;
|
||||
// current space occupied by this map in-memory
|
||||
private Long currentInMemoryMapSize;
|
||||
// Map to store key-values in memory until it hits maxInMemorySizeInBytes
|
||||
final private Map<T,R> inMemoryMap;
|
||||
final private Map<T, R> inMemoryMap;
|
||||
// Map to store key-valuemetadata important to find the values spilled to disk
|
||||
final private DiskBasedMap<T,R> diskBasedMap;
|
||||
// Schema used to de-serialize and readFromDisk the records written to disk
|
||||
final private Schema schema;
|
||||
final private DiskBasedMap<T, R> diskBasedMap;
|
||||
// An estimate of the size of each payload written to this map
|
||||
private volatile long estimatedPayloadSize = 0;
|
||||
// TODO(na) : a dynamic sizing factor to ensure we have space for other objects in memory and incorrect payload estimation
|
||||
final private Double sizingFactorForInMemoryMap = 0.8;
|
||||
// Key converter to convert key type to bytes
|
||||
final private Converter<T> keyConverter;
|
||||
// Value converter to convert value type to bytes
|
||||
final private Converter<R> valueConverter;
|
||||
// Flag to determine whether to stop re-estimating payload size
|
||||
private boolean shouldEstimatePayloadSize = true;
|
||||
|
||||
private static Logger log = LogManager.getLogger(ExternalSpillableMap.class);
|
||||
|
||||
|
||||
public ExternalSpillableMap(Long maxInMemorySizeInBytes, Schema schema,
|
||||
String payloadClazz, Optional<String> baseFilePath) throws IOException {
|
||||
public ExternalSpillableMap(Long maxInMemorySizeInBytes, Optional<String> baseFilePath,
|
||||
Converter<T> keyConverter, Converter<R> valueConverter) throws IOException {
|
||||
this.inMemoryMap = new HashMap<>();
|
||||
this.diskBasedMap = new DiskBasedMap<>(schema, payloadClazz, baseFilePath);
|
||||
this.maxInMemorySizeInBytes = (long) Math.floor(maxInMemorySizeInBytes*sizingFactorForInMemoryMap);
|
||||
this.schema = schema;
|
||||
this.diskBasedMap = new DiskBasedMap<>(baseFilePath, keyConverter, valueConverter);
|
||||
this.maxInMemorySizeInBytes = (long) Math
|
||||
.floor(maxInMemorySizeInBytes * sizingFactorForInMemoryMap);
|
||||
this.currentInMemoryMapSize = 0L;
|
||||
this.keyConverter = keyConverter;
|
||||
this.valueConverter = valueConverter;
|
||||
}
|
||||
|
||||
/**
|
||||
* A custom iterator to wrap over iterating in-memory + disk spilled data
|
||||
* @return
|
||||
*/
|
||||
public Iterator<R> iterator() {
|
||||
return new IteratorWrapper<>(inMemoryMap.values().iterator(), diskBasedMap.iterator());
|
||||
@@ -90,7 +88,6 @@ public class ExternalSpillableMap<T,R> implements Map<T,R> {
|
||||
|
||||
/**
|
||||
* Number of entries in DiskBasedMap
|
||||
* @return
|
||||
*/
|
||||
public int getDiskBasedMapNumEntries() {
|
||||
return diskBasedMap.size();
|
||||
@@ -98,7 +95,6 @@ public class ExternalSpillableMap<T,R> implements Map<T,R> {
|
||||
|
||||
/**
|
||||
* Number of bytes spilled to disk
|
||||
* @return
|
||||
*/
|
||||
public long getSizeOfFileOnDiskInBytes() {
|
||||
return diskBasedMap.sizeOfFileOnDiskInBytes();
|
||||
@@ -106,7 +102,6 @@ public class ExternalSpillableMap<T,R> implements Map<T,R> {
|
||||
|
||||
/**
|
||||
* Number of entries in InMemoryMap
|
||||
* @return
|
||||
*/
|
||||
public int getInMemoryMapNumEntries() {
|
||||
return inMemoryMap.size();
|
||||
@@ -114,7 +109,6 @@ public class ExternalSpillableMap<T,R> implements Map<T,R> {
|
||||
|
||||
/**
|
||||
* Approximate memory footprint of the in-memory map
|
||||
* @return
|
||||
*/
|
||||
public long getCurrentInMemoryMapSize() {
|
||||
return currentInMemoryMapSize;
|
||||
@@ -142,9 +136,9 @@ public class ExternalSpillableMap<T,R> implements Map<T,R> {
|
||||
|
||||
@Override
|
||||
public R get(Object key) {
|
||||
if(inMemoryMap.containsKey(key)) {
|
||||
if (inMemoryMap.containsKey(key)) {
|
||||
return inMemoryMap.get(key);
|
||||
} else if(diskBasedMap.containsKey(key)) {
|
||||
} else if (diskBasedMap.containsKey(key)) {
|
||||
return diskBasedMap.get(key);
|
||||
}
|
||||
return null;
|
||||
@@ -152,33 +146,43 @@ public class ExternalSpillableMap<T,R> implements Map<T,R> {
|
||||
|
||||
@Override
|
||||
public R put(T key, R value) {
|
||||
try {
|
||||
if (this.currentInMemoryMapSize < maxInMemorySizeInBytes || inMemoryMap.containsKey(key)) {
|
||||
// Naive approach for now
|
||||
if (estimatedPayloadSize == 0) {
|
||||
this.estimatedPayloadSize = SpillableMapUtils.computePayloadSize(value, schema);
|
||||
log.info("Estimated Payload size => " + estimatedPayloadSize);
|
||||
}
|
||||
if(!inMemoryMap.containsKey(key)) {
|
||||
currentInMemoryMapSize += this.estimatedPayloadSize;
|
||||
}
|
||||
inMemoryMap.put(key, value);
|
||||
} else {
|
||||
diskBasedMap.put(key, value);
|
||||
if (this.currentInMemoryMapSize < maxInMemorySizeInBytes || inMemoryMap.containsKey(key)) {
|
||||
if (shouldEstimatePayloadSize && estimatedPayloadSize == 0) {
|
||||
// At first, use the sizeEstimate of a record being inserted into the spillable map.
|
||||
// Note, the converter may over estimate the size of a record in the JVM
|
||||
this.estimatedPayloadSize =
|
||||
keyConverter.sizeEstimate(key) + valueConverter.sizeEstimate(value);
|
||||
log.info("Estimated Payload size => " + estimatedPayloadSize);
|
||||
}
|
||||
return value;
|
||||
} catch(IOException io) {
|
||||
throw new HoodieIOException("Unable to estimate size of payload", io);
|
||||
else if(shouldEstimatePayloadSize &&
|
||||
inMemoryMap.size() % NUMBER_OF_RECORDS_TO_ESTIMATE_PAYLOAD_SIZE == 0) {
|
||||
// Re-estimate the size of a record by calculating the size of the entire map containing
|
||||
// N entries and then dividing by the number of entries present (N). This helps to get a
|
||||
// correct estimation of the size of each record in the JVM.
|
||||
long totalMapSize = ObjectSizeCalculator.getObjectSize(inMemoryMap);
|
||||
this.currentInMemoryMapSize = totalMapSize;
|
||||
this.estimatedPayloadSize = totalMapSize/inMemoryMap.size();
|
||||
shouldEstimatePayloadSize = false;
|
||||
log.info("New Estimated Payload size => " + this.estimatedPayloadSize);
|
||||
}
|
||||
if (!inMemoryMap.containsKey(key)) {
|
||||
// TODO : Add support for adjusting payloadSize for updates to the same key
|
||||
currentInMemoryMapSize += this.estimatedPayloadSize;
|
||||
}
|
||||
inMemoryMap.put(key, value);
|
||||
} else {
|
||||
diskBasedMap.put(key, value);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public R remove(Object key) {
|
||||
// NOTE : diskBasedMap.remove does not delete the data from disk
|
||||
if(inMemoryMap.containsKey(key)) {
|
||||
if (inMemoryMap.containsKey(key)) {
|
||||
currentInMemoryMapSize -= estimatedPayloadSize;
|
||||
return inMemoryMap.remove(key);
|
||||
} else if(diskBasedMap.containsKey(key)) {
|
||||
} else if (diskBasedMap.containsKey(key)) {
|
||||
return diskBasedMap.remove(key);
|
||||
}
|
||||
return null;
|
||||
@@ -186,7 +190,7 @@ public class ExternalSpillableMap<T,R> implements Map<T,R> {
|
||||
|
||||
@Override
|
||||
public void putAll(Map<? extends T, ? extends R> m) {
|
||||
for(Map.Entry<? extends T, ? extends R> entry: m.entrySet()) {
|
||||
for (Map.Entry<? extends T, ? extends R> entry : m.entrySet()) {
|
||||
put(entry.getKey(), entry.getValue());
|
||||
}
|
||||
}
|
||||
@@ -208,7 +212,7 @@ public class ExternalSpillableMap<T,R> implements Map<T,R> {
|
||||
|
||||
@Override
|
||||
public Collection<R> values() {
|
||||
if(diskBasedMap.isEmpty()) {
|
||||
if (diskBasedMap.isEmpty()) {
|
||||
return inMemoryMap.values();
|
||||
}
|
||||
throw new HoodieNotSupportedException("Cannot return all values in memory");
|
||||
@@ -226,7 +230,6 @@ public class ExternalSpillableMap<T,R> implements Map<T,R> {
|
||||
* Iterator that wraps iterating over all the values for this map
|
||||
* 1) inMemoryIterator - Iterates over all the data in-memory map
|
||||
* 2) diskLazyFileIterator - Iterates over all the data spilled to disk
|
||||
* @param <R>
|
||||
*/
|
||||
private class IteratorWrapper<R> implements Iterator<R> {
|
||||
|
||||
@@ -237,9 +240,10 @@ public class ExternalSpillableMap<T,R> implements Map<T,R> {
|
||||
this.inMemoryIterator = inMemoryIterator;
|
||||
this.diskLazyFileIterator = diskLazyFileIterator;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
if(inMemoryIterator.hasNext()) {
|
||||
if (inMemoryIterator.hasNext()) {
|
||||
return true;
|
||||
}
|
||||
return diskLazyFileIterator.hasNext();
|
||||
@@ -247,7 +251,7 @@ public class ExternalSpillableMap<T,R> implements Map<T,R> {
|
||||
|
||||
@Override
|
||||
public R next() {
|
||||
if(inMemoryIterator.hasNext()) {
|
||||
if (inMemoryIterator.hasNext()) {
|
||||
return inMemoryIterator.next();
|
||||
}
|
||||
return diskLazyFileIterator.next();
|
||||
|
||||
@@ -17,14 +17,13 @@
|
||||
package com.uber.hoodie.common.util.collection;
|
||||
|
||||
import com.uber.hoodie.common.util.SpillableMapUtils;
|
||||
import com.uber.hoodie.common.util.collection.converter.Converter;
|
||||
import com.uber.hoodie.exception.HoodieException;
|
||||
import com.uber.hoodie.exception.HoodieIOException;
|
||||
import org.apache.avro.Schema;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.RandomAccessFile;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
import java.util.function.Consumer;
|
||||
import java.util.stream.Collectors;
|
||||
@@ -33,57 +32,52 @@ import java.util.stream.Collectors;
|
||||
* Iterable to lazily fetch values spilled to disk.
|
||||
* This class uses RandomAccessFile to randomly access the position of
|
||||
* the latest value for a key spilled to disk and returns the result.
|
||||
* @param <T>
|
||||
*/
|
||||
public class LazyFileIterable<T> implements Iterable<T> {
|
||||
public class LazyFileIterable<T, R> implements Iterable<R> {
|
||||
|
||||
// Used to access the value written at a specific position in the file
|
||||
private RandomAccessFile readOnlyFileHandle;
|
||||
private final RandomAccessFile readOnlyFileHandle;
|
||||
// Stores the key and corresponding value's latest metadata spilled to disk
|
||||
private Map<T, DiskBasedMap.ValueMetadata> inMemoryMetadataOfSpilledData;
|
||||
// Schema used to de-serialize payload written to disk
|
||||
private Schema schema;
|
||||
// Class used to de-serialize/realize payload written to disk
|
||||
private String payloadClazz;
|
||||
private final Map<T, DiskBasedMap.ValueMetadata> inMemoryMetadataOfSpilledData;
|
||||
private final Converter<R> valueConverter;
|
||||
|
||||
public LazyFileIterable(RandomAccessFile file, Map<T, DiskBasedMap.ValueMetadata> map,
|
||||
Schema schema, String payloadClazz) {
|
||||
Converter<R> valueConverter) {
|
||||
this.readOnlyFileHandle = file;
|
||||
this.inMemoryMetadataOfSpilledData = map;
|
||||
this.schema = schema;
|
||||
this.payloadClazz = payloadClazz;
|
||||
this.valueConverter = valueConverter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<T> iterator() {
|
||||
public Iterator<R> iterator() {
|
||||
try {
|
||||
return new LazyFileIterator<>(readOnlyFileHandle, inMemoryMetadataOfSpilledData, schema, payloadClazz);
|
||||
} catch(IOException io) {
|
||||
return new LazyFileIterator<>(readOnlyFileHandle, inMemoryMetadataOfSpilledData,
|
||||
valueConverter);
|
||||
} catch (IOException io) {
|
||||
throw new HoodieException("Unable to initialize iterator for file on disk", io);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Iterator implementation for the iterable defined above.
|
||||
* @param <T>
|
||||
*/
|
||||
public class LazyFileIterator<T> implements Iterator<T> {
|
||||
public class LazyFileIterator<T, R> implements Iterator<R> {
|
||||
|
||||
private RandomAccessFile readOnlyFileHandle;
|
||||
private Schema schema;
|
||||
private String payloadClazz;
|
||||
private Iterator<Map.Entry<T, DiskBasedMap.ValueMetadata>> metadataIterator;
|
||||
private final Converter<R> valueConverter;
|
||||
|
||||
public LazyFileIterator(RandomAccessFile file, Map<T, DiskBasedMap.ValueMetadata> map,
|
||||
Schema schema, String payloadClazz) throws IOException {
|
||||
Converter<R> valueConverter) throws IOException {
|
||||
this.readOnlyFileHandle = file;
|
||||
this.schema = schema;
|
||||
this.payloadClazz = payloadClazz;
|
||||
this.valueConverter = valueConverter;
|
||||
// sort the map in increasing order of offset of value so disk seek is only in one(forward) direction
|
||||
this.metadataIterator = map
|
||||
.entrySet()
|
||||
.stream()
|
||||
.sorted((Map.Entry<T, DiskBasedMap.ValueMetadata> o1, Map.Entry<T, DiskBasedMap.ValueMetadata> o2) ->
|
||||
o1.getValue().getOffsetOfValue().compareTo(o2.getValue().getOffsetOfValue()))
|
||||
.sorted(
|
||||
(Map.Entry<T, DiskBasedMap.ValueMetadata> o1, Map.Entry<T, DiskBasedMap.ValueMetadata> o2) ->
|
||||
o1.getValue().getOffsetOfValue().compareTo(o2.getValue().getOffsetOfValue()))
|
||||
.collect(Collectors.toList()).iterator();
|
||||
}
|
||||
|
||||
@@ -93,12 +87,12 @@ public class LazyFileIterable<T> implements Iterable<T> {
|
||||
}
|
||||
|
||||
@Override
|
||||
public T next() {
|
||||
public R next() {
|
||||
Map.Entry<T, DiskBasedMap.ValueMetadata> entry = this.metadataIterator.next();
|
||||
try {
|
||||
return SpillableMapUtils.readFromDisk(readOnlyFileHandle, schema,
|
||||
payloadClazz, entry.getValue().getOffsetOfValue(), entry.getValue().getSizeOfValue());
|
||||
} catch(IOException e) {
|
||||
return valueConverter.getData(SpillableMapUtils.readBytesFromDisk(readOnlyFileHandle,
|
||||
entry.getValue().getOffsetOfValue(), entry.getValue().getSizeOfValue()));
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Unable to read hoodie record from value spilled to disk", e);
|
||||
}
|
||||
}
|
||||
@@ -109,7 +103,7 @@ public class LazyFileIterable<T> implements Iterable<T> {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void forEachRemaining(Consumer<? super T> action) {
|
||||
public void forEachRemaining(Consumer<? super R> action) {
|
||||
action.accept(next());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
/*
|
||||
* Copyright (c) 2018 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.common.util.collection.converter;
|
||||
|
||||
/**
 * Serialization contract for a payload of type {@code T}: turn it into bytes, rebuild it from
 * bytes, and estimate its in-memory size. This is used in {@link
 * com.uber.hoodie.common.util.collection.ExternalSpillableMap} to spill entries to disk.
 *
 * @param <T> type of the payload handled by this converter
 */
public interface Converter<T> {

  /**
   * Converts a payload to its serialized form.
   *
   * @param t payload to serialize
   * @return the payload as bytes
   */
  byte[] getBytes(T t);

  /**
   * Converts a serialized payload back to the actual payload instance.
   *
   * @param bytes serialized payload
   * @return the payload instance
   */
  T getData(byte[] bytes);

  /**
   * Estimates the size of a payload in memory.
   *
   * @param t payload to measure
   * @return the estimated size
   */
  long sizeEstimate(T t);
}
|
||||
@@ -0,0 +1,107 @@
|
||||
/*
|
||||
* Copyright (c) 2018 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.common.util.collection.converter;
|
||||
|
||||
import com.twitter.common.objectsize.ObjectSizeCalculator;
|
||||
import com.uber.hoodie.common.model.HoodieKey;
|
||||
import com.uber.hoodie.common.model.HoodieRecord;
|
||||
import com.uber.hoodie.common.model.HoodieRecordPayload;
|
||||
import com.uber.hoodie.common.util.HoodieAvroUtils;
|
||||
import com.uber.hoodie.common.util.ReflectionUtils;
|
||||
import com.uber.hoodie.exception.HoodieException;
|
||||
import com.uber.hoodie.exception.HoodieNotSerializableException;
|
||||
import java.io.IOException;
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.lang.reflect.Field;
|
||||
import java.util.Arrays;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Stream;
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.commons.lang3.SerializationUtils;
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
* A default converter implementation for HoodieRecord
|
||||
*/
|
||||
public class HoodieRecordConverter<V> implements
|
||||
Converter<HoodieRecord<? extends HoodieRecordPayload>> {
|
||||
|
||||
// Schema used to get GenericRecord from HoodieRecordPayload then convert to bytes and vice-versa
|
||||
private final Schema schema;
|
||||
// The client implementation of HoodieRecordPayload used to re-create HoodieRecord from bytes
|
||||
private final String payloadClazz;
|
||||
|
||||
private static Logger log = LogManager.getLogger(HoodieRecordConverter.class);
|
||||
|
||||
public HoodieRecordConverter(Schema schema, String payloadClazz) {
|
||||
this.schema = schema;
|
||||
this.payloadClazz = payloadClazz;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getBytes(HoodieRecord hoodieRecord) {
|
||||
try {
|
||||
// Need to initialize this to 0 bytes since deletes are handled by putting an empty record in HoodieRecord
|
||||
byte[] val = new byte[0];
|
||||
if (hoodieRecord.getData().getInsertValue(schema).isPresent()) {
|
||||
val = HoodieAvroUtils
|
||||
.avroToBytes((GenericRecord) hoodieRecord.getData().getInsertValue(schema).get());
|
||||
}
|
||||
Pair<Pair<String, String>, byte[]> data =
|
||||
Pair.of(Pair.of(hoodieRecord.getKey().getRecordKey(),
|
||||
hoodieRecord.getKey().getPartitionPath()), val);
|
||||
return SerializationUtils.serialize(data);
|
||||
} catch (IOException io) {
|
||||
throw new HoodieNotSerializableException("Cannot serialize value to bytes", io);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public HoodieRecord getData(byte[] bytes) {
|
||||
try {
|
||||
Pair<Pair<String, String>, byte[]> data = SerializationUtils.deserialize(bytes);
|
||||
Optional<GenericRecord> payload = Optional.empty();
|
||||
if (data.getValue().length > 0) {
|
||||
// This can happen if the record is deleted, the payload is optional with 0 bytes
|
||||
payload = Optional.of(HoodieAvroUtils.bytesToAvro(data.getValue(), schema));
|
||||
}
|
||||
HoodieRecord<? extends HoodieRecordPayload> hoodieRecord = new HoodieRecord<>(
|
||||
new HoodieKey(data.getKey().getKey(), data.getKey().getValue()),
|
||||
ReflectionUtils
|
||||
.loadPayload(payloadClazz,
|
||||
new Object[]{payload}, Optional.class));
|
||||
return hoodieRecord;
|
||||
} catch (IOException io) {
|
||||
throw new HoodieNotSerializableException("Cannot de-serialize value from bytes", io);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public long sizeEstimate(HoodieRecord<? extends HoodieRecordPayload> hoodieRecord) {
|
||||
// Most HoodieRecords are bound to have data + schema. Although, the same schema object is shared amongst
|
||||
// all records in the JVM. Calculate and print the size of the Schema and of the Record to
|
||||
// note the sizes and differences. A correct estimation in such cases is handled in
|
||||
/** {@link com.uber.hoodie.common.util.collection.ExternalSpillableMap} **/
|
||||
long sizeOfRecord = ObjectSizeCalculator.getObjectSize(hoodieRecord);
|
||||
long sizeOfSchema = ObjectSizeCalculator.getObjectSize(schema);
|
||||
log.info("SizeOfRecord => " + sizeOfRecord + " SizeOfSchema => " + sizeOfSchema);
|
||||
return sizeOfRecord;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Copyright (c) 2018 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.common.util.collection.converter;
|
||||
|
||||
import com.twitter.common.objectsize.ObjectSizeCalculator;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
/**
|
||||
* A default converter implementation for String type of payload key
|
||||
*/
|
||||
public class StringConverter implements Converter<String> {
|
||||
|
||||
@Override
|
||||
public byte[] getBytes(String s) {
|
||||
return s.getBytes(StandardCharsets.UTF_8);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getData(byte[] bytes) {
|
||||
return new String(bytes);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long sizeEstimate(String s) {
|
||||
return ObjectSizeCalculator.getObjectSize(s);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,37 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.exception;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class HoodieNotSerializableException extends HoodieException {
|
||||
|
||||
private IOException ioException;
|
||||
|
||||
public HoodieNotSerializableException(String msg, IOException t) {
|
||||
super(msg, t);
|
||||
this.ioException = t;
|
||||
}
|
||||
|
||||
public HoodieNotSerializableException(String msg) {
|
||||
super(msg);
|
||||
}
|
||||
|
||||
public IOException getIOException() {
|
||||
return ioException;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user