1
0

Removing the OLD_MAGIC header since a) it's no longer used and b) it causes issues when the data itself contains the OLD_MAGIC bytes

This commit is contained in:
Nishith Agarwal
2019-04-19 11:21:45 -07:00
committed by vinoth chandar
parent 2f1e3e15fb
commit 26f24b6728
3 changed files with 10 additions and 125 deletions

View File

@@ -54,7 +54,6 @@ class HoodieLogFileReader implements HoodieLogFormat.Reader {
private final FSDataInputStream inputStream;
private final HoodieLogFile logFile;
private static final byte[] oldMagicBuffer = new byte[4];
private static final byte[] magicBuffer = new byte[6];
private final Schema readerSchema;
private HoodieLogFormat.LogFormatVersion nextBlockVersion;
@@ -121,23 +120,11 @@ class HoodieLogFileReader implements HoodieLogFormat.Reader {
Map<HeaderMetadataType, String> header = null;
try {
if (isOldMagic()) {
// 1 Read the block type for a log block
type = inputStream.readInt();
Preconditions.checkArgument(type < HoodieLogBlockType.values().length,
"Invalid block byte type found " + type);
blockType = HoodieLogBlockType.values()[type];
// 2 Read the total size of the block
blocksize = inputStream.readInt();
} else {
// 1 Read the total size of the block
blocksize = (int) inputStream.readLong();
}
// 1 Read the total size of the block
blocksize = (int) inputStream.readLong();
} catch (EOFException | CorruptedLogFileException e) {
// An exception reading any of the above indicates a corrupt block
// Create a corrupt block by finding the next OLD_MAGIC marker or EOF
// Create a corrupt block by finding the next MAGIC marker or EOF
return createCorruptBlock();
}
@@ -297,22 +284,12 @@ class HoodieLogFileReader implements HoodieLogFormat.Reader {
}
/**
* Read log format version from log file, if present For old log files written with Magic header
* OLD_MAGIC and without version, return DEFAULT_VERSION
* Read log format version from log file.
*/
private HoodieLogFormat.LogFormatVersion readVersion() throws IOException {
// If not old log file format (written with Magic header OLD_MAGIC), then read log version
if (Arrays.equals(oldMagicBuffer, HoodieLogFormat.OLD_MAGIC)) {
Arrays.fill(oldMagicBuffer, (byte) 0);
return new HoodieLogFormatVersion(HoodieLogFormatVersion.DEFAULT_VERSION);
}
return new HoodieLogFormatVersion(inputStream.readInt());
}
private boolean isOldMagic() {
return Arrays.equals(oldMagicBuffer, HoodieLogFormat.OLD_MAGIC);
}
private boolean readMagic() throws IOException {
try {
@@ -334,13 +311,7 @@ class HoodieLogFileReader implements HoodieLogFormat.Reader {
// 1. Read magic header from the start of the block
inputStream.readFully(magicBuffer, 0, 6);
if (!Arrays.equals(magicBuffer, HoodieLogFormat.MAGIC)) {
inputStream.seek(pos);
// 1. Read old magic header from the start of the block
// (for backwards compatibility of older log files written without log version)
inputStream.readFully(oldMagicBuffer, 0, 4);
if (!Arrays.equals(oldMagicBuffer, HoodieLogFormat.OLD_MAGIC)) {
return false;
}
return false;
}
return true;
}

View File

@@ -29,25 +29,16 @@ import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
/**
* File Format for Hoodie Log Files. The File Format consists of blocks each seperated with a
* OLD_MAGIC sync marker. A Block can either be a Data block, Command block or Delete Block. Data
* File Format for Hoodie Log Files. The File Format consists of blocks each separated with a
* MAGIC sync marker. A Block can either be a Data block, Command block or Delete Block. Data
* Block - Contains log records serialized as Avro Binary Format Command Block - Specific commands
* like RoLLBACK_PREVIOUS-BLOCK - Tombstone for the previously written block Delete Block - List of
* like ROLLBACK_PREVIOUS-BLOCK - Tombstone for the previously written block Delete Block - List of
* keys to delete - tombstone for keys
*/
public interface HoodieLogFormat {
/**
* Magic 4 bytes we put at the start of every block in the log file. Sync marker. We could make
* this file specific (generate a random 4 byte magic and stick it in the file header), but this I
* think is suffice for now - PR
*/
byte[] OLD_MAGIC = new byte[] {'H', 'U', 'D', 'I'};
/**
* Magic 6 bytes we put at the start of every block in the log file. This is added to maintain
* backwards compatiblity due to lack of log format/block version in older log files. All new log
* block will now write this OLD_MAGIC value
* Magic 6 bytes we put at the start of every block in the log file.
*/
byte[] MAGIC = new byte[] {'#', 'H', 'U', 'D', 'I', '#'};