1
0

[HUDI-1486] Remove inline inflight rollback in hoodie writer (#2359)

1. Refactor rollback and move cleaning failed commits logic into cleaner
2. Introduce hoodie heartbeat to ascertain failed commits
3. Fix test cases
This commit is contained in:
n3nash
2021-02-19 20:12:22 -08:00
committed by GitHub
parent c9fcf964b2
commit ffcfb58bac
64 changed files with 1541 additions and 306 deletions

View File

@@ -18,6 +18,13 @@
package org.apache.hudi.common.functional;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieArchivedLogFile;
@@ -39,19 +46,12 @@ import org.apache.hudi.common.table.log.block.HoodieHFileDataBlock;
import org.apache.hudi.common.table.log.block.HoodieLogBlock;
import org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType;
import org.apache.hudi.common.table.log.block.HoodieLogBlock.HoodieLogBlockType;
import org.apache.hudi.common.testutils.FileCreateUtils;
import org.apache.hudi.common.testutils.HoodieCommonTestHarness;
import org.apache.hudi.common.testutils.HoodieTestUtils;
import org.apache.hudi.common.testutils.SchemaTestUtil;
import org.apache.hudi.common.testutils.minicluster.MiniClusterUtil;
import org.apache.hudi.exception.CorruptedLogFileException;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeAll;
@@ -472,7 +472,7 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
writer.appendBlock(dataBlock);
}
writer.close();
FileCreateUtils.createDeltaCommit(basePath, "100", fs);
// scan all log blocks (across multiple log files)
HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder()
.withFileSystem(fs)
@@ -626,6 +626,8 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100")
.map(s -> s.getPath().toString()).collect(Collectors.toList());
FileCreateUtils.createDeltaCommit(basePath, "100", fs);
HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder()
.withFileSystem(fs)
.withBasePath(basePath)
@@ -698,6 +700,8 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100")
.map(s -> s.getPath().toString()).collect(Collectors.toList());
FileCreateUtils.createDeltaCommit(basePath, "100", fs);
FileCreateUtils.createDeltaCommit(basePath, "102", fs);
HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder()
.withFileSystem(fs)
.withBasePath(basePath)
@@ -722,7 +726,7 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
}
@Test
public void testAvroLogRecordReaderWithRollbackPartialBlock()
public void testAvroLogRecordReaderWithFailedPartialBlock()
throws IOException, URISyntaxException, InterruptedException {
Schema schema = HoodieAvroUtils.addMetadataFields(getSimpleSchema());
// Set a small threshold so that every block is a new version
@@ -761,17 +765,9 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
outputStream.flush();
outputStream.close();
// Rollback the last write
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "102");
header.put(HoodieLogBlock.HeaderMetadataType.TARGET_INSTANT_TIME, "101");
header.put(HoodieLogBlock.HeaderMetadataType.COMMAND_BLOCK_TYPE,
String.valueOf(HoodieCommandBlock.HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK.ordinal()));
HoodieCommandBlock commandBlock = new HoodieCommandBlock(header);
writer =
HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
writer.appendBlock(commandBlock);
// Write 3
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "103");
List<IndexedRecord> records3 = SchemaTestUtil.generateHoodieTestRecords(0, 100);
@@ -787,6 +783,8 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100")
.map(s -> s.getPath().toString()).collect(Collectors.toList());
FileCreateUtils.createDeltaCommit(basePath, "100", fs);
FileCreateUtils.createDeltaCommit(basePath, "103", fs);
HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder()
.withFileSystem(fs)
.withBasePath(basePath)
@@ -857,6 +855,10 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100")
.map(s -> s.getPath().toString()).collect(Collectors.toList());
FileCreateUtils.createDeltaCommit(basePath, "100", fs);
FileCreateUtils.createDeltaCommit(basePath, "101", fs);
FileCreateUtils.createDeltaCommit(basePath, "102", fs);
HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder()
.withFileSystem(fs)
.withBasePath(basePath)
@@ -869,6 +871,7 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
.withBufferSize(bufferSize)
.withSpillableMapBasePath(BASE_OUTPUT_PATH)
.build();
assertEquals(200, scanner.getTotalLogRecords(), "We still would read 200 records");
final List<String> readKeys = new ArrayList<>(200);
final List<Boolean> emptyPayloads = new ArrayList<>();
@@ -897,6 +900,8 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
HoodieCommandBlock commandBlock = new HoodieCommandBlock(header);
writer.appendBlock(commandBlock);
FileCreateUtils.deleteDeltaCommit(basePath, "102", fs);
readKeys.clear();
scanner = HoodieMergedLogRecordScanner.newBuilder()
.withFileSystem(fs)
@@ -954,6 +959,8 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
HoodieDeleteBlock deleteBlock = new HoodieDeleteBlock(deletedKeys.toArray(new HoodieKey[50]), header);
writer.appendBlock(deleteBlock);
FileCreateUtils.createDeltaCommit(basePath, "100", fs);
// Attempt 1 : Write rollback block for a failed write
header.put(HoodieLogBlock.HeaderMetadataType.COMMAND_BLOCK_TYPE,
String.valueOf(HoodieCommandBlock.HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK.ordinal()));
@@ -990,6 +997,7 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
final List<String> readKeys = new ArrayList<>();
scanner.forEach(s -> readKeys.add(s.getKey().getRecordKey()));
assertEquals(0, readKeys.size(), "Stream collect should return all 0 records");
FileCreateUtils.deleteDeltaCommit(basePath, "100", fs);
}
@ParameterizedTest
@@ -1023,6 +1031,8 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
HoodieDeleteBlock deleteBlock = new HoodieDeleteBlock(deletedKeys.toArray(new HoodieKey[50]), header);
writer.appendBlock(deleteBlock);
FileCreateUtils.createDeltaCommit(basePath, "100", fs);
// Write 2 rollback blocks (1 data block + 1 delete block) for a failed write
header.put(HoodieLogBlock.HeaderMetadataType.COMMAND_BLOCK_TYPE,
String.valueOf(HoodieCommandBlock.HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK.ordinal()));
@@ -1047,6 +1057,7 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
.withSpillableMapBasePath(BASE_OUTPUT_PATH)
.build();
assertEquals(0, scanner.getTotalLogRecords(), "We would read 0 records");
FileCreateUtils.deleteDeltaCommit(basePath, "100", fs);
}
@ParameterizedTest
@@ -1067,6 +1078,8 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
HoodieDataBlock dataBlock = getDataBlock(records1, header);
writer.appendBlock(dataBlock);
FileCreateUtils.createDeltaCommit(basePath, "100", fs);
// Write invalid rollback for a failed write (possible for in-flight commits)
header.put(HoodieLogBlock.HeaderMetadataType.TARGET_INSTANT_TIME, "101");
header.put(HoodieLogBlock.HeaderMetadataType.COMMAND_BLOCK_TYPE,
@@ -1130,8 +1143,11 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
HoodieDeleteBlock deleteBlock = new HoodieDeleteBlock(deletedKeys.toArray(new HoodieKey[50]), header);
writer.appendBlock(deleteBlock);
FileCreateUtils.createDeltaCommit(basePath, "100", fs);
// Write 1 rollback block for a failed write
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "101");
header.put(HeaderMetadataType.TARGET_INSTANT_TIME, "100");
header.put(HoodieLogBlock.HeaderMetadataType.COMMAND_BLOCK_TYPE,
String.valueOf(HoodieCommandBlock.HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK.ordinal()));
HoodieCommandBlock commandBlock = new HoodieCommandBlock(header);
@@ -1179,6 +1195,8 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
writer.appendBlock(dataBlock);
writer.close();
FileCreateUtils.createDeltaCommit(basePath, "100", fs);
// Append some arbitrary byte[] to the end of the log (mimics a partially written commit)
fs = FSUtils.getFs(fs.getUri().toString(), fs.getConf());
FSDataOutputStream outputStream = fs.append(writer.getLogFile().getPath());
@@ -1254,6 +1272,7 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
.withSpillableMapBasePath(BASE_OUTPUT_PATH)
.build();
assertEquals(0, scanner.getTotalLogRecords(), "We would read 0 records");
FileCreateUtils.deleteDeltaCommit(basePath, "100", fs);
}
/*
@@ -1304,6 +1323,8 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
// Get the size of the block
writer2.close();
FileCreateUtils.createDeltaCommit(basePath, "100", fs);
// From the two log files generated, read the records
List<String> allLogFiles = FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1",
HoodieLogFile.DELTA_EXTENSION, "100").map(s -> s.getPath().toString()).collect(Collectors.toList());
@@ -1398,6 +1419,8 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
writer.appendBlock(dataBlock);
writer.close();
FileCreateUtils.createDeltaCommit(basePath, "100", fs);
HoodieLogFileReader reader = new HoodieLogFileReader(fs, writer.getLogFile(), SchemaTestUtil.getSimpleSchema(),
bufferSize, readBlocksLazily, true);
@@ -1446,6 +1469,8 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
writer.appendBlock(dataBlock);
writer.close();
FileCreateUtils.createDeltaCommit(basePath, "100", fs);
// Append some arbitrary byte[] to the end of the log (mimics a partially written commit)
fs = FSUtils.getFs(fs.getUri().toString(), fs.getConf());
FSDataOutputStream outputStream = fs.append(writer.getLogFile().getPath());
@@ -1522,6 +1547,8 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
writer.appendBlock(dataBlock);
writer.close();
FileCreateUtils.createDeltaCommit(basePath, "100", fs);
HoodieLogFileReader reader = new HoodieLogFileReader(fs, writer.getLogFile(), SchemaTestUtil.getSimpleSchema(),
bufferSize, readBlocksLazily, true);

View File

@@ -201,7 +201,7 @@ public class TestHoodieActiveTimeline extends HoodieCommonTestHarness {
// return the correct set of Instant
checkTimeline.accept(timeline.getCommitsTimeline(),
CollectionUtils.createSet(HoodieTimeline.COMMIT_ACTION, HoodieTimeline.DELTA_COMMIT_ACTION, HoodieTimeline.REPLACE_COMMIT_ACTION));
checkTimeline.accept(timeline.getCommitsAndCompactionTimeline(),
checkTimeline.accept(timeline.getWriteTimeline(),
CollectionUtils.createSet(HoodieTimeline.COMMIT_ACTION, HoodieTimeline.DELTA_COMMIT_ACTION, HoodieTimeline.COMPACTION_ACTION, HoodieTimeline.REPLACE_COMMIT_ACTION));
checkTimeline.accept(timeline.getCommitTimeline(), CollectionUtils.createSet(HoodieTimeline.COMMIT_ACTION, HoodieTimeline.REPLACE_COMMIT_ACTION));
checkTimeline.accept(timeline.getDeltaCommitTimeline(), Collections.singleton(HoodieTimeline.DELTA_COMMIT_ACTION));

View File

@@ -83,6 +83,17 @@ public class FileCreateUtils {
}
}
/**
 * Creates an empty meta file named {@code instantTime + suffix} under the table's
 * {@code .hoodie} meta folder on the given {@link FileSystem}, if it does not already exist.
 *
 * @param basePath    base path of the Hoodie table
 * @param instantTime instant timestamp the meta file represents
 * @param suffix      meta file extension (e.g. {@code .commit}, {@code .deltacommit})
 * @param fs          filesystem to create the file on (may be HDFS in mini-cluster tests)
 * @throws IOException if the filesystem operation fails
 */
private static void createMetaFile(String basePath, String instantTime, String suffix, FileSystem fs) throws IOException {
  org.apache.hadoop.fs.Path parentPath = new org.apache.hadoop.fs.Path(basePath, HoodieTableMetaClient.METAFOLDER_NAME);
  if (!fs.exists(parentPath)) {
    // The meta folder must be a DIRECTORY. The previous fs.create(parentPath).close()
    // produced a zero-length file at this path, which would make every subsequent
    // meta-file creation under it fail. Use mkdirs(), mirroring
    // Files.createDirectories(...) in the local-filesystem overload of this method.
    fs.mkdirs(parentPath);
  }
  org.apache.hadoop.fs.Path metaFilePath = new org.apache.hadoop.fs.Path(parentPath, instantTime + suffix);
  if (!fs.exists(metaFilePath)) {
    // Empty marker file: only its presence (name = instant + action suffix) matters.
    fs.create(metaFilePath).close();
  }
}
private static void createMetaFile(String basePath, String instantTime, String suffix, byte[] content) throws IOException {
Path parentPath = Paths.get(basePath, HoodieTableMetaClient.METAFOLDER_NAME);
Files.createDirectories(parentPath);
@@ -92,10 +103,22 @@ public class FileCreateUtils {
}
}
/**
 * Removes the meta file named {@code instantTime + suffix} from the table's meta folder
 * on the given {@link FileSystem}; a no-op when the file does not exist.
 *
 * @param basePath    base path of the Hoodie table
 * @param instantTime instant timestamp the meta file represents
 * @param suffix      meta file extension to delete
 * @param fs          filesystem to delete from
 * @throws IOException if the filesystem operation fails
 */
private static void deleteMetaFile(String basePath, String instantTime, String suffix, FileSystem fs) throws IOException {
  final org.apache.hadoop.fs.Path metaFilePath = new org.apache.hadoop.fs.Path(
      new org.apache.hadoop.fs.Path(basePath, HoodieTableMetaClient.METAFOLDER_NAME), instantTime + suffix);
  if (fs.exists(metaFilePath)) {
    // Recursive flag is irrelevant for a plain file but required by the FileSystem API.
    fs.delete(metaFilePath, true);
  }
}
/** Creates an empty completed-commit meta file ({@code instantTime + .commit}) on the local filesystem. */
public static void createCommit(String basePath, String instantTime) throws IOException {
createMetaFile(basePath, instantTime, HoodieTimeline.COMMIT_EXTENSION);
}
/** Creates an empty completed-commit meta file ({@code instantTime + .commit}) on the given {@link FileSystem}. */
public static void createCommit(String basePath, String instantTime, FileSystem fs) throws IOException {
createMetaFile(basePath, instantTime, HoodieTimeline.COMMIT_EXTENSION, fs);
}
/** Creates an empty requested-commit meta file ({@code instantTime + .commit.requested}) on the local filesystem. */
public static void createRequestedCommit(String basePath, String instantTime) throws IOException {
createMetaFile(basePath, instantTime, HoodieTimeline.REQUESTED_COMMIT_EXTENSION);
}
@@ -108,6 +131,10 @@ public class FileCreateUtils {
createMetaFile(basePath, instantTime, HoodieTimeline.DELTA_COMMIT_EXTENSION);
}
/** Creates an empty completed delta-commit meta file ({@code instantTime + .deltacommit}) on the given {@link FileSystem}. */
public static void createDeltaCommit(String basePath, String instantTime, FileSystem fs) throws IOException {
createMetaFile(basePath, instantTime, HoodieTimeline.DELTA_COMMIT_EXTENSION, fs);
}
/** Creates an empty requested delta-commit meta file ({@code instantTime + .deltacommit.requested}) on the local filesystem. */
public static void createRequestedDeltaCommit(String basePath, String instantTime) throws IOException {
createMetaFile(basePath, instantTime, HoodieTimeline.REQUESTED_DELTA_COMMIT_EXTENSION);
}
@@ -235,4 +262,8 @@ public class FileCreateUtils {
throw new HoodieException("Error reading hoodie table as a dataframe", e);
}
}
/** Deletes the completed delta-commit meta file for {@code instantTime} from the given {@link FileSystem}; no-op if absent. */
public static void deleteDeltaCommit(String basePath, String instantTime, FileSystem fs) throws IOException {
deleteMetaFile(basePath, instantTime, HoodieTimeline.DELTA_COMMIT_EXTENSION, fs);
}
}