[HUDI-1486] Remove inline inflight rollback in hoodie writer (#2359)
1. Refactor rollback and move the logic for cleaning up failed commits into the cleaner. 2. Introduce a Hoodie heartbeat mechanism to detect failed commits. 3. Fix test cases.
This commit is contained in:
@@ -22,5 +22,5 @@ package org.apache.hudi.common.model;
|
||||
* Hoodie cleaning policies.
|
||||
*/
|
||||
public enum HoodieCleaningPolicy {
|
||||
KEEP_LATEST_FILE_VERSIONS, KEEP_LATEST_COMMITS
|
||||
KEEP_LATEST_FILE_VERSIONS, KEEP_LATEST_COMMITS;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,43 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.common.model;
|
||||
|
||||
/**
 * Policy controlling how to perform cleaning for failed writes.
 */
public enum HoodieFailedWritesCleaningPolicy {
  /** Clean up data from failed writes inline, as part of every write operation. */
  EAGER,
  /** Defer cleanup of failed writes until the next clean action runs. */
  LAZY,
  /** Leave data from failed writes in place; no cleanup is performed. */
  NEVER;

  /** @return true if failed writes are cleaned inline with each write. */
  public boolean isEager() {
    return EAGER == this;
  }

  /** @return true if failed writes are cleaned lazily during the clean action. */
  public boolean isLazy() {
    return LAZY == this;
  }

  /** @return true if failed writes are never cleaned. */
  public boolean isNever() {
    return NEVER == this;
  }
}
|
||||
@@ -45,6 +45,7 @@ import org.apache.hadoop.fs.PathFilter;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.Arrays;
|
||||
@@ -71,10 +72,10 @@ public class HoodieTableMetaClient implements Serializable {
|
||||
private static final long serialVersionUID = 1L;
|
||||
private static final Logger LOG = LogManager.getLogger(HoodieTableMetaClient.class);
|
||||
public static final String METAFOLDER_NAME = ".hoodie";
|
||||
public static final String TEMPFOLDER_NAME = METAFOLDER_NAME + Path.SEPARATOR + ".temp";
|
||||
public static final String AUXILIARYFOLDER_NAME = METAFOLDER_NAME + Path.SEPARATOR + ".aux";
|
||||
public static final String BOOTSTRAP_INDEX_ROOT_FOLDER_PATH = AUXILIARYFOLDER_NAME + Path.SEPARATOR + ".bootstrap";
|
||||
|
||||
public static final String TEMPFOLDER_NAME = METAFOLDER_NAME + File.separator + ".temp";
|
||||
public static final String AUXILIARYFOLDER_NAME = METAFOLDER_NAME + File.separator + ".aux";
|
||||
public static final String BOOTSTRAP_INDEX_ROOT_FOLDER_PATH = AUXILIARYFOLDER_NAME + File.separator + ".bootstrap";
|
||||
public static final String HEARTBEAT_FOLDER_NAME = METAFOLDER_NAME + File.separator + ".heartbeat";
|
||||
public static final String BOOTSTRAP_INDEX_BY_PARTITION_FOLDER_PATH = BOOTSTRAP_INDEX_ROOT_FOLDER_PATH
|
||||
+ Path.SEPARATOR + ".partitions";
|
||||
public static final String BOOTSTRAP_INDEX_BY_FILE_ID_FOLDER_PATH = BOOTSTRAP_INDEX_ROOT_FOLDER_PATH + Path.SEPARATOR
|
||||
@@ -196,6 +197,13 @@ public class HoodieTableMetaClient implements Serializable {
|
||||
return basePath + Path.SEPARATOR + AUXILIARYFOLDER_NAME;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Heartbeat folder path.
|
||||
*/
|
||||
public static String getHeartbeatFolderPath(String basePath) {
|
||||
return String.format("%s%s%s", basePath, File.separator, HEARTBEAT_FOLDER_NAME);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Bootstrap Index By Partition Folder
|
||||
*/
|
||||
@@ -516,7 +524,7 @@ public class HoodieTableMetaClient implements Serializable {
|
||||
case COPY_ON_WRITE:
|
||||
return getActiveTimeline().getCommitTimeline();
|
||||
case MERGE_ON_READ:
|
||||
return getActiveTimeline().getCommitsAndCompactionTimeline();
|
||||
return getActiveTimeline().getWriteTimeline();
|
||||
default:
|
||||
throw new HoodieException("Unsupported table type :" + this.getTableType());
|
||||
}
|
||||
|
||||
@@ -18,6 +18,11 @@
|
||||
|
||||
package org.apache.hudi.common.table.log;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.avro.generic.IndexedRecord;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hudi.common.model.HoodieKey;
|
||||
import org.apache.hudi.common.model.HoodieLogFile;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
@@ -33,12 +38,6 @@ import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
import org.apache.hudi.common.util.SpillableMapUtils;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
import org.apache.hudi.exception.HoodieIOException;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.avro.generic.IndexedRecord;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
@@ -53,6 +52,7 @@ import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType.INSTANT_TIME;
|
||||
import static org.apache.hudi.common.table.log.block.HoodieLogBlock.HoodieLogBlockType.COMMAND_BLOCK;
|
||||
import static org.apache.hudi.common.table.log.block.HoodieLogBlock.HoodieLogBlockType.CORRUPT_BLOCK;
|
||||
|
||||
/**
|
||||
@@ -125,6 +125,9 @@ public abstract class AbstractHoodieLogRecordScanner {
|
||||
*/
|
||||
public void scan() {
|
||||
HoodieLogFormatReader logFormatReaderWrapper = null;
|
||||
HoodieTimeline commitsTimeline = this.hoodieTableMetaClient.getCommitsTimeline();
|
||||
HoodieTimeline completedInstantsTimeline = commitsTimeline.filterCompletedInstants();
|
||||
HoodieTimeline inflightInstantsTimeline = commitsTimeline.filterInflights();
|
||||
try {
|
||||
// iterate over the paths
|
||||
logFormatReaderWrapper = new HoodieLogFormatReader(fs,
|
||||
@@ -145,6 +148,14 @@ public abstract class AbstractHoodieLogRecordScanner {
|
||||
// hit a block with instant time greater than should be processed, stop processing further
|
||||
break;
|
||||
}
|
||||
if (r.getBlockType() != CORRUPT_BLOCK && r.getBlockType() != COMMAND_BLOCK) {
|
||||
String instantTime = r.getLogBlockHeader().get(INSTANT_TIME);
|
||||
if (!completedInstantsTimeline.containsOrBeforeTimelineStarts(instantTime)
|
||||
|| inflightInstantsTimeline.containsInstant(instantTime)) {
|
||||
// hit an uncommitted block possibly from a failed write, move to the next one and skip processing this one
|
||||
continue;
|
||||
}
|
||||
}
|
||||
switch (r.getBlockType()) {
|
||||
case HFILE_DATA_BLOCK:
|
||||
case AVRO_DATA_BLOCK:
|
||||
|
||||
@@ -69,9 +69,7 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
|
||||
INFLIGHT_COMPACTION_EXTENSION, REQUESTED_COMPACTION_EXTENSION,
|
||||
INFLIGHT_RESTORE_EXTENSION, RESTORE_EXTENSION,
|
||||
ROLLBACK_EXTENSION, INFLIGHT_ROLLBACK_EXTENSION,
|
||||
REQUESTED_REPLACE_COMMIT_EXTENSION, INFLIGHT_REPLACE_COMMIT_EXTENSION, REPLACE_COMMIT_EXTENSION
|
||||
));
|
||||
|
||||
REQUESTED_REPLACE_COMMIT_EXTENSION, INFLIGHT_REPLACE_COMMIT_EXTENSION, REPLACE_COMMIT_EXTENSION));
|
||||
private static final Logger LOG = LogManager.getLogger(HoodieActiveTimeline.class);
|
||||
protected HoodieTableMetaClient metaClient;
|
||||
private static AtomicReference<String> lastInstantTime = new AtomicReference<>(String.valueOf(Integer.MIN_VALUE));
|
||||
|
||||
@@ -107,7 +107,7 @@ public class HoodieDefaultTimeline implements HoodieTimeline {
|
||||
}
|
||||
|
||||
@Override
|
||||
public HoodieDefaultTimeline getCommitsAndCompactionTimeline() {
|
||||
public HoodieDefaultTimeline getWriteTimeline() {
|
||||
Set<String> validActions = CollectionUtils.createSet(COMMIT_ACTION, DELTA_COMMIT_ACTION, COMPACTION_ACTION, REPLACE_COMMIT_ACTION);
|
||||
return new HoodieDefaultTimeline(instants.stream().filter(s -> validActions.contains(s.getAction())), details);
|
||||
}
|
||||
@@ -280,6 +280,11 @@ public class HoodieDefaultTimeline implements HoodieTimeline {
|
||||
return instants.stream().anyMatch(s -> s.equals(instant));
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean containsInstant(String ts) {
|
||||
return instants.stream().anyMatch(s -> s.getTimestamp().equals(ts));
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean containsOrBeforeTimelineStarts(String instant) {
|
||||
return instants.stream().anyMatch(s -> s.getTimestamp().equals(instant)) || isBeforeTimelineStarts(instant);
|
||||
|
||||
@@ -131,11 +131,11 @@ public interface HoodieTimeline extends Serializable {
|
||||
HoodieTimeline filterCompletedAndCompactionInstants();
|
||||
|
||||
/**
|
||||
* Timeline to just include commits (commit/deltacommit) and compaction actions.
|
||||
* Timeline to just include commits (commit/deltacommit), compaction and replace actions.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
HoodieTimeline getCommitsAndCompactionTimeline();
|
||||
HoodieTimeline getWriteTimeline();
|
||||
|
||||
/**
|
||||
* Timeline to just include replace instants that have valid (commit/deltacommit) actions.
|
||||
@@ -156,7 +156,6 @@ public interface HoodieTimeline extends Serializable {
|
||||
*/
|
||||
HoodieTimeline filterPendingReplaceTimeline();
|
||||
|
||||
|
||||
/**
|
||||
* Create a new Timeline with all the instants after startTs.
|
||||
*/
|
||||
@@ -227,6 +226,11 @@ public interface HoodieTimeline extends Serializable {
|
||||
*/
|
||||
boolean containsInstant(HoodieInstant instant);
|
||||
|
||||
/**
|
||||
* @return true if the passed instant is present as a completed instant on the timeline
|
||||
*/
|
||||
boolean containsInstant(String ts);
|
||||
|
||||
/**
|
||||
* @return true if the passed instant is present as a completed instant on the timeline or if the instant is before
|
||||
* the first completed instant in the timeline
|
||||
|
||||
@@ -49,7 +49,7 @@ public class TimelineUtils {
|
||||
* Does not include internal operations such as clean in the timeline.
|
||||
*/
|
||||
public static List<String> getPartitionsWritten(HoodieTimeline timeline) {
|
||||
HoodieTimeline timelineToSync = timeline.getCommitsAndCompactionTimeline();
|
||||
HoodieTimeline timelineToSync = timeline.getWriteTimeline();
|
||||
return getAffectedPartitions(timelineToSync);
|
||||
}
|
||||
|
||||
|
||||
@@ -118,7 +118,7 @@ public abstract class AbstractTableFileSystemView implements SyncableFileSystemV
|
||||
* @param visibleActiveTimeline Visible Active Timeline
|
||||
*/
|
||||
protected void refreshTimeline(HoodieTimeline visibleActiveTimeline) {
|
||||
this.visibleCommitsAndCompactionTimeline = visibleActiveTimeline.getCommitsAndCompactionTimeline();
|
||||
this.visibleCommitsAndCompactionTimeline = visibleActiveTimeline.getWriteTimeline();
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -25,8 +25,10 @@ import org.apache.hudi.avro.model.HoodieCleanPartitionMetadata;
|
||||
import org.apache.hudi.avro.model.HoodieCleanerPlan;
|
||||
import org.apache.hudi.common.HoodieCleanStat;
|
||||
import org.apache.hudi.common.model.CleanFileInfo;
|
||||
import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
|
||||
import org.apache.hudi.common.table.timeline.versioning.clean.CleanMetadataMigrator;
|
||||
import org.apache.hudi.common.table.timeline.versioning.clean.CleanMetadataV1MigrationHandler;
|
||||
@@ -38,6 +40,8 @@ import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.apache.hudi.common.table.timeline.HoodieTimeline.COMMIT_ACTION;
|
||||
|
||||
public class CleanerUtils {
|
||||
public static final Integer CLEAN_METADATA_VERSION_1 = CleanMetadataV1MigrationHandler.VERSION;
|
||||
public static final Integer CLEAN_METADATA_VERSION_2 = CleanMetadataV2MigrationHandler.VERSION;
|
||||
@@ -112,4 +116,36 @@ public class CleanerUtils {
|
||||
public static List<HoodieCleanFileInfo> convertToHoodieCleanFileInfoList(List<CleanFileInfo> cleanFileInfoList) {
|
||||
return cleanFileInfoList.stream().map(CleanFileInfo::toHoodieFileCleanInfo).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute {@link HoodieFailedWritesCleaningPolicy} to rollback failed writes for different actions.
|
||||
* @param cleaningPolicy
|
||||
* @param actionType
|
||||
* @param rollbackFailedWritesFunc
|
||||
*/
|
||||
public static void rollbackFailedWrites(HoodieFailedWritesCleaningPolicy cleaningPolicy, String actionType,
|
||||
Functions.Function0<Boolean> rollbackFailedWritesFunc) {
|
||||
switch (actionType) {
|
||||
case HoodieTimeline.CLEAN_ACTION:
|
||||
if (cleaningPolicy.isEager()) {
|
||||
// No need to do any special cleanup for failed operations during clean
|
||||
return;
|
||||
} else if (cleaningPolicy.isLazy()) {
|
||||
// Perform rollback of failed operations for all types of actions during clean
|
||||
rollbackFailedWritesFunc.apply();
|
||||
return;
|
||||
}
|
||||
// No action needed for cleaning policy NEVER
|
||||
break;
|
||||
case COMMIT_ACTION:
|
||||
// For any other actions, perform rollback of failed writes
|
||||
if (cleaningPolicy.isEager()) {
|
||||
rollbackFailedWritesFunc.apply();
|
||||
return;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw new IllegalArgumentException("Unsupported action type " + actionType);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.exception;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * <p>
 * Exception thrown for Hoodie heartbeat failures.
 * </p>
 * <p>
 * Hoodie Write/Read clients will throw this exception if any of its operations fail. This is a runtime (unchecked)
 * exception.
 * </p>
 */
public class HoodieHeartbeatException extends RuntimeException implements Serializable {

  /** Constructs the exception with neither a message nor a cause. */
  public HoodieHeartbeatException() {
  }

  /** Constructs the exception with a detail message only. */
  public HoodieHeartbeatException(String message) {
    super(message);
  }

  /** Constructs the exception with a detail message and the underlying cause. */
  public HoodieHeartbeatException(String message, Throwable t) {
    super(message, t);
  }

  /** Constructs the exception wrapping only the underlying cause. */
  public HoodieHeartbeatException(Throwable t) {
    super(t);
  }
}
|
||||
@@ -18,6 +18,13 @@
|
||||
|
||||
package org.apache.hudi.common.functional;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.avro.generic.IndexedRecord;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hudi.avro.HoodieAvroUtils;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.model.HoodieArchivedLogFile;
|
||||
@@ -39,19 +46,12 @@ import org.apache.hudi.common.table.log.block.HoodieHFileDataBlock;
|
||||
import org.apache.hudi.common.table.log.block.HoodieLogBlock;
|
||||
import org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType;
|
||||
import org.apache.hudi.common.table.log.block.HoodieLogBlock.HoodieLogBlockType;
|
||||
import org.apache.hudi.common.testutils.FileCreateUtils;
|
||||
import org.apache.hudi.common.testutils.HoodieCommonTestHarness;
|
||||
import org.apache.hudi.common.testutils.HoodieTestUtils;
|
||||
import org.apache.hudi.common.testutils.SchemaTestUtil;
|
||||
import org.apache.hudi.common.testutils.minicluster.MiniClusterUtil;
|
||||
import org.apache.hudi.exception.CorruptedLogFileException;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.avro.generic.IndexedRecord;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.junit.jupiter.api.AfterAll;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
@@ -472,7 +472,7 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
writer.appendBlock(dataBlock);
|
||||
}
|
||||
writer.close();
|
||||
|
||||
FileCreateUtils.createDeltaCommit(basePath, "100", fs);
|
||||
// scan all log blocks (across multiple log files)
|
||||
HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder()
|
||||
.withFileSystem(fs)
|
||||
@@ -626,6 +626,8 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100")
|
||||
.map(s -> s.getPath().toString()).collect(Collectors.toList());
|
||||
|
||||
FileCreateUtils.createDeltaCommit(basePath, "100", fs);
|
||||
|
||||
HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder()
|
||||
.withFileSystem(fs)
|
||||
.withBasePath(basePath)
|
||||
@@ -698,6 +700,8 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100")
|
||||
.map(s -> s.getPath().toString()).collect(Collectors.toList());
|
||||
|
||||
FileCreateUtils.createDeltaCommit(basePath, "100", fs);
|
||||
FileCreateUtils.createDeltaCommit(basePath, "102", fs);
|
||||
HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder()
|
||||
.withFileSystem(fs)
|
||||
.withBasePath(basePath)
|
||||
@@ -722,7 +726,7 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAvroLogRecordReaderWithRollbackPartialBlock()
|
||||
public void testAvroLogRecordReaderWithFailedPartialBlock()
|
||||
throws IOException, URISyntaxException, InterruptedException {
|
||||
Schema schema = HoodieAvroUtils.addMetadataFields(getSimpleSchema());
|
||||
// Set a small threshold so that every block is a new version
|
||||
@@ -761,17 +765,9 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
outputStream.flush();
|
||||
outputStream.close();
|
||||
|
||||
// Rollback the last write
|
||||
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "102");
|
||||
header.put(HoodieLogBlock.HeaderMetadataType.TARGET_INSTANT_TIME, "101");
|
||||
header.put(HoodieLogBlock.HeaderMetadataType.COMMAND_BLOCK_TYPE,
|
||||
String.valueOf(HoodieCommandBlock.HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK.ordinal()));
|
||||
HoodieCommandBlock commandBlock = new HoodieCommandBlock(header);
|
||||
writer =
|
||||
HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
|
||||
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
|
||||
writer.appendBlock(commandBlock);
|
||||
|
||||
// Write 3
|
||||
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "103");
|
||||
List<IndexedRecord> records3 = SchemaTestUtil.generateHoodieTestRecords(0, 100);
|
||||
@@ -787,6 +783,8 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100")
|
||||
.map(s -> s.getPath().toString()).collect(Collectors.toList());
|
||||
|
||||
FileCreateUtils.createDeltaCommit(basePath, "100", fs);
|
||||
FileCreateUtils.createDeltaCommit(basePath, "103", fs);
|
||||
HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder()
|
||||
.withFileSystem(fs)
|
||||
.withBasePath(basePath)
|
||||
@@ -857,6 +855,10 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100")
|
||||
.map(s -> s.getPath().toString()).collect(Collectors.toList());
|
||||
|
||||
FileCreateUtils.createDeltaCommit(basePath, "100", fs);
|
||||
FileCreateUtils.createDeltaCommit(basePath, "101", fs);
|
||||
FileCreateUtils.createDeltaCommit(basePath, "102", fs);
|
||||
|
||||
HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder()
|
||||
.withFileSystem(fs)
|
||||
.withBasePath(basePath)
|
||||
@@ -869,6 +871,7 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
.withBufferSize(bufferSize)
|
||||
.withSpillableMapBasePath(BASE_OUTPUT_PATH)
|
||||
.build();
|
||||
|
||||
assertEquals(200, scanner.getTotalLogRecords(), "We still would read 200 records");
|
||||
final List<String> readKeys = new ArrayList<>(200);
|
||||
final List<Boolean> emptyPayloads = new ArrayList<>();
|
||||
@@ -897,6 +900,8 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
HoodieCommandBlock commandBlock = new HoodieCommandBlock(header);
|
||||
writer.appendBlock(commandBlock);
|
||||
|
||||
FileCreateUtils.deleteDeltaCommit(basePath, "102", fs);
|
||||
|
||||
readKeys.clear();
|
||||
scanner = HoodieMergedLogRecordScanner.newBuilder()
|
||||
.withFileSystem(fs)
|
||||
@@ -954,6 +959,8 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
HoodieDeleteBlock deleteBlock = new HoodieDeleteBlock(deletedKeys.toArray(new HoodieKey[50]), header);
|
||||
writer.appendBlock(deleteBlock);
|
||||
|
||||
FileCreateUtils.createDeltaCommit(basePath, "100", fs);
|
||||
|
||||
// Attempt 1 : Write rollback block for a failed write
|
||||
header.put(HoodieLogBlock.HeaderMetadataType.COMMAND_BLOCK_TYPE,
|
||||
String.valueOf(HoodieCommandBlock.HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK.ordinal()));
|
||||
@@ -990,6 +997,7 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
final List<String> readKeys = new ArrayList<>();
|
||||
scanner.forEach(s -> readKeys.add(s.getKey().getRecordKey()));
|
||||
assertEquals(0, readKeys.size(), "Stream collect should return all 0 records");
|
||||
FileCreateUtils.deleteDeltaCommit(basePath, "100", fs);
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@@ -1023,6 +1031,8 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
HoodieDeleteBlock deleteBlock = new HoodieDeleteBlock(deletedKeys.toArray(new HoodieKey[50]), header);
|
||||
writer.appendBlock(deleteBlock);
|
||||
|
||||
FileCreateUtils.createDeltaCommit(basePath, "100", fs);
|
||||
|
||||
// Write 2 rollback blocks (1 data block + 1 delete bloc) for a failed write
|
||||
header.put(HoodieLogBlock.HeaderMetadataType.COMMAND_BLOCK_TYPE,
|
||||
String.valueOf(HoodieCommandBlock.HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK.ordinal()));
|
||||
@@ -1047,6 +1057,7 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
.withSpillableMapBasePath(BASE_OUTPUT_PATH)
|
||||
.build();
|
||||
assertEquals(0, scanner.getTotalLogRecords(), "We would read 0 records");
|
||||
FileCreateUtils.deleteDeltaCommit(basePath, "100", fs);
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@@ -1067,6 +1078,8 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
HoodieDataBlock dataBlock = getDataBlock(records1, header);
|
||||
writer.appendBlock(dataBlock);
|
||||
|
||||
FileCreateUtils.createDeltaCommit(basePath, "100", fs);
|
||||
|
||||
// Write invalid rollback for a failed write (possible for in-flight commits)
|
||||
header.put(HoodieLogBlock.HeaderMetadataType.TARGET_INSTANT_TIME, "101");
|
||||
header.put(HoodieLogBlock.HeaderMetadataType.COMMAND_BLOCK_TYPE,
|
||||
@@ -1130,8 +1143,11 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
HoodieDeleteBlock deleteBlock = new HoodieDeleteBlock(deletedKeys.toArray(new HoodieKey[50]), header);
|
||||
writer.appendBlock(deleteBlock);
|
||||
|
||||
FileCreateUtils.createDeltaCommit(basePath, "100", fs);
|
||||
|
||||
// Write 1 rollback block for a failed write
|
||||
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "101");
|
||||
header.put(HeaderMetadataType.TARGET_INSTANT_TIME, "100");
|
||||
header.put(HoodieLogBlock.HeaderMetadataType.COMMAND_BLOCK_TYPE,
|
||||
String.valueOf(HoodieCommandBlock.HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK.ordinal()));
|
||||
HoodieCommandBlock commandBlock = new HoodieCommandBlock(header);
|
||||
@@ -1179,6 +1195,8 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
writer.appendBlock(dataBlock);
|
||||
|
||||
writer.close();
|
||||
FileCreateUtils.createDeltaCommit(basePath, "100", fs);
|
||||
|
||||
// Append some arbit byte[] to the end of the log (mimics a partially written commit)
|
||||
fs = FSUtils.getFs(fs.getUri().toString(), fs.getConf());
|
||||
FSDataOutputStream outputStream = fs.append(writer.getLogFile().getPath());
|
||||
@@ -1254,6 +1272,7 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
.withSpillableMapBasePath(BASE_OUTPUT_PATH)
|
||||
.build();
|
||||
assertEquals(0, scanner.getTotalLogRecords(), "We would read 0 records");
|
||||
FileCreateUtils.deleteDeltaCommit(basePath, "100", fs);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1304,6 +1323,8 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
// Get the size of the block
|
||||
writer2.close();
|
||||
|
||||
FileCreateUtils.createDeltaCommit(basePath, "100", fs);
|
||||
|
||||
// From the two log files generated, read the records
|
||||
List<String> allLogFiles = FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1",
|
||||
HoodieLogFile.DELTA_EXTENSION, "100").map(s -> s.getPath().toString()).collect(Collectors.toList());
|
||||
@@ -1398,6 +1419,8 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
writer.appendBlock(dataBlock);
|
||||
writer.close();
|
||||
|
||||
FileCreateUtils.createDeltaCommit(basePath, "100", fs);
|
||||
|
||||
HoodieLogFileReader reader = new HoodieLogFileReader(fs, writer.getLogFile(), SchemaTestUtil.getSimpleSchema(),
|
||||
bufferSize, readBlocksLazily, true);
|
||||
|
||||
@@ -1446,6 +1469,8 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
writer.appendBlock(dataBlock);
|
||||
writer.close();
|
||||
|
||||
FileCreateUtils.createDeltaCommit(basePath, "100", fs);
|
||||
|
||||
// Append some arbit byte[] to thee end of the log (mimics a partially written commit)
|
||||
fs = FSUtils.getFs(fs.getUri().toString(), fs.getConf());
|
||||
FSDataOutputStream outputStream = fs.append(writer.getLogFile().getPath());
|
||||
@@ -1522,6 +1547,8 @@ public class TestHoodieLogFormat extends HoodieCommonTestHarness {
|
||||
writer.appendBlock(dataBlock);
|
||||
writer.close();
|
||||
|
||||
FileCreateUtils.createDeltaCommit(basePath, "100", fs);
|
||||
|
||||
HoodieLogFileReader reader = new HoodieLogFileReader(fs, writer.getLogFile(), SchemaTestUtil.getSimpleSchema(),
|
||||
bufferSize, readBlocksLazily, true);
|
||||
|
||||
|
||||
@@ -201,7 +201,7 @@ public class TestHoodieActiveTimeline extends HoodieCommonTestHarness {
|
||||
// return the correct set of Instant
|
||||
checkTimeline.accept(timeline.getCommitsTimeline(),
|
||||
CollectionUtils.createSet(HoodieTimeline.COMMIT_ACTION, HoodieTimeline.DELTA_COMMIT_ACTION, HoodieTimeline.REPLACE_COMMIT_ACTION));
|
||||
checkTimeline.accept(timeline.getCommitsAndCompactionTimeline(),
|
||||
checkTimeline.accept(timeline.getWriteTimeline(),
|
||||
CollectionUtils.createSet(HoodieTimeline.COMMIT_ACTION, HoodieTimeline.DELTA_COMMIT_ACTION, HoodieTimeline.COMPACTION_ACTION, HoodieTimeline.REPLACE_COMMIT_ACTION));
|
||||
checkTimeline.accept(timeline.getCommitTimeline(), CollectionUtils.createSet(HoodieTimeline.COMMIT_ACTION, HoodieTimeline.REPLACE_COMMIT_ACTION));
|
||||
checkTimeline.accept(timeline.getDeltaCommitTimeline(), Collections.singleton(HoodieTimeline.DELTA_COMMIT_ACTION));
|
||||
|
||||
@@ -83,6 +83,17 @@ public class FileCreateUtils {
|
||||
}
|
||||
}
|
||||
|
||||
private static void createMetaFile(String basePath, String instantTime, String suffix, FileSystem fs) throws IOException {
|
||||
org.apache.hadoop.fs.Path parentPath = new org.apache.hadoop.fs.Path(basePath, HoodieTableMetaClient.METAFOLDER_NAME);
|
||||
if (!fs.exists(parentPath)) {
|
||||
fs.create(parentPath).close();
|
||||
}
|
||||
org.apache.hadoop.fs.Path metaFilePath = new org.apache.hadoop.fs.Path(parentPath, instantTime + suffix);
|
||||
if (!fs.exists(metaFilePath)) {
|
||||
fs.create(metaFilePath).close();
|
||||
}
|
||||
}
|
||||
|
||||
private static void createMetaFile(String basePath, String instantTime, String suffix, byte[] content) throws IOException {
|
||||
Path parentPath = Paths.get(basePath, HoodieTableMetaClient.METAFOLDER_NAME);
|
||||
Files.createDirectories(parentPath);
|
||||
@@ -92,10 +103,22 @@ public class FileCreateUtils {
|
||||
}
|
||||
}
|
||||
|
||||
private static void deleteMetaFile(String basePath, String instantTime, String suffix, FileSystem fs) throws IOException {
|
||||
org.apache.hadoop.fs.Path parentPath = new org.apache.hadoop.fs.Path(basePath, HoodieTableMetaClient.METAFOLDER_NAME);
|
||||
org.apache.hadoop.fs.Path metaFilePath = new org.apache.hadoop.fs.Path(parentPath, instantTime + suffix);
|
||||
if (fs.exists(metaFilePath)) {
|
||||
fs.delete(metaFilePath, true);
|
||||
}
|
||||
}
|
||||
|
||||
/** Creates a completed commit meta file for the given instant on the local file system. */
public static void createCommit(String basePath, String instantTime) throws IOException {
  createMetaFile(basePath, instantTime, HoodieTimeline.COMMIT_EXTENSION);
}
|
||||
|
||||
/** Creates a completed commit meta file for the given instant on the supplied {@link FileSystem}. */
public static void createCommit(String basePath, String instantTime, FileSystem fs) throws IOException {
  createMetaFile(basePath, instantTime, HoodieTimeline.COMMIT_EXTENSION, fs);
}
|
||||
|
||||
/** Creates a requested commit meta file for the given instant on the local file system. */
public static void createRequestedCommit(String basePath, String instantTime) throws IOException {
  createMetaFile(basePath, instantTime, HoodieTimeline.REQUESTED_COMMIT_EXTENSION);
}
|
||||
@@ -108,6 +131,10 @@ public class FileCreateUtils {
|
||||
createMetaFile(basePath, instantTime, HoodieTimeline.DELTA_COMMIT_EXTENSION);
|
||||
}
|
||||
|
||||
/** Creates a completed delta commit meta file for the given instant on the supplied {@link FileSystem}. */
public static void createDeltaCommit(String basePath, String instantTime, FileSystem fs) throws IOException {
  createMetaFile(basePath, instantTime, HoodieTimeline.DELTA_COMMIT_EXTENSION, fs);
}
|
||||
|
||||
/** Creates a requested delta commit meta file for the given instant on the local file system. */
public static void createRequestedDeltaCommit(String basePath, String instantTime) throws IOException {
  createMetaFile(basePath, instantTime, HoodieTimeline.REQUESTED_DELTA_COMMIT_EXTENSION);
}
|
||||
@@ -235,4 +262,8 @@ public class FileCreateUtils {
|
||||
throw new HoodieException("Error reading hoodie table as a dataframe", e);
|
||||
}
|
||||
}
|
||||
|
||||
/** Deletes the completed delta commit meta file for the given instant from the supplied {@link FileSystem}, if present. */
public static void deleteDeltaCommit(String basePath, String instantTime, FileSystem fs) throws IOException {
  deleteMetaFile(basePath, instantTime, HoodieTimeline.DELTA_COMMIT_EXTENSION, fs);
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user