[HUDI-1740] Fix insert-overwrite API archival (#2784)
- Fix problem of archiving replace commits. - Fix problem when reading an empty replacecommit.requested file. - Improve the logic for handling empty and non-empty requested/inflight commit files; add unit tests covering both the empty and non-empty inflight-file cases and clean up some unused test util methods. Co-authored-by: yorkzero831 <yorkzero8312@gmail.com> Co-authored-by: zheren.yu <zheren.yu@paypay-corp.co.jp>
This commit is contained in:
@@ -68,6 +68,11 @@ public class ReplaceArchivalHelper implements Serializable {
|
||||
public static boolean deleteReplacedFileGroups(HoodieEngineContext context, HoodieTableMetaClient metaClient,
|
||||
TableFileSystemView fileSystemView,
|
||||
HoodieInstant instant, List<String> replacedPartitions) {
|
||||
// There is no file id to be replaced in the very first replace commit file for insert overwrite operation
|
||||
if (replacedPartitions.isEmpty()) {
|
||||
LOG.warn("Found no partition files to replace");
|
||||
return true;
|
||||
}
|
||||
context.setJobStatus(ReplaceArchivalHelper.class.getSimpleName(), "Delete replaced file groups");
|
||||
List<Boolean> f = context.map(replacedPartitions, partition -> {
|
||||
Stream<FileSlice> fileSlices = fileSystemView.getReplacedFileGroupsBeforeOrOn(instant.getTimestamp(), partition)
|
||||
|
||||
@@ -21,6 +21,7 @@ package org.apache.hudi.client.utils;
|
||||
import com.fasterxml.jackson.databind.DeserializationFeature;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.hudi.avro.model.HoodieArchivedMetaEntry;
|
||||
import org.apache.hudi.avro.model.HoodieCompactionPlan;
|
||||
import org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata;
|
||||
@@ -37,8 +38,8 @@ import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
|
||||
import org.apache.hudi.common.util.CleanerUtils;
|
||||
import org.apache.hudi.common.util.ClusteringUtils;
|
||||
import org.apache.hudi.common.util.CompactionUtils;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
|
||||
/**
|
||||
* Helper class to convert between different action related payloads and {@link HoodieArchivedMetaEntry}.
|
||||
@@ -72,10 +73,21 @@ public class MetadataConversionUtils {
|
||||
HoodieReplaceCommitMetadata replaceCommitMetadata = HoodieReplaceCommitMetadata
|
||||
.fromBytes(metaClient.getActiveTimeline().getInstantDetails(hoodieInstant).get(), HoodieReplaceCommitMetadata.class);
|
||||
archivedMetaWrapper.setHoodieReplaceCommitMetadata(ReplaceArchivalHelper.convertReplaceCommitMetadata(replaceCommitMetadata));
|
||||
} else if (hoodieInstant.isInflight()) {
|
||||
// inflight replacecommit files have the same meta data body as HoodieCommitMetadata
|
||||
// so we could re-use it without further creating an inflight extension.
|
||||
// Or inflight replacecommit files are empty under clustering circumstance
|
||||
Option<HoodieCommitMetadata> inflightCommitMetadata = getInflightReplaceMetadata(metaClient, hoodieInstant);
|
||||
if (inflightCommitMetadata.isPresent()) {
|
||||
archivedMetaWrapper.setHoodieInflightReplaceMetadata(convertCommitMetadata(inflightCommitMetadata.get()));
|
||||
}
|
||||
} else {
|
||||
HoodieRequestedReplaceMetadata requestedReplaceMetadata =
|
||||
ClusteringUtils.getRequestedReplaceMetadata(metaClient, hoodieInstant).get();
|
||||
archivedMetaWrapper.setHoodieRequestedReplaceMetadata(requestedReplaceMetadata);
|
||||
// we may have cases with empty HoodieRequestedReplaceMetadata e.g. insert_overwrite_table or insert_overwrite
|
||||
// without clustering. However, we should revisit the requested commit file standardization
|
||||
Option<HoodieRequestedReplaceMetadata> requestedReplaceMetadata = getRequestedReplaceMetadata(metaClient, hoodieInstant);
|
||||
if (requestedReplaceMetadata.isPresent()) {
|
||||
archivedMetaWrapper.setHoodieRequestedReplaceMetadata(requestedReplaceMetadata.get());
|
||||
}
|
||||
}
|
||||
archivedMetaWrapper.setActionType(ActionType.replacecommit.name());
|
||||
break;
|
||||
@@ -107,14 +119,25 @@ public class MetadataConversionUtils {
|
||||
return archivedMetaWrapper;
|
||||
}
|
||||
|
||||
public static HoodieArchivedMetaEntry createMetaWrapper(HoodieInstant hoodieInstant,
|
||||
HoodieCommitMetadata hoodieCommitMetadata) {
|
||||
HoodieArchivedMetaEntry archivedMetaWrapper = new HoodieArchivedMetaEntry();
|
||||
archivedMetaWrapper.setCommitTime(hoodieInstant.getTimestamp());
|
||||
archivedMetaWrapper.setActionState(hoodieInstant.getState().name());
|
||||
archivedMetaWrapper.setHoodieCommitMetadata(convertCommitMetadata(hoodieCommitMetadata));
|
||||
archivedMetaWrapper.setActionType(ActionType.commit.name());
|
||||
return archivedMetaWrapper;
|
||||
public static Option<HoodieCommitMetadata> getInflightReplaceMetadata(HoodieTableMetaClient metaClient, HoodieInstant instant) throws IOException {
|
||||
Option<byte[]> inflightContent = metaClient.getActiveTimeline().getInstantDetails(instant);
|
||||
if (!inflightContent.isPresent() || inflightContent.get().length == 0) {
|
||||
// inflight files can be empty in some certain cases, e.g. when users opt in clustering
|
||||
return Option.empty();
|
||||
}
|
||||
return Option.of(HoodieCommitMetadata.fromBytes(inflightContent.get(), HoodieCommitMetadata.class));
|
||||
}
|
||||
|
||||
public static Option<HoodieRequestedReplaceMetadata> getRequestedReplaceMetadata(HoodieTableMetaClient metaClient, HoodieInstant instant) throws IOException {
|
||||
Option<byte[]> requestedContent = metaClient.getActiveTimeline().getInstantDetails(instant);
|
||||
if (!requestedContent.isPresent() || requestedContent.get().length == 0) {
|
||||
// requested commit files can be empty in some certain cases, e.g. insert_overwrite or insert_overwrite_table.
|
||||
// However, it appears requested files are supposed to contain meta data and we should revisit the standardization
|
||||
// of requested commit files
|
||||
// TODO revisit requested commit file standardization https://issues.apache.org/jira/browse/HUDI-1739
|
||||
return Option.empty();
|
||||
}
|
||||
return Option.of(TimelineMetadataUtils.deserializeRequestedReplaceMetadata(requestedContent.get()));
|
||||
}
|
||||
|
||||
public static org.apache.hudi.avro.model.HoodieCommitMetadata convertCommitMetadata(
|
||||
|
||||
@@ -296,7 +296,6 @@ public class HoodieTimelineArchiveLog<T extends HoodieAvroPayload, I, K, O> {
|
||||
|
||||
public void archive(HoodieEngineContext context, List<HoodieInstant> instants) throws HoodieCommitException {
|
||||
try {
|
||||
HoodieTimeline commitTimeline = metaClient.getActiveTimeline().getAllCommitsTimeline().filterCompletedInstants();
|
||||
Schema wrapperSchema = HoodieArchivedMetaEntry.getClassSchema();
|
||||
LOG.info("Wrapper schema " + wrapperSchema.toString());
|
||||
List<IndexedRecord> records = new ArrayList<>();
|
||||
@@ -308,7 +307,7 @@ public class HoodieTimelineArchiveLog<T extends HoodieAvroPayload, I, K, O> {
|
||||
}
|
||||
try {
|
||||
deleteAnyLeftOverMarkerFiles(context, hoodieInstant);
|
||||
records.add(convertToAvroRecord(commitTimeline, hoodieInstant));
|
||||
records.add(convertToAvroRecord(hoodieInstant));
|
||||
if (records.size() >= this.config.getCommitArchivalBatchSize()) {
|
||||
writeToFile(wrapperSchema, records);
|
||||
}
|
||||
@@ -365,8 +364,8 @@ public class HoodieTimelineArchiveLog<T extends HoodieAvroPayload, I, K, O> {
|
||||
}
|
||||
}
|
||||
|
||||
private IndexedRecord convertToAvroRecord(HoodieTimeline commitTimeline, HoodieInstant hoodieInstant)
|
||||
throws IOException {
|
||||
private IndexedRecord convertToAvroRecord(HoodieInstant hoodieInstant)
|
||||
throws IOException {
|
||||
return MetadataConversionUtils.createMetaWrapper(hoodieInstant, metaClient);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -383,7 +383,7 @@ public class TestSimpleConcurrentFileWritesConflictResolutionStrategy extends Ho
|
||||
requestedReplaceMetadata.setClusteringPlan(clusteringPlan);
|
||||
requestedReplaceMetadata.setVersion(TimelineLayoutVersion.CURR_VERSION);
|
||||
HoodieTestTable.of(metaClient)
|
||||
.addRequestedReplace(instantTime, requestedReplaceMetadata)
|
||||
.addRequestedReplace(instantTime, Option.of(requestedReplaceMetadata))
|
||||
.withBaseFilesInPartition(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, fileId1, fileId2);
|
||||
}
|
||||
|
||||
@@ -413,7 +413,7 @@ public class TestSimpleConcurrentFileWritesConflictResolutionStrategy extends Ho
|
||||
requestedReplaceMetadata.setClusteringPlan(clusteringPlan);
|
||||
requestedReplaceMetadata.setVersion(TimelineLayoutVersion.CURR_VERSION);
|
||||
HoodieTestTable.of(metaClient)
|
||||
.addReplaceCommit(instantTime, requestedReplaceMetadata, replaceMetadata)
|
||||
.addReplaceCommit(instantTime, Option.of(requestedReplaceMetadata), Option.empty(), replaceMetadata)
|
||||
.withBaseFilesInPartition(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, fileId1, fileId2);
|
||||
}
|
||||
|
||||
|
||||
@@ -20,6 +20,7 @@ package org.apache.hudi.utils;
|
||||
|
||||
import static org.apache.hudi.common.util.CleanerUtils.convertCleanMetadata;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertNull;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
@@ -80,13 +81,43 @@ public class TestMetadataConversionUtils extends HoodieCommonTestHarness {
|
||||
@Test
|
||||
public void testCompletedReplace() throws Exception {
|
||||
String newCommitTime = HoodieTestTable.makeNewCommitTime();
|
||||
createReplace(newCommitTime, WriteOperationType.INSERT_OVERWRITE);
|
||||
createReplace(newCommitTime, WriteOperationType.INSERT_OVERWRITE, true);
|
||||
HoodieArchivedMetaEntry metaEntry = MetadataConversionUtils.createMetaWrapper(
|
||||
new HoodieInstant(State.COMPLETED, HoodieTimeline.REPLACE_COMMIT_ACTION, newCommitTime), metaClient);
|
||||
assertEquals(metaEntry.getActionState(), State.COMPLETED.toString());
|
||||
assertEquals(metaEntry.getHoodieReplaceCommitMetadata().getOperationType(), WriteOperationType.INSERT_OVERWRITE.toString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEmptyRequestedReplace() throws Exception {
|
||||
String newCommitTime = HoodieTestTable.makeNewCommitTime();
|
||||
createReplace(newCommitTime, WriteOperationType.INSERT_OVERWRITE_TABLE, false);
|
||||
HoodieArchivedMetaEntry metaEntry = MetadataConversionUtils.createMetaWrapper(
|
||||
new HoodieInstant(State.REQUESTED, HoodieTimeline.REPLACE_COMMIT_ACTION, newCommitTime), metaClient);
|
||||
assertEquals(metaEntry.getActionState(), State.REQUESTED.toString());
|
||||
assertNull(metaEntry.getHoodieRequestedReplaceMetadata());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEmptyInflightReplace() throws Exception {
|
||||
String newCommitTime = HoodieTestTable.makeNewCommitTime();
|
||||
createReplace(newCommitTime, WriteOperationType.INSERT_OVERWRITE_TABLE, true);
|
||||
HoodieArchivedMetaEntry metaEntry = MetadataConversionUtils.createMetaWrapper(
|
||||
new HoodieInstant(State.INFLIGHT, HoodieTimeline.REPLACE_COMMIT_ACTION, newCommitTime), metaClient);
|
||||
assertEquals(metaEntry.getActionState(), State.INFLIGHT.toString());
|
||||
assertNull(metaEntry.getHoodieInflightReplaceMetadata());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNonEmptyInflightReplace() throws Exception {
|
||||
String newCommitTime = HoodieTestTable.makeNewCommitTime();
|
||||
createReplace(newCommitTime, WriteOperationType.INSERT_OVERWRITE_TABLE, false);
|
||||
HoodieArchivedMetaEntry metaEntry = MetadataConversionUtils.createMetaWrapper(
|
||||
new HoodieInstant(State.INFLIGHT, HoodieTimeline.REPLACE_COMMIT_ACTION, newCommitTime), metaClient);
|
||||
assertEquals(metaEntry.getActionState(), State.INFLIGHT.toString());
|
||||
assertEquals(metaEntry.getHoodieInflightReplaceMetadata().getOperationType(), WriteOperationType.INSERT_OVERWRITE_TABLE.name());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCompletedCommitOrDeltaCommit() throws Exception {
|
||||
String newCommitTime = HoodieTestTable.makeNewCommitTime();
|
||||
@@ -169,7 +200,8 @@ public class TestMetadataConversionUtils extends HoodieCommonTestHarness {
|
||||
.withBaseFilesInPartition(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, fileId1, fileId2);
|
||||
}
|
||||
|
||||
private void createReplace(String instantTime, WriteOperationType writeOperationType) throws Exception {
|
||||
private void createReplace(String instantTime, WriteOperationType writeOperationType, Boolean isClustering)
|
||||
throws Exception {
|
||||
String fileId1 = "file-1";
|
||||
String fileId2 = "file-2";
|
||||
|
||||
@@ -182,18 +214,29 @@ public class TestMetadataConversionUtils extends HoodieCommonTestHarness {
|
||||
writeStat.setFileId("file-1");
|
||||
replaceMetadata.addWriteStat(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, writeStat);
|
||||
replaceMetadata.setOperationType(writeOperationType);
|
||||
// create replace instant to mark fileId1 as deleted
|
||||
HoodieRequestedReplaceMetadata requestedReplaceMetadata = new HoodieRequestedReplaceMetadata();
|
||||
requestedReplaceMetadata.setOperationType(WriteOperationType.INSERT_OVERWRITE.name());
|
||||
HoodieClusteringPlan clusteringPlan = new HoodieClusteringPlan();
|
||||
HoodieClusteringGroup clusteringGroup = new HoodieClusteringGroup();
|
||||
HoodieSliceInfo sliceInfo = new HoodieSliceInfo();
|
||||
clusteringGroup.setSlices(Arrays.asList(sliceInfo));
|
||||
clusteringPlan.setInputGroups(Arrays.asList(clusteringGroup));
|
||||
requestedReplaceMetadata.setClusteringPlan(clusteringPlan);
|
||||
requestedReplaceMetadata.setVersion(TimelineLayoutVersion.CURR_VERSION);
|
||||
// some cases requestedReplaceMetadata will be null
|
||||
// e.g. insert_overwrite_table or insert_overwrite without clustering
|
||||
HoodieRequestedReplaceMetadata requestedReplaceMetadata = null;
|
||||
HoodieCommitMetadata inflightReplaceMetadata = null;
|
||||
if (isClustering) {
|
||||
requestedReplaceMetadata = new HoodieRequestedReplaceMetadata();
|
||||
requestedReplaceMetadata.setOperationType(writeOperationType.name());
|
||||
HoodieClusteringPlan clusteringPlan = new HoodieClusteringPlan();
|
||||
HoodieClusteringGroup clusteringGroup = new HoodieClusteringGroup();
|
||||
HoodieSliceInfo sliceInfo = new HoodieSliceInfo();
|
||||
clusteringGroup.setSlices(Arrays.asList(sliceInfo));
|
||||
clusteringPlan.setInputGroups(Arrays.asList(clusteringGroup));
|
||||
requestedReplaceMetadata.setClusteringPlan(clusteringPlan);
|
||||
requestedReplaceMetadata.setVersion(TimelineLayoutVersion.CURR_VERSION);
|
||||
} else {
|
||||
// inflightReplaceMetadata will be null in clustering but not null
|
||||
// in insert_overwrite or insert_overwrite_table
|
||||
inflightReplaceMetadata = new HoodieCommitMetadata();
|
||||
inflightReplaceMetadata.setOperationType(writeOperationType);
|
||||
inflightReplaceMetadata.setCompacted(false);
|
||||
}
|
||||
HoodieTestTable.of(metaClient)
|
||||
.addReplaceCommit(instantTime, requestedReplaceMetadata, replaceMetadata)
|
||||
.addReplaceCommit(instantTime, Option.ofNullable(requestedReplaceMetadata), Option.ofNullable(inflightReplaceMetadata), replaceMetadata)
|
||||
.withBaseFilesInPartition(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, fileId1, fileId2);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user