1
0

[HUDI-1661] Exclude clustering commits from getExtraMetadataFromLatest API (#2632)

This commit is contained in:
satishkotha
2021-03-05 13:42:19 -08:00
committed by GitHub
parent f2159c4573
commit 11ad4ed26b
2 changed files with 75 additions and 9 deletions

View File

@@ -28,6 +28,7 @@ import org.apache.hudi.common.model.HoodieCleaningPolicy;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieReplaceCommitMetadata;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
@@ -73,7 +74,8 @@ public class TestTimelineUtils extends HoodieCommonTestHarness {
activeTimeline.createNewInstant(instant1);
// create replace metadata only with replaced file Ids (no new files created)
activeTimeline.saveAsComplete(instant1,
Option.of(getReplaceCommitMetadata(basePath, ts1, replacePartition, 2, newFilePartition, 0, Collections.emptyMap())));
Option.of(getReplaceCommitMetadata(basePath, ts1, replacePartition, 2,
newFilePartition, 0, Collections.emptyMap(), WriteOperationType.CLUSTER)));
metaClient.reloadActiveTimeline();
List<String> partitions = TimelineUtils.getAffectedPartitions(metaClient.getActiveTimeline().findInstantsAfter("0", 10));
@@ -85,7 +87,8 @@ public class TestTimelineUtils extends HoodieCommonTestHarness {
activeTimeline.createNewInstant(instant2);
// create replace metadata only with new files (no replaced file Ids)
activeTimeline.saveAsComplete(instant2,
Option.of(getReplaceCommitMetadata(basePath, ts2, replacePartition, 0, newFilePartition, 3, Collections.emptyMap())));
Option.of(getReplaceCommitMetadata(basePath, ts2, replacePartition, 0,
newFilePartition, 3, Collections.emptyMap(), WriteOperationType.CLUSTER)));
metaClient.reloadActiveTimeline();
partitions = TimelineUtils.getAffectedPartitions(metaClient.getActiveTimeline().findInstantsAfter("1", 10));
assertEquals(1, partitions.size());
@@ -211,16 +214,42 @@ public class TestTimelineUtils extends HoodieCommonTestHarness {
metaClient.reloadActiveTimeline();
// verify modified partitions included cleaned data
Option<String> extraLatestValue = TimelineUtils.getExtraMetadataFromLatest(metaClient, extraMetadataKey);
assertTrue(extraLatestValue.isPresent());
assertEquals(extraMetadataValue1, extraLatestValue.get());
verifyExtraMetadataLatestValue(extraMetadataKey, extraMetadataValue1, false);
assertFalse(TimelineUtils.getExtraMetadataFromLatest(metaClient, "unknownKey").isPresent());
// verify adding clustering commit doesn't change behavior of getExtraMetadataFromLatest
String ts2 = "2";
HoodieInstant instant2 = new HoodieInstant(true, HoodieTimeline.REPLACE_COMMIT_ACTION, ts2);
activeTimeline.createNewInstant(instant2);
String newValueForMetadata = "newValue2";
extraMetadata.put(extraMetadataKey, newValueForMetadata);
activeTimeline.saveAsComplete(instant2,
Option.of(getReplaceCommitMetadata(basePath, ts2, "p2", 0,
"p2", 3, extraMetadata, WriteOperationType.CLUSTER)));
metaClient.reloadActiveTimeline();
verifyExtraMetadataLatestValue(extraMetadataKey, extraMetadataValue1, false);
verifyExtraMetadataLatestValue(extraMetadataKey, newValueForMetadata, true);
assertFalse(TimelineUtils.getExtraMetadataFromLatest(metaClient, "unknownKey").isPresent());
Map<String, Option<String>> extraMetadataEntries = TimelineUtils.getAllExtraMetadataForKey(metaClient, extraMetadataKey);
assertEquals(2, extraMetadataEntries.size());
assertEquals(3, extraMetadataEntries.size());
assertFalse(extraMetadataEntries.get("0").isPresent());
assertTrue(extraMetadataEntries.get("1").isPresent());
assertEquals(extraMetadataValue1, extraMetadataEntries.get("1").get());
assertTrue(extraMetadataEntries.get("2").isPresent());
assertEquals(newValueForMetadata, extraMetadataEntries.get("2").get());
}
/**
 * Asserts that the latest extra-metadata value recorded for a key is present and matches the expectation.
 *
 * @param extraMetadataKey  key whose latest value is looked up via {@code TimelineUtils}
 * @param expected          value the lookup is required to return
 * @param includeClustering {@code true} to query through
 *                          {@code TimelineUtils.getExtraMetadataFromLatestIncludeClustering},
 *                          {@code false} to query through {@code TimelineUtils.getExtraMetadataFromLatest}
 */
private void verifyExtraMetadataLatestValue(String extraMetadataKey, String expected, boolean includeClustering) {
  // Choose the lookup variant up front so the assertions below are shared by both paths.
  Option<String> latest = includeClustering
      ? TimelineUtils.getExtraMetadataFromLatestIncludeClustering(metaClient, extraMetadataKey)
      : TimelineUtils.getExtraMetadataFromLatest(metaClient, extraMetadataKey);
  // Presence is checked first so a missing value fails with a clear assertion rather than on get().
  assertTrue(latest.isPresent());
  assertEquals(expected, latest.get());
}
private byte[] getRestoreMetadata(String basePath, String partition, String commitTs, int count, String actionType) throws IOException {
@@ -265,9 +294,11 @@ public class TestTimelineUtils extends HoodieCommonTestHarness {
}
private byte[] getReplaceCommitMetadata(String basePath, String commitTs, String replacePartition, int replaceCount,
String newFilePartition, int newFileCount, Map<String, String> extraMetadata)
String newFilePartition, int newFileCount, Map<String, String> extraMetadata,
WriteOperationType operationType)
throws IOException {
HoodieReplaceCommitMetadata commit = new HoodieReplaceCommitMetadata();
commit.setOperationType(operationType);
for (int i = 1; i <= newFileCount; i++) {
HoodieWriteStat stat = new HoodieWriteStat();
stat.setFileId(i + "");