1
0

[HUDI-2488][HUDI-3175] Implement async metadata indexing (#4693)

- Add a new action called INDEX, whose state transition is described in the RFC.
- Changes in timeline to support the new action.
- Add an index planner in ScheduleIndexActionExecutor.
- Add index plan executor in RunIndexActionExecutor.
- Add 3 APIs in HoodieTableMetadataWriter: a) scheduleIndex: generates an index plan based on the latest completed instant, initializes file groups and adds a requested INDEX instant; b) index: executes the index plan and also takes care of writes that happened after indexing was requested; c) dropIndex: drops an index by removing the given metadata partition.
- Add 2 new table configs to serve as the source of truth for inflight and completed indexes.
- Support upgrade/downgrade taking care of the newly added configs.
- Add tool to trigger indexing in HoodieIndexer.
- Handle corner cases related to partial failures.
- Abort gracefully after deleting partition and instant.
- Handle other actions in the timeline that need to be considered before catching up.
This commit is contained in:
Sagar Sumit
2022-04-01 01:33:12 +05:30
committed by GitHub
parent 1da196c1e8
commit 28dafa774e
44 changed files with 2123 additions and 150 deletions

View File

@@ -199,6 +199,46 @@ public class TestHoodieActiveTimeline extends HoodieCommonTestHarness {
assertTrue(activeCommitTimeline.isBeforeTimelineStarts("00"));
}
@Test
public void testGetContiguousCompletedWriteTimeline() {
  // Part 1: a mock timeline with holes. Instants "09", "11" and "19" are supplied through the
  // second stream (presumably the not-yet-completed ones -- confirm against MockHoodieTimeline),
  // so the contiguous completed write timeline is expected to stop at "07".
  timeline = new MockHoodieTimeline(
      Stream.of("01", "03", "05", "07", "13", "15", "17"),
      Stream.of("09", "11", "19"));
  assertTrue(timeline.getContiguousCompletedWriteTimeline().lastInstant().isPresent());
  assertEquals("07", timeline.getContiguousCompletedWriteTimeline().lastInstant().get().getTimestamp());

  // Part 2: an active timeline with eight instants, two of which are inflight. One of the
  // inflight instants (the restore at "8") is not part of the write timeline.
  HoodieInstant commit1 = new HoodieInstant(State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "1");
  HoodieInstant commit2 = new HoodieInstant(State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "2");
  HoodieInstant commit3 = new HoodieInstant(State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "3");
  HoodieInstant commit4 = new HoodieInstant(State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "4");
  HoodieInstant inflightCommit5 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, "5");
  HoodieInstant commit6 = new HoodieInstant(State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "6");
  HoodieInstant commit7 = new HoodieInstant(State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "7");
  HoodieInstant inflightRestore8 = new HoodieInstant(true, HoodieTimeline.RESTORE_ACTION, "8");
  List<HoodieInstant> orderedInstants = Stream
      .of(commit1, commit2, commit3, commit4, inflightCommit5, commit6, commit7, inflightRestore8)
      .collect(Collectors.toList());

  timeline = new HoodieActiveTimeline(metaClient);
  for (HoodieInstant instant : orderedInstants) {
    timeline.createNewInstant(instant);
  }
  timeline.setInstants(orderedInstants);

  // The inflight commit at "5" breaks the run, so the last contiguous completed instant is "4".
  assertTrue(timeline.getContiguousCompletedWriteTimeline().lastInstant().isPresent());
  assertEquals(commit4.getTimestamp(),
      timeline.getContiguousCompletedWriteTimeline().lastInstant().get().getTimestamp());

  // Transition both inflight instants to complete, then re-read the timeline.
  for (HoodieInstant pending : new HoodieInstant[] {inflightCommit5, inflightRestore8}) {
    timeline.saveAsComplete(new HoodieInstant(true, pending.getAction(), pending.getTimestamp()), Option.empty());
  }
  timeline = timeline.reload();

  // The restore at "8" is not considered in the write timeline, so the last completed instant
  // of the contiguous write timeline should be the commit at "7".
  assertTrue(timeline.getContiguousCompletedWriteTimeline().lastInstant().isPresent());
  assertEquals(commit7.getTimestamp(),
      timeline.getContiguousCompletedWriteTimeline().lastInstant().get().getTimestamp());
}
@Test
public void testTimelineGetOperations() {
List<HoodieInstant> allInstants = getAllInstants();
@@ -218,20 +258,19 @@ public class TestHoodieActiveTimeline extends HoodieCommonTestHarness {
// Test that various types of getXXX operations from HoodieActiveTimeline
// return the correct set of Instant
checkTimeline.accept(timeline.getCommitsTimeline(),
CollectionUtils.createSet(HoodieTimeline.COMMIT_ACTION, HoodieTimeline.DELTA_COMMIT_ACTION, HoodieTimeline.REPLACE_COMMIT_ACTION));
checkTimeline.accept(timeline.getWriteTimeline(),
CollectionUtils.createSet(HoodieTimeline.COMMIT_ACTION, HoodieTimeline.DELTA_COMMIT_ACTION, HoodieTimeline.COMPACTION_ACTION, HoodieTimeline.REPLACE_COMMIT_ACTION));
checkTimeline.accept(timeline.getCommitsTimeline(), CollectionUtils.createSet(
HoodieTimeline.COMMIT_ACTION, HoodieTimeline.DELTA_COMMIT_ACTION, HoodieTimeline.REPLACE_COMMIT_ACTION));
checkTimeline.accept(timeline.getWriteTimeline(), CollectionUtils.createSet(
HoodieTimeline.COMMIT_ACTION, HoodieTimeline.DELTA_COMMIT_ACTION, HoodieTimeline.COMPACTION_ACTION, HoodieTimeline.REPLACE_COMMIT_ACTION));
checkTimeline.accept(timeline.getCommitTimeline(), CollectionUtils.createSet(HoodieTimeline.COMMIT_ACTION, HoodieTimeline.REPLACE_COMMIT_ACTION));
checkTimeline.accept(timeline.getDeltaCommitTimeline(), Collections.singleton(HoodieTimeline.DELTA_COMMIT_ACTION));
checkTimeline.accept(timeline.getCleanerTimeline(), Collections.singleton(HoodieTimeline.CLEAN_ACTION));
checkTimeline.accept(timeline.getRollbackTimeline(), Collections.singleton(HoodieTimeline.ROLLBACK_ACTION));
checkTimeline.accept(timeline.getRestoreTimeline(), Collections.singleton(HoodieTimeline.RESTORE_ACTION));
checkTimeline.accept(timeline.getSavePointTimeline(), Collections.singleton(HoodieTimeline.SAVEPOINT_ACTION));
checkTimeline.accept(timeline.getAllCommitsTimeline(),
CollectionUtils.createSet(HoodieTimeline.COMMIT_ACTION, HoodieTimeline.DELTA_COMMIT_ACTION,
HoodieTimeline.CLEAN_ACTION, HoodieTimeline.COMPACTION_ACTION, HoodieTimeline.REPLACE_COMMIT_ACTION,
HoodieTimeline.SAVEPOINT_ACTION, HoodieTimeline.ROLLBACK_ACTION));
checkTimeline.accept(timeline.getAllCommitsTimeline(), CollectionUtils.createSet(
HoodieTimeline.COMMIT_ACTION, HoodieTimeline.DELTA_COMMIT_ACTION, HoodieTimeline.CLEAN_ACTION, HoodieTimeline.COMPACTION_ACTION,
HoodieTimeline.REPLACE_COMMIT_ACTION, HoodieTimeline.SAVEPOINT_ACTION, HoodieTimeline.ROLLBACK_ACTION, HoodieTimeline.INDEXING_ACTION));
// Get some random Instants
Random rand = new Random();

View File

@@ -20,6 +20,12 @@ package org.apache.hudi.common.util;
import org.junit.jupiter.api.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
@@ -61,4 +67,20 @@ public class TestStringUtils {
assertNotEquals(null, StringUtils.isNullOrEmpty("this is not empty"));
assertTrue(StringUtils.isNullOrEmpty(""));
}
@Test
public void testStringToSet() {
  // Both a null and an empty input are expected to produce an empty set.
  final Set<String> noElements = new HashSet<>();
  assertEquals(noElements, StringUtils.toSet(null));
  assertEquals(noElements, StringUtils.toSet(""));

  // A comma-separated input is expected to be split into its elements; the expected set holds
  // "c" without the leading space that precedes it in the input.
  final Set<String> abc = new HashSet<>();
  abc.add("a");
  abc.add("b");
  abc.add("c");
  assertEquals(abc, StringUtils.toSet("a,b, c"));
}
@Test
public void testStringToList() {
  // Both a null and an empty input are expected to produce an empty list.
  final List<String> noElements = new ArrayList<>();
  assertEquals(noElements, StringUtils.toList(null));
  assertEquals(noElements, StringUtils.toList(""));

  // A comma-separated input is expected to be split in order; the expected list holds "c"
  // without the leading space that precedes it in the input.
  final List<String> abc = new ArrayList<>();
  abc.add("a");
  abc.add("b");
  abc.add("c");
  assertEquals(abc, StringUtils.toList("a,b, c"));
}
}