[HUDI-2480] FileSlice after pending compaction-requested instant-time… (#3703)
* [HUDI-2480] FileSlice after pending compaction-requested instant-time is ignored by MOR snapshot reader * include file slice after a pending compaction for spark reader Co-authored-by: garyli1019 <yanjia.gary.li@gmail.com>
This commit is contained in:
@@ -303,8 +303,8 @@ public class HoodieTableSource implements
|
|||||||
}
|
}
|
||||||
|
|
||||||
HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(metaClient,
|
HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(metaClient,
|
||||||
metaClient.getActiveTimeline().getCommitsTimeline()
|
// file-slice after pending compaction-requested instant-time is also considered valid
|
||||||
.filterCompletedInstants(), fileStatuses);
|
metaClient.getCommitsAndCompactionTimeline().filterCompletedAndCompactionInstants(), fileStatuses);
|
||||||
String latestCommit = fsView.getLastInstant().get().getTimestamp();
|
String latestCommit = fsView.getLastInstant().get().getTimestamp();
|
||||||
final String mergeType = this.conf.getString(FlinkOptions.MERGE_TYPE);
|
final String mergeType = this.conf.getString(FlinkOptions.MERGE_TYPE);
|
||||||
final AtomicInteger cnt = new AtomicInteger(0);
|
final AtomicInteger cnt = new AtomicInteger(0);
|
||||||
|
|||||||
@@ -463,6 +463,32 @@ public class TestInputFormat {
|
|||||||
TestData.assertRowDataEquals(result, TestData.DATA_SET_INSERT);
|
TestData.assertRowDataEquals(result, TestData.DATA_SET_INSERT);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test reading file groups with compaction plan scheduled and delta logs.
|
||||||
|
* File-slice after pending compaction-requested instant-time should also be considered valid.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
void testReadMORWithCompactionPlanScheduled() throws Exception {
|
||||||
|
Map<String, String> options = new HashMap<>();
|
||||||
|
// compact for each commit
|
||||||
|
options.put(FlinkOptions.COMPACTION_DELTA_COMMITS.key(), "1");
|
||||||
|
options.put(FlinkOptions.COMPACTION_ASYNC_ENABLED.key(), "false");
|
||||||
|
beforeEach(HoodieTableType.MERGE_ON_READ, options);
|
||||||
|
|
||||||
|
// write three commits
|
||||||
|
for (int i = 0; i < 6; i += 2) {
|
||||||
|
List<RowData> dataset = TestData.dataSetInsert(i + 1, i + 2);
|
||||||
|
TestData.writeData(dataset, conf);
|
||||||
|
}
|
||||||
|
|
||||||
|
InputFormat<RowData, ?> inputFormat1 = this.tableSource.getInputFormat();
|
||||||
|
assertThat(inputFormat1, instanceOf(MergeOnReadInputFormat.class));
|
||||||
|
|
||||||
|
List<RowData> actual = readData(inputFormat1);
|
||||||
|
final List<RowData> expected = TestData.dataSetInsert(1, 2, 3, 4, 5, 6);
|
||||||
|
TestData.assertRowDataEquals(actual, expected);
|
||||||
|
}
|
||||||
|
|
||||||
// -------------------------------------------------------------------------
|
// -------------------------------------------------------------------------
|
||||||
// Utilities
|
// Utilities
|
||||||
// -------------------------------------------------------------------------
|
// -------------------------------------------------------------------------
|
||||||
|
|||||||
@@ -237,7 +237,7 @@ public class HoodieRealtimeInputFormatUtils extends HoodieInputFormatUtils {
|
|||||||
try {
|
try {
|
||||||
// Both commit and delta-commits are included - pick the latest completed one
|
// Both commit and delta-commits are included - pick the latest completed one
|
||||||
Option<HoodieInstant> latestCompletedInstant =
|
Option<HoodieInstant> latestCompletedInstant =
|
||||||
metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants().lastInstant();
|
metaClient.getCommitsAndCompactionTimeline().filterCompletedAndCompactionInstants().lastInstant();
|
||||||
|
|
||||||
Stream<FileSlice> latestFileSlices = latestCompletedInstant
|
Stream<FileSlice> latestFileSlices = latestCompletedInstant
|
||||||
.map(instant -> fsView.getLatestMergedFileSlicesBeforeOrOn(relPartitionPath, instant.getTimestamp()))
|
.map(instant -> fsView.getLatestMergedFileSlicesBeforeOrOn(relPartitionPath, instant.getTimestamp()))
|
||||||
|
|||||||
@@ -151,8 +151,9 @@ class MergeOnReadSnapshotRelation(val sqlContext: SQLContext,
|
|||||||
// Load files from the global paths if it has defined to be compatible with the original mode
|
// Load files from the global paths if it has defined to be compatible with the original mode
|
||||||
val inMemoryFileIndex = HoodieSparkUtils.createInMemoryFileIndex(sqlContext.sparkSession, globPaths.get)
|
val inMemoryFileIndex = HoodieSparkUtils.createInMemoryFileIndex(sqlContext.sparkSession, globPaths.get)
|
||||||
val fsView = new HoodieTableFileSystemView(metaClient,
|
val fsView = new HoodieTableFileSystemView(metaClient,
|
||||||
metaClient.getActiveTimeline.getCommitsTimeline
|
// file-slice after pending compaction-requested instant-time is also considered valid
|
||||||
.filterCompletedInstants, inMemoryFileIndex.allFiles().toArray)
|
metaClient.getCommitsAndCompactionTimeline.filterCompletedAndCompactionInstants,
|
||||||
|
inMemoryFileIndex.allFiles().toArray)
|
||||||
val partitionPaths = fsView.getLatestBaseFiles.iterator().asScala.toList.map(_.getFileStatus.getPath.getParent)
|
val partitionPaths = fsView.getLatestBaseFiles.iterator().asScala.toList.map(_.getFileStatus.getPath.getParent)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user