[HUDI-2480] FileSlice after pending compaction-requested instant-time is ignored by MOR snapshot reader (#3703)
* [HUDI-2480] FileSlice after pending compaction-requested instant-time is ignored by MOR snapshot reader * include file slice after a pending compaction for spark reader Co-authored-by: garyli1019 <yanjia.gary.li@gmail.com>
This commit is contained in:
@@ -303,8 +303,8 @@ public class HoodieTableSource implements
|
||||
}
|
||||
|
||||
HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(metaClient,
|
||||
metaClient.getActiveTimeline().getCommitsTimeline()
|
||||
.filterCompletedInstants(), fileStatuses);
|
||||
// file-slice after pending compaction-requested instant-time is also considered valid
|
||||
metaClient.getCommitsAndCompactionTimeline().filterCompletedAndCompactionInstants(), fileStatuses);
|
||||
String latestCommit = fsView.getLastInstant().get().getTimestamp();
|
||||
final String mergeType = this.conf.getString(FlinkOptions.MERGE_TYPE);
|
||||
final AtomicInteger cnt = new AtomicInteger(0);
|
||||
|
||||
@@ -463,6 +463,32 @@ public class TestInputFormat {
|
||||
TestData.assertRowDataEquals(result, TestData.DATA_SET_INSERT);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test reading file groups with compaction plan scheduled and delta logs.
|
||||
* File-slice after pending compaction-requested instant-time should also be considered valid.
|
||||
*/
|
||||
@Test
|
||||
void testReadMORWithCompactionPlanScheduled() throws Exception {
|
||||
Map<String, String> options = new HashMap<>();
|
||||
// compact for each commit
|
||||
options.put(FlinkOptions.COMPACTION_DELTA_COMMITS.key(), "1");
|
||||
options.put(FlinkOptions.COMPACTION_ASYNC_ENABLED.key(), "false");
|
||||
beforeEach(HoodieTableType.MERGE_ON_READ, options);
|
||||
|
||||
// write three commits
|
||||
for (int i = 0; i < 6; i += 2) {
|
||||
List<RowData> dataset = TestData.dataSetInsert(i + 1, i + 2);
|
||||
TestData.writeData(dataset, conf);
|
||||
}
|
||||
|
||||
InputFormat<RowData, ?> inputFormat1 = this.tableSource.getInputFormat();
|
||||
assertThat(inputFormat1, instanceOf(MergeOnReadInputFormat.class));
|
||||
|
||||
List<RowData> actual = readData(inputFormat1);
|
||||
final List<RowData> expected = TestData.dataSetInsert(1, 2, 3, 4, 5, 6);
|
||||
TestData.assertRowDataEquals(actual, expected);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Utilities
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
@@ -237,7 +237,7 @@ public class HoodieRealtimeInputFormatUtils extends HoodieInputFormatUtils {
|
||||
try {
|
||||
// Both commit and delta-commits are included - pick the latest completed one
|
||||
Option<HoodieInstant> latestCompletedInstant =
|
||||
metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants().lastInstant();
|
||||
metaClient.getCommitsAndCompactionTimeline().filterCompletedAndCompactionInstants().lastInstant();
|
||||
|
||||
Stream<FileSlice> latestFileSlices = latestCompletedInstant
|
||||
.map(instant -> fsView.getLatestMergedFileSlicesBeforeOrOn(relPartitionPath, instant.getTimestamp()))
|
||||
|
||||
@@ -151,8 +151,9 @@ class MergeOnReadSnapshotRelation(val sqlContext: SQLContext,
|
||||
// Load files from the global paths if it has defined to be compatible with the original mode
|
||||
val inMemoryFileIndex = HoodieSparkUtils.createInMemoryFileIndex(sqlContext.sparkSession, globPaths.get)
|
||||
val fsView = new HoodieTableFileSystemView(metaClient,
|
||||
metaClient.getActiveTimeline.getCommitsTimeline
|
||||
.filterCompletedInstants, inMemoryFileIndex.allFiles().toArray)
|
||||
// file-slice after pending compaction-requested instant-time is also considered valid
|
||||
metaClient.getCommitsAndCompactionTimeline.filterCompletedAndCompactionInstants,
|
||||
inMemoryFileIndex.allFiles().toArray)
|
||||
val partitionPaths = fsView.getLatestBaseFiles.iterator().asScala.toList.map(_.getFileStatus.getPath.getParent)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user