[HUDI-4078][HUDI-FLINK]BootstrapOperator contains the pending compact… (#5545)
* [HUDI-4078][HUDI-FLINK]BootstrapOperator contains the pending compaction files
This commit is contained in:
@@ -124,7 +124,7 @@ public interface TableFileSystemView {
|
|||||||
* @param maxInstantTime Max Instant Time
|
* @param maxInstantTime Max Instant Time
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public Stream<FileSlice> getLatestMergedFileSlicesBeforeOrOn(String partitionPath, String maxInstantTime);
|
Stream<FileSlice> getLatestMergedFileSlicesBeforeOrOn(String partitionPath, String maxInstantTime);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Stream all the latest file slices, in the given range.
|
* Stream all the latest file slices, in the given range.
|
||||||
|
|||||||
@@ -33,28 +33,28 @@ public interface Functions {
|
|||||||
/**
|
/**
|
||||||
* A function which has not any parameter.
|
* A function which has not any parameter.
|
||||||
*/
|
*/
|
||||||
public interface Function0<R> extends Serializable {
|
interface Function0<R> extends Serializable {
|
||||||
R apply();
|
R apply();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A function which contains only one parameter.
|
* A function which contains only one parameter.
|
||||||
*/
|
*/
|
||||||
public interface Function1<T1, R> extends Serializable {
|
interface Function1<T1, R> extends Serializable {
|
||||||
R apply(T1 val1);
|
R apply(T1 val1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A function which contains two parameters.
|
* A function which contains two parameters.
|
||||||
*/
|
*/
|
||||||
public interface Function2<T1, T2, R> extends Serializable {
|
interface Function2<T1, T2, R> extends Serializable {
|
||||||
R apply(T1 val1, T2 val2);
|
R apply(T1 val1, T2 val2);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A function which contains three parameters.
|
* A function which contains three parameters.
|
||||||
*/
|
*/
|
||||||
public interface Function3<T1, T2, T3, R> extends Serializable {
|
interface Function3<T1, T2, T3, R> extends Serializable {
|
||||||
R apply(T1 val1, T2 val2, T3 val3);
|
R apply(T1 val1, T2 val2, T3 val3);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -199,7 +199,7 @@ public class BootstrapOperator<I, O extends HoodieRecord<?>>
|
|||||||
Schema schema = new TableSchemaResolver(this.hoodieTable.getMetaClient()).getTableAvroSchema();
|
Schema schema = new TableSchemaResolver(this.hoodieTable.getMetaClient()).getTableAvroSchema();
|
||||||
|
|
||||||
List<FileSlice> fileSlices = this.hoodieTable.getSliceView()
|
List<FileSlice> fileSlices = this.hoodieTable.getSliceView()
|
||||||
.getLatestFileSlicesBeforeOrOn(partitionPath, latestCommitTime.get().getTimestamp(), true)
|
.getLatestMergedFileSlicesBeforeOrOn(partitionPath, latestCommitTime.get().getTimestamp())
|
||||||
.collect(toList());
|
.collect(toList());
|
||||||
|
|
||||||
for (FileSlice fileSlice : fileSlices) {
|
for (FileSlice fileSlice : fileSlices) {
|
||||||
|
|||||||
@@ -352,6 +352,10 @@ public class TestWriteCopyOnWrite extends TestWriteBase {
|
|||||||
|
|
||||||
// reset the config option
|
// reset the config option
|
||||||
conf.setBoolean(FlinkOptions.INDEX_BOOTSTRAP_ENABLED, true);
|
conf.setBoolean(FlinkOptions.INDEX_BOOTSTRAP_ENABLED, true);
|
||||||
|
validateIndexLoaded();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void validateIndexLoaded() throws Exception {
|
||||||
preparePipeline(conf)
|
preparePipeline(conf)
|
||||||
.consume(TestData.DATA_SET_UPDATE_INSERT)
|
.consume(TestData.DATA_SET_UPDATE_INSERT)
|
||||||
.checkIndexLoaded(
|
.checkIndexLoaded(
|
||||||
@@ -418,7 +422,7 @@ public class TestWriteCopyOnWrite extends TestWriteBase {
|
|||||||
return TestHarness.instance().preparePipeline(tempFile, conf);
|
return TestHarness.instance().preparePipeline(tempFile, conf);
|
||||||
}
|
}
|
||||||
|
|
||||||
private TestHarness preparePipeline(Configuration conf) throws Exception {
|
protected TestHarness preparePipeline(Configuration conf) throws Exception {
|
||||||
return TestHarness.instance().preparePipeline(tempFile, conf);
|
return TestHarness.instance().preparePipeline(tempFile, conf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -20,8 +20,10 @@ package org.apache.hudi.sink;
|
|||||||
|
|
||||||
import org.apache.hudi.common.model.HoodieTableType;
|
import org.apache.hudi.common.model.HoodieTableType;
|
||||||
import org.apache.hudi.configuration.FlinkOptions;
|
import org.apache.hudi.configuration.FlinkOptions;
|
||||||
|
import org.apache.hudi.utils.TestData;
|
||||||
|
|
||||||
import org.apache.flink.configuration.Configuration;
|
import org.apache.flink.configuration.Configuration;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
@@ -36,6 +38,27 @@ public class TestWriteMergeOnRead extends TestWriteCopyOnWrite {
|
|||||||
conf.setBoolean(FlinkOptions.COMPACTION_ASYNC_ENABLED, false);
|
conf.setBoolean(FlinkOptions.COMPACTION_ASYNC_ENABLED, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testIndexStateBootstrapWithCompactionScheduled() throws Exception {
|
||||||
|
// sets up the delta commits as 1 to generate a new compaction plan.
|
||||||
|
conf.setInteger(FlinkOptions.COMPACTION_DELTA_COMMITS, 1);
|
||||||
|
// open the function and ingest data
|
||||||
|
preparePipeline(conf)
|
||||||
|
.consume(TestData.DATA_SET_INSERT)
|
||||||
|
.assertEmptyDataFiles()
|
||||||
|
.checkpoint(1)
|
||||||
|
.assertNextEvent()
|
||||||
|
.checkpointComplete(1)
|
||||||
|
.checkWrittenData(EXPECTED1, 4)
|
||||||
|
.end();
|
||||||
|
|
||||||
|
// reset config options
|
||||||
|
conf.removeConfig(FlinkOptions.COMPACTION_DELTA_COMMITS);
|
||||||
|
// sets up index bootstrap
|
||||||
|
conf.setBoolean(FlinkOptions.INDEX_BOOTSTRAP_ENABLED, true);
|
||||||
|
validateIndexLoaded();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void testInsertClustering() {
|
public void testInsertClustering() {
|
||||||
// insert clustering is only valid for cow table.
|
// insert clustering is only valid for cow table.
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ package org.apache.hudi.utils;
|
|||||||
import org.apache.hudi.client.common.HoodieFlinkEngineContext;
|
import org.apache.hudi.client.common.HoodieFlinkEngineContext;
|
||||||
import org.apache.hudi.common.config.HoodieCommonConfig;
|
import org.apache.hudi.common.config.HoodieCommonConfig;
|
||||||
import org.apache.hudi.common.fs.FSUtils;
|
import org.apache.hudi.common.fs.FSUtils;
|
||||||
|
import org.apache.hudi.common.model.HoodieLogFile;
|
||||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||||
import org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner;
|
import org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner;
|
||||||
import org.apache.hudi.common.testutils.HoodieTestUtils;
|
import org.apache.hudi.common.testutils.HoodieTestUtils;
|
||||||
@@ -641,10 +642,11 @@ public class TestData {
|
|||||||
File[] dataFiles = partitionDir.listFiles(file ->
|
File[] dataFiles = partitionDir.listFiles(file ->
|
||||||
file.getName().contains(".log.") && !file.getName().startsWith(".."));
|
file.getName().contains(".log.") && !file.getName().startsWith(".."));
|
||||||
assertNotNull(dataFiles);
|
assertNotNull(dataFiles);
|
||||||
HoodieMergedLogRecordScanner scanner = getScanner(
|
List<String> logPaths = Arrays.stream(dataFiles)
|
||||||
fs, baseFile.getPath(), Arrays.stream(dataFiles).map(File::getAbsolutePath)
|
.sorted((f1, f2) -> HoodieLogFile.getLogFileComparator()
|
||||||
.sorted(Comparator.naturalOrder()).collect(Collectors.toList()),
|
.compare(new HoodieLogFile(f1.getPath()), new HoodieLogFile(f2.getPath())))
|
||||||
schema, latestInstant);
|
.map(File::getAbsolutePath).collect(Collectors.toList());
|
||||||
|
HoodieMergedLogRecordScanner scanner = getScanner(fs, baseFile.getPath(), logPaths, schema, latestInstant);
|
||||||
List<String> readBuffer = scanner.getRecords().values().stream()
|
List<String> readBuffer = scanner.getRecords().values().stream()
|
||||||
.map(hoodieRecord -> {
|
.map(hoodieRecord -> {
|
||||||
try {
|
try {
|
||||||
|
|||||||
Reference in New Issue
Block a user