1
0

[HUDI-1999] Refresh the base file view cache for WriteProfile (#3067)

Refresh the view to discover new small files.
This commit is contained in:
Danny Chan
2021-06-15 23:18:38 +08:00
committed by GitHub
parent f922837064
commit cb642ceb75
7 changed files with 197 additions and 61 deletions

View File

@@ -22,6 +22,8 @@ import org.apache.hudi.client.FlinkTaskContextSupplier;
import org.apache.hudi.client.common.HoodieFlinkEngineContext;
import org.apache.hudi.common.config.SerializableConfiguration;
import org.apache.hudi.common.model.HoodieRecordLocation;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.sink.partitioner.profile.WriteProfile;
import org.apache.hudi.table.action.commit.BucketInfo;
@@ -29,6 +31,7 @@ import org.apache.hudi.table.action.commit.BucketType;
import org.apache.hudi.table.action.commit.SmallFile;
import org.apache.hudi.util.StreamerUtil;
import org.apache.hudi.utils.TestConfigurations;
import org.apache.hudi.utils.TestData;
import org.apache.flink.configuration.Configuration;
import org.junit.jupiter.api.BeforeEach;
@@ -45,6 +48,10 @@ import java.util.Map;
import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
/**
* Test cases for {@link BucketAssigner}.
@@ -52,6 +59,7 @@ import static org.hamcrest.MatcherAssert.assertThat;
public class TestBucketAssigner {
private HoodieWriteConfig writeConfig;
private HoodieFlinkEngineContext context;
private Configuration conf;
@TempDir
File tempFile;
@@ -59,7 +67,7 @@ public class TestBucketAssigner {
@BeforeEach
public void before() throws IOException {
final String basePath = tempFile.getAbsolutePath();
final Configuration conf = TestConfigurations.getDefaultConf(basePath);
conf = TestConfigurations.getDefaultConf(basePath);
writeConfig = StreamerUtil.getHoodieClientConfig(conf);
context = new HoodieFlinkEngineContext(
@@ -291,6 +299,44 @@ public class TestBucketAssigner {
assertBucketEquals(bucketInfo2, "par1", BucketType.UPDATE, "f1");
}
/**
 * Checks that a {@link WriteProfile} keeps serving its previous view of small files
 * and completed instants until {@code reload} is called, and that each reload picks
 * up the data written since the previous one.
 */
@Test
public void testWriteProfileReload() throws Exception {
  WriteProfile writeProfile = new WriteProfile(writeConfig, context);
  List<SmallFile> smallFiles1 = writeProfile.getSmallFiles("par1");
  assertTrue(smallFiles1.isEmpty(), "Should have no small files");

  // First write: the profile has not been reloaded yet, so its timeline is
  // expected to show no completed instant.
  TestData.writeData(TestData.DATA_SET_INSERT, conf);
  Option<String> instantOption = getLastCompleteInstant(writeProfile);
  assertFalse(instantOption.isPresent());

  writeProfile.reload(1);
  String instant1 = getLastCompleteInstant(writeProfile).orElse(null);
  assertNotNull(instant1);
  List<SmallFile> smallFiles2 = writeProfile.getSmallFiles("par1");
  assertThat("Should have 1 small file", smallFiles2.size(), is(1));
  assertThat("Small file should have same timestamp as last complete instant",
      smallFiles2.get(0).location.getInstantTime(), is(instant1));

  // Second write: without a reload the profile must still report the small file
  // from the first instant.
  TestData.writeData(TestData.DATA_SET_INSERT, conf);
  List<SmallFile> smallFiles3 = writeProfile.getSmallFiles("par1");
  assertThat("Should have 1 small file", smallFiles3.size(), is(1));
  assertThat("Non-reloaded write profile has the same base file view as before",
      smallFiles3.get(0).location.getInstantTime(), is(instant1));

  writeProfile.reload(2);
  String instant2 = getLastCompleteInstant(writeProfile).orElse(null);
  // Guard against a missing instant: a null value would otherwise satisfy the
  // inequality check below and only fail later with a misleading message.
  assertNotNull(instant2);
  // JUnit 5 argument order is (unexpected, actual, message).
  assertNotEquals(instant1, instant2, "Should have new complete instant");
  List<SmallFile> smallFiles4 = writeProfile.getSmallFiles("par1");
  assertThat("Should have 1 small file", smallFiles4.size(), is(1));
  assertThat("Small file should have same timestamp as last complete instant",
      smallFiles4.get(0).location.getInstantTime(), is(instant2));
}
/**
 * Looks up the timestamp of the last completed instant on the commits timeline
 * of the table held by the given write profile.
 *
 * @param profile the write profile whose table meta client is queried
 * @return the timestamp of the last completed commit instant, wrapped in an
 *         {@link Option} (presumably empty when no completed instant is visible)
 */
private static Option<String> getLastCompleteInstant(WriteProfile profile) {
  return profile
      .getTable()
      .getMetaClient()
      .getCommitsTimeline()
      .filterCompletedInstants()
      .lastInstant()
      .map(HoodieInstant::getTimestamp);
}
private void assertBucketEquals(
BucketInfo bucketInfo,
String partition,