1
0

[HUDI-2924] Refresh the fs view on successful checkpoints for write profile (#4199)

This commit is contained in:
Danny Chan
2021-12-03 16:12:59 +08:00
committed by GitHub
parent f74b3d12aa
commit 0699521f83
5 changed files with 25 additions and 5 deletions

View File

@@ -60,7 +60,9 @@ public class CleanFunction<T> extends AbstractRichFunction
public void open(Configuration parameters) throws Exception {
super.open(parameters);
if (conf.getBoolean(FlinkOptions.CLEAN_ASYNC_ENABLED)) {
this.writeClient = StreamerUtil.createWriteClient(conf, getRuntimeContext());
// do not use the remote filesystem view because the async cleaning service
// local timeline is very probably to fall behind with the remote one.
this.writeClient = StreamerUtil.createWriteClient(conf, getRuntimeContext(), false);
this.executor = NonThrownExecutor.builder(LOG).build();
}
}

View File

@@ -113,7 +113,7 @@ public class BucketAssignFunction<K, I, O extends HoodieRecord<?>>
@Override
public void open(Configuration parameters) throws Exception {
super.open(parameters);
HoodieWriteConfig writeConfig = StreamerUtil.getHoodieClientConfig(this.conf, true);
HoodieWriteConfig writeConfig = StreamerUtil.getHoodieClientConfig(this.conf, false);
HoodieFlinkEngineContext context = new HoodieFlinkEngineContext(
new SerializableConfiguration(StreamerUtil.getHadoopConf()),
new FlinkTaskContextSupplier(getRuntimeContext()));

View File

@@ -59,7 +59,7 @@ public class DeltaWriteProfile extends WriteProfile {
List<FileSlice> allSmallFileSlices = new ArrayList<>();
// If we can index log files, we can add more inserts to log files for fileIds including those under
// pending compaction.
List<FileSlice> allFileSlices = getFileSystemView().getLatestFileSlicesBeforeOrOn(partitionPath, latestCommitTime.getTimestamp(), true)
List<FileSlice> allFileSlices = fsView.getLatestFileSlicesBeforeOrOn(partitionPath, latestCommitTime.getTimestamp(), true)
.collect(Collectors.toList());
for (FileSlice fileSlice : allFileSlices) {
if (isSmallFile(fileSlice)) {

View File

@@ -93,6 +93,11 @@ public class WriteProfile {
*/
private long reloadedCheckpointId;
/**
* The file system view cache for one checkpoint interval.
*/
protected SyncableFileSystemView fsView;
/**
* Metadata cache to reduce IO of metadata files.
*/
@@ -111,6 +116,7 @@ public class WriteProfile {
this.recordsPerBucket = config.getCopyOnWriteInsertSplitSize();
this.metaClient = StreamerUtil.createMetaClient(config.getBasePath(), context.getHadoopConf().get());
this.metadataCache = new HashMap<>();
this.fsView = getFileSystemView();
// profile the record statistics on construction
recordProfile();
}
@@ -190,7 +196,7 @@ public class WriteProfile {
if (!commitTimeline.empty()) { // if we have some commits
HoodieInstant latestCommitTime = commitTimeline.lastInstant().get();
List<HoodieBaseFile> allFiles = getFileSystemView()
List<HoodieBaseFile> allFiles = fsView
.getLatestBaseFilesBeforeOrOn(partitionPath, latestCommitTime.getTimestamp()).collect(Collectors.toList());
for (HoodieBaseFile file : allFiles) {
@@ -243,6 +249,7 @@ public class WriteProfile {
return;
}
this.metaClient.reloadActiveTimeline();
this.fsView = getFileSystemView();
recordProfile();
cleanMetadataCache(this.metaClient.getCommitsTimeline().filterCompletedInstants().getInstants());
this.smallFilesMap.clear();

View File

@@ -367,12 +367,23 @@ public class StreamerUtil {
*/
@SuppressWarnings("rawtypes")
public static HoodieFlinkWriteClient createWriteClient(Configuration conf, RuntimeContext runtimeContext) {
return createWriteClient(conf, runtimeContext, true);
}
/**
* Creates the Flink write client.
*
* <p>This expects to be used by client, set flag {@code loadFsViewStorageConfig} to use
* remote filesystem view storage config, or an in-memory filesystem view storage is used.
*/
@SuppressWarnings("rawtypes")
public static HoodieFlinkWriteClient createWriteClient(Configuration conf, RuntimeContext runtimeContext, boolean loadFsViewStorageConfig) {
HoodieFlinkEngineContext context =
new HoodieFlinkEngineContext(
new SerializableConfiguration(getHadoopConf()),
new FlinkTaskContextSupplier(runtimeContext));
HoodieWriteConfig writeConfig = getHoodieClientConfig(conf, true);
HoodieWriteConfig writeConfig = getHoodieClientConfig(conf, loadFsViewStorageConfig);
return new HoodieFlinkWriteClient<>(context, writeConfig);
}