1
0

[HUDI-2072] Add pre-commit validator framework (#3153)

* [HUDI-2072] Add pre-commit validator framework

* trigger Travis rebuild
This commit is contained in:
satishkotha
2021-08-03 12:07:45 -07:00
committed by GitHub
parent bec23bda50
commit 826a04d142
14 changed files with 1130 additions and 32 deletions

View File

@@ -0,0 +1,90 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.common.table.view;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.util.Option;

import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
 * Create PreCommitFileSystemView by only filtering instants that are of interest.
 * For example, we want to exclude other inflight instants. This is achieved by combining
 * 1) FileSystemView with completed commits
 * 2) Using list of files written/replaced by inflight instant that we are validating
 */
public class HoodieTablePreCommitFileSystemView {

  // partition path -> file IDs replaced by the inflight instant being validated
  private final Map<String, List<String>> partitionToReplaceFileIds;
  // write stats of the files written by the inflight instant being validated
  private final List<HoodieWriteStat> filesWritten;
  private final String preCommitInstantTime;
  // view restricted to completed commits; supplies the committed base files
  private final SyncableFileSystemView completedCommitsFileSystemView;
  // used to resolve the relative paths in the write stats against the table base path
  private final HoodieTableMetaClient tableMetaClient;

  /**
   * Create a file system view for the inflight commit that we are validating.
   *
   * @param metaClient                     meta client of the table; its base path is used to resolve written file paths
   * @param completedCommitsFileSystemView file system view that only exposes completed commits
   * @param filesWritten                   write stats of the files written by the inflight commit
   * @param partitionToReplaceFileIds      file IDs replaced by the inflight commit, keyed by partition path
   * @param instantTime                    instant time of the inflight commit
   */
  public HoodieTablePreCommitFileSystemView(HoodieTableMetaClient metaClient,
                                            SyncableFileSystemView completedCommitsFileSystemView,
                                            List<HoodieWriteStat> filesWritten,
                                            Map<String, List<String>> partitionToReplaceFileIds,
                                            String instantTime) {
    this.completedCommitsFileSystemView = completedCommitsFileSystemView;
    this.filesWritten = filesWritten;
    this.partitionToReplaceFileIds = partitionToReplaceFileIds;
    this.preCommitInstantTime = instantTime;
    this.tableMetaClient = metaClient;
  }

  /**
   * Combine committed base files + new files created/replaced for given partition.
   *
   * <p>The returned stream first yields the committed base files of the partition (minus the ones replaced by
   * the inflight commit, and with the inflight commit's version substituted where it rewrote an existing file ID),
   * followed by the files the inflight commit wrote under file IDs that have no surviving committed base file.
   *
   * <p>The committed view is consumed when this method is called, so the returned stream is a stable snapshot:
   * nothing is mutated while it is being traversed.
   *
   * @param partitionStr partition path to list
   * @return latest base files of the partition as they would look once the inflight commit completes
   * @throws IllegalStateException if the inflight commit has more than one write stat with the same file ID in
   *                               this partition (behavior of the two-argument {@code Collectors.toMap})
   */
  public final Stream<HoodieBaseFile> getLatestBaseFiles(String partitionStr) {
    // get fileIds replaced by current inflight commit; a set keeps the filter below a constant-time lookup
    Set<String> replacedFileIdsForPartition =
        new HashSet<>(partitionToReplaceFileIds.getOrDefault(partitionStr, Collections.emptyList()));

    // get new files written by current inflight commit, keyed by file ID
    Map<String, HoodieBaseFile> newFilesWrittenForPartition = filesWritten.stream()
        .filter(file -> partitionStr.equals(file.getPartitionPath()))
        .collect(Collectors.toMap(HoodieWriteStat::getFileId, writeStat ->
            new HoodieBaseFile(new Path(tableMetaClient.getBasePath(), writeStat.getPath()).toString())));

    // Materialize the committed files up front. The previous implementation removed entries from
    // newFilesWrittenForPartition inside a lazy map() stage while Stream.concat had already bound a
    // spliterator over that same map, which fails with ConcurrentModificationException on traversal.
    List<HoodieBaseFile> committedBaseFiles = completedCommitsFileSystemView.getLatestBaseFiles(partitionStr)
        // Remove files replaced by current inflight commit
        .filter(baseFile -> !replacedFileIdsForPartition.contains(baseFile.getFileId()))
        .collect(Collectors.toList());
    Set<String> committedFileIds = committedBaseFiles.stream()
        .map(HoodieBaseFile::getFileId)
        .collect(Collectors.toSet());

    // if there is new version of file created by inflight commit, use that file instead of committed version
    Stream<HoodieBaseFile> baseFilesForCommittedFileIds = committedBaseFiles.stream()
        .map(baseFile -> Option.ofNullable(newFilesWrittenForPartition.get(baseFile.getFileId())).orElse(baseFile));

    // files written under file IDs that were not already emitted above
    Stream<HoodieBaseFile> baseFilesWithNewFileIds = newFilesWrittenForPartition.entrySet().stream()
        .filter(entry -> !committedFileIds.contains(entry.getKey()))
        .map(Map.Entry::getValue);

    return Stream.concat(baseFilesForCommittedFileIds, baseFilesWithNewFileIds);
  }
}

View File

@@ -0,0 +1,35 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.exception;
/**
 * Exception raised when a validation check fails.
 */
public class HoodieValidationException extends HoodieException {

  /**
   * Builds the exception from a message only.
   *
   * @param msg description of the validation failure
   */
  public HoodieValidationException(String msg) {
    super(msg);
  }

  /**
   * Builds the exception from a message and the throwable that triggered it.
   *
   * @param msg description of the validation failure
   * @param t   throwable handed to the parent {@code HoodieException} constructor together with the message
   */
  public HoodieValidationException(String msg, Throwable t) {
    super(msg, t);
  }
}