[HUDI-2072] Add pre-commit validator framework (#3153)
* [HUDI-2072] Add pre-commit validator framework * trigger Travis rebuild
This commit is contained in:
@@ -0,0 +1,90 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.common.table.view;
|
||||
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hudi.common.model.HoodieBaseFile;
|
||||
import org.apache.hudi.common.model.HoodieWriteStat;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
/**
|
||||
* Create PreCommitFileSystemView by only filtering instants that are of interest.
|
||||
* For example, we want to exclude
|
||||
* other inflight instants. This is achieved by combining
|
||||
* 1) FileSystemView with completed commits
|
||||
* 2) Using list of files written/replaced by inflight instant that we are validating
|
||||
*
|
||||
*/
|
||||
public class HoodieTablePreCommitFileSystemView {
|
||||
|
||||
private Map<String, List<String>> partitionToReplaceFileIds;
|
||||
private List<HoodieWriteStat> filesWritten;
|
||||
private String preCommitInstantTime;
|
||||
private SyncableFileSystemView completedCommitsFileSystemView;
|
||||
private HoodieTableMetaClient tableMetaClient;
|
||||
|
||||
/**
|
||||
* Create a file system view for the inflight commit that we are validating.
|
||||
*/
|
||||
public HoodieTablePreCommitFileSystemView(HoodieTableMetaClient metaClient,
|
||||
SyncableFileSystemView completedCommitsFileSystemView,
|
||||
List<HoodieWriteStat> filesWritten,
|
||||
Map<String, List<String>> partitionToReplaceFileIds,
|
||||
String instantTime) {
|
||||
this.completedCommitsFileSystemView = completedCommitsFileSystemView;
|
||||
this.filesWritten = filesWritten;
|
||||
this.partitionToReplaceFileIds = partitionToReplaceFileIds;
|
||||
this.preCommitInstantTime = instantTime;
|
||||
this.tableMetaClient = metaClient;
|
||||
}
|
||||
|
||||
/**
|
||||
* Combine committed base files + new files created/replaced for given partition.
|
||||
*/
|
||||
public final Stream<HoodieBaseFile> getLatestBaseFiles(String partitionStr) {
|
||||
// get fileIds replaced by current inflight commit
|
||||
List<String> replacedFileIdsForPartition = partitionToReplaceFileIds.getOrDefault(partitionStr, Collections.emptyList());
|
||||
|
||||
// get new files written by current inflight commit
|
||||
Map<String, HoodieBaseFile> newFilesWrittenForPartition = filesWritten.stream()
|
||||
.filter(file -> partitionStr.equals(file.getPartitionPath()))
|
||||
.collect(Collectors.toMap(HoodieWriteStat::getFileId, writeStat ->
|
||||
new HoodieBaseFile(new Path(tableMetaClient.getBasePath(), writeStat.getPath()).toString())));
|
||||
|
||||
Stream<HoodieBaseFile> committedBaseFiles = this.completedCommitsFileSystemView.getLatestBaseFiles(partitionStr);
|
||||
Stream<HoodieBaseFile> baseFilesForCommittedFileIds = committedBaseFiles
|
||||
// Remove files replaced by current inflight commit
|
||||
.filter(baseFile -> !replacedFileIdsForPartition.contains(baseFile.getFileId()))
|
||||
// if there is new version of file created by inflight commit, use that file instead of committed version
|
||||
.map(baseFile -> {
|
||||
HoodieBaseFile fileIdNewVersionExists = newFilesWrittenForPartition.remove(baseFile.getFileId());
|
||||
return Option.ofNullable(fileIdNewVersionExists).orElse(baseFile);
|
||||
});
|
||||
|
||||
Stream<HoodieBaseFile> baseFilesWithNewFileIds = newFilesWrittenForPartition.values().stream();
|
||||
return Stream.concat(baseFilesForCommittedFileIds, baseFilesWithNewFileIds);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,35 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.exception;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Exception thrown for validation failures.
|
||||
* </p>
|
||||
*/
|
||||
public class HoodieValidationException extends HoodieException {
|
||||
|
||||
public HoodieValidationException(String msg, Throwable t) {
|
||||
super(msg, t);
|
||||
}
|
||||
|
||||
public HoodieValidationException(String msg) {
|
||||
super(msg);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user