[HUDI-808] Support cleaning bootstrap source data (#1870)
Co-authored-by: Wenning Ding <wenningd@amazon.com> Co-authored-by: Balaji Varadarajan <vbalaji@apache.org>
This commit is contained in:
@@ -24,23 +24,22 @@
|
||||
{"name": "totalFilesDeleted", "type": "int"},
|
||||
{"name": "earliestCommitToRetain", "type": "string"},
|
||||
{"name": "partitionMetadata", "type": {
|
||||
"type" : "map", "values" : {
|
||||
"type": "record",
|
||||
"name": "HoodieCleanPartitionMetadata",
|
||||
"fields": [
|
||||
{"name": "partitionPath", "type": "string"},
|
||||
{"name": "policy", "type": "string"},
|
||||
{"name": "deletePathPatterns", "type": {"type": "array", "items": "string"}},
|
||||
{"name": "successDeleteFiles", "type": {"type": "array", "items": "string"}},
|
||||
{"name": "failedDeleteFiles", "type": {"type": "array", "items": "string"}}
|
||||
]
|
||||
}
|
||||
}
|
||||
"type" : "map", "values" : "HoodieCleanPartitionMetadata"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name":"version",
|
||||
"type":["int", "null"],
|
||||
"default": 1
|
||||
},
|
||||
{
|
||||
"name": "bootstrapPartitionMetadata",
|
||||
"type": [ "null", {
|
||||
"type" : "map",
|
||||
"values" : "HoodieCleanPartitionMetadata",
|
||||
"default" : null
|
||||
}],
|
||||
"default" : null
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
29
hudi-common/src/main/avro/HoodieCleanPartitionMetadata.avsc
Normal file
29
hudi-common/src/main/avro/HoodieCleanPartitionMetadata.avsc
Normal file
@@ -0,0 +1,29 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
{
|
||||
"namespace": "org.apache.hudi.avro.model",
|
||||
"type": "record",
|
||||
"name": "HoodieCleanPartitionMetadata",
|
||||
"fields": [
|
||||
{"name": "partitionPath", "type": "string"},
|
||||
{"name": "policy", "type": "string"},
|
||||
{"name": "deletePathPatterns", "type": {"type": "array", "items": "string"}},
|
||||
{"name": "successDeleteFiles", "type": {"type": "array", "items": "string"}},
|
||||
{"name": "failedDeleteFiles", "type": {"type": "array", "items": "string"}}
|
||||
]
|
||||
}
|
||||
@@ -47,6 +47,7 @@
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
/** This is deprecated and replaced by the field filePathsToBeDeletedPerPartition **/
|
||||
"name": "filesToBeDeletedPerPartition",
|
||||
"type": [
|
||||
"null", {
|
||||
@@ -64,6 +65,33 @@
|
||||
"name":"version",
|
||||
"type":["int", "null"],
|
||||
"default": 1
|
||||
},
|
||||
{
|
||||
"name": "filePathsToBeDeletedPerPartition",
|
||||
"doc": "This field replaces the field filesToBeDeletedPerPartition",
|
||||
"type": [
|
||||
"null", {
|
||||
"type":"map",
|
||||
"values": {
|
||||
"type":"array",
|
||||
"items":{
|
||||
"name":"HoodieCleanFileInfo",
|
||||
"type": "record",
|
||||
"fields":[
|
||||
{
|
||||
"name":"filePath",
|
||||
"type":["null","string"],
|
||||
"default": null
|
||||
},
|
||||
{
|
||||
"name":"isBootstrapBaseFile",
|
||||
"type":["null","boolean"],
|
||||
"default": null
|
||||
}
|
||||
]
|
||||
}
|
||||
}}],
|
||||
"default" : null
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
@@ -20,6 +20,7 @@ package org.apache.hudi.common;
|
||||
|
||||
import org.apache.hudi.common.model.HoodieCleaningPolicy;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.util.CollectionUtils;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
|
||||
import java.io.Serializable;
|
||||
@@ -39,17 +40,35 @@ public class HoodieCleanStat implements Serializable {
|
||||
private final List<String> successDeleteFiles;
|
||||
// Files that could not be deleted
|
||||
private final List<String> failedDeleteFiles;
|
||||
// Bootstrap Base Path patterns that were generated for the delete operation
|
||||
private final List<String> deleteBootstrapBasePathPatterns;
|
||||
private final List<String> successDeleteBootstrapBaseFiles;
|
||||
// Files that could not be deleted
|
||||
private final List<String> failedDeleteBootstrapBaseFiles;
|
||||
// Earliest commit that was retained in this clean
|
||||
private final String earliestCommitToRetain;
|
||||
|
||||
/**
 * Creates a clean stat without any bootstrap base file information.
 *
 * <p>Delegates to the nine-argument constructor, passing the result of
 * {@code CollectionUtils.createImmutableList()} for each of the three bootstrap lists.
 *
 * @param policy cleaning policy recorded for this stat
 * @param partitionPath partition this stat refers to
 * @param deletePathPatterns path patterns generated for the delete operation
 * @param successDeleteFiles files that were deleted
 * @param failedDeleteFiles files that could not be deleted
 * @param earliestCommitToRetain earliest commit that was retained in this clean
 */
public HoodieCleanStat(HoodieCleaningPolicy policy, String partitionPath, List<String> deletePathPatterns,
    List<String> successDeleteFiles, List<String> failedDeleteFiles, String earliestCommitToRetain) {
  this(
      policy,
      partitionPath,
      deletePathPatterns,
      successDeleteFiles,
      failedDeleteFiles,
      earliestCommitToRetain,
      CollectionUtils.createImmutableList(),
      CollectionUtils.createImmutableList(),
      CollectionUtils.createImmutableList());
}
|
||||
|
||||
/**
 * Creates a clean stat carrying both the regular and the bootstrap base file results.
 *
 * <p>All arguments are stored as given; no copies are made and no validation is performed.
 *
 * @param policy cleaning policy recorded for this stat
 * @param partitionPath partition this stat refers to
 * @param deletePathPatterns path patterns generated for the delete operation
 * @param successDeleteFiles files that were deleted
 * @param failedDeleteFiles files that could not be deleted
 * @param earliestCommitToRetain earliest commit that was retained in this clean
 * @param deleteBootstrapBasePathPatterns bootstrap base path patterns generated for the delete operation
 * @param successDeleteBootstrapBaseFiles bootstrap base files that were deleted
 * @param failedDeleteBootstrapBaseFiles bootstrap base files that could not be deleted
 */
public HoodieCleanStat(HoodieCleaningPolicy policy, String partitionPath, List<String> deletePathPatterns,
    List<String> successDeleteFiles, List<String> failedDeleteFiles,
    String earliestCommitToRetain, List<String> deleteBootstrapBasePathPatterns,
    List<String> successDeleteBootstrapBaseFiles,
    List<String> failedDeleteBootstrapBaseFiles) {
  // Identity of the stat.
  this.policy = policy;
  this.partitionPath = partitionPath;
  this.earliestCommitToRetain = earliestCommitToRetain;

  // Outcome for regular files.
  this.deletePathPatterns = deletePathPatterns;
  this.successDeleteFiles = successDeleteFiles;
  this.failedDeleteFiles = failedDeleteFiles;

  // Outcome for bootstrap base files.
  this.deleteBootstrapBasePathPatterns = deleteBootstrapBasePathPatterns;
  this.successDeleteBootstrapBaseFiles = successDeleteBootstrapBaseFiles;
  this.failedDeleteBootstrapBaseFiles = failedDeleteBootstrapBaseFiles;
}
|
||||
|
||||
public HoodieCleaningPolicy getPolicy() {
|
||||
@@ -72,6 +91,18 @@ public class HoodieCleanStat implements Serializable {
|
||||
return failedDeleteFiles;
|
||||
}
|
||||
|
||||
public List<String> getDeleteBootstrapBasePathPatterns() {
|
||||
return deleteBootstrapBasePathPatterns;
|
||||
}
|
||||
|
||||
public List<String> getSuccessDeleteBootstrapBaseFiles() {
|
||||
return successDeleteBootstrapBaseFiles;
|
||||
}
|
||||
|
||||
public List<String> getFailedDeleteBootstrapBaseFiles() {
|
||||
return failedDeleteBootstrapBaseFiles;
|
||||
}
|
||||
|
||||
public String getEarliestCommitToRetain() {
|
||||
return earliestCommitToRetain;
|
||||
}
|
||||
@@ -91,6 +122,9 @@ public class HoodieCleanStat implements Serializable {
|
||||
private List<String> failedDeleteFiles;
|
||||
private String partitionPath;
|
||||
private String earliestCommitToRetain;
|
||||
private List<String> deleteBootstrapBasePathPatterns;
|
||||
private List<String> successDeleteBootstrapBaseFiles;
|
||||
private List<String> failedDeleteBootstrapBaseFiles;
|
||||
|
||||
public Builder withPolicy(HoodieCleaningPolicy policy) {
|
||||
this.policy = policy;
|
||||
@@ -112,6 +146,21 @@ public class HoodieCleanStat implements Serializable {
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withDeleteBootstrapBasePathPatterns(List<String> deletePathPatterns) {
|
||||
this.deleteBootstrapBasePathPatterns = deletePathPatterns;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withSuccessfulDeleteBootstrapBaseFiles(List<String> successDeleteFiles) {
|
||||
this.successDeleteBootstrapBaseFiles = successDeleteFiles;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withFailedDeleteBootstrapBaseFiles(List<String> failedDeleteFiles) {
|
||||
this.failedDeleteBootstrapBaseFiles = failedDeleteFiles;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withPartitionPath(String partitionPath) {
|
||||
this.partitionPath = partitionPath;
|
||||
return this;
|
||||
@@ -125,7 +174,8 @@ public class HoodieCleanStat implements Serializable {
|
||||
|
||||
public HoodieCleanStat build() {
|
||||
return new HoodieCleanStat(policy, partitionPath, deletePathPatterns, successDeleteFiles, failedDeleteFiles,
|
||||
earliestCommitToRetain);
|
||||
earliestCommitToRetain, deleteBootstrapBasePathPatterns, successDeleteBootstrapBaseFiles,
|
||||
failedDeleteBootstrapBaseFiles);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -137,7 +187,10 @@ public class HoodieCleanStat implements Serializable {
|
||||
+ ", deletePathPatterns=" + deletePathPatterns
|
||||
+ ", successDeleteFiles=" + successDeleteFiles
|
||||
+ ", failedDeleteFiles=" + failedDeleteFiles
|
||||
+ ", earliestCommitToRetain='" + earliestCommitToRetain + '\''
|
||||
+ ", earliestCommitToRetain='" + earliestCommitToRetain
|
||||
+ ", deleteBootstrapBasePathPatterns=" + deleteBootstrapBasePathPatterns
|
||||
+ ", successDeleteBootstrapBaseFiles=" + successDeleteBootstrapBaseFiles
|
||||
+ ", failedDeleteBootstrapBaseFiles=" + failedDeleteBootstrapBaseFiles + '\''
|
||||
+ '}';
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,47 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.common.model;
|
||||
|
||||
import org.apache.hudi.avro.model.HoodieCleanFileInfo;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
public class CleanFileInfo implements Serializable {
|
||||
|
||||
private final String filePath;
|
||||
private final boolean isBootstrapBaseFile;
|
||||
|
||||
public CleanFileInfo(String filePath, boolean isBootstrapBaseFile) {
|
||||
this.filePath = filePath;
|
||||
this.isBootstrapBaseFile = isBootstrapBaseFile;
|
||||
}
|
||||
|
||||
public String getFilePath() {
|
||||
return filePath;
|
||||
}
|
||||
|
||||
public boolean isBootstrapBaseFile() {
|
||||
return isBootstrapBaseFile;
|
||||
}
|
||||
|
||||
public HoodieCleanFileInfo toHoodieFileCleanInfo() {
|
||||
return new HoodieCleanFileInfo(filePath, isBootstrapBaseFile);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -29,7 +29,7 @@ public class CleanMetadataMigrator extends MetadataMigrator<HoodieCleanMetadata>
|
||||
public CleanMetadataMigrator(HoodieTableMetaClient metaClient) {
|
||||
super(metaClient,
|
||||
Arrays
|
||||
.asList(new CleanV1MigrationHandler(metaClient),
|
||||
new CleanV2MigrationHandler(metaClient)));
|
||||
.asList(new CleanMetadataV1MigrationHandler(metaClient),
|
||||
new CleanMetadataV2MigrationHandler(metaClient)));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -31,11 +31,11 @@ import org.apache.hadoop.fs.Path;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class CleanV1MigrationHandler extends AbstractMigratorBase<HoodieCleanMetadata> {
|
||||
public class CleanMetadataV1MigrationHandler extends AbstractMigratorBase<HoodieCleanMetadata> {
|
||||
|
||||
public static final Integer VERSION = 1;
|
||||
|
||||
public CleanV1MigrationHandler(HoodieTableMetaClient metaClient) {
|
||||
public CleanMetadataV1MigrationHandler(HoodieTableMetaClient metaClient) {
|
||||
super(metaClient);
|
||||
}
|
||||
|
||||
@@ -31,11 +31,11 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class CleanV2MigrationHandler extends AbstractMigratorBase<HoodieCleanMetadata> {
|
||||
public class CleanMetadataV2MigrationHandler extends AbstractMigratorBase<HoodieCleanMetadata> {
|
||||
|
||||
public static final Integer VERSION = 2;
|
||||
|
||||
public CleanV2MigrationHandler(HoodieTableMetaClient metaClient) {
|
||||
public CleanMetadataV2MigrationHandler(HoodieTableMetaClient metaClient) {
|
||||
super(metaClient);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,36 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.common.table.timeline.versioning.clean;
|
||||
|
||||
import org.apache.hudi.avro.model.HoodieCleanerPlan;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.timeline.versioning.MetadataMigrator;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Manages upgrade/downgrade of cleaner plan.
|
||||
*/
|
||||
public class CleanPlanMigrator extends MetadataMigrator<HoodieCleanerPlan> {
|
||||
|
||||
public CleanPlanMigrator(HoodieTableMetaClient metaClient) {
|
||||
super(metaClient,
|
||||
Arrays.asList(new CleanPlanV1MigrationHandler(metaClient), new CleanPlanV2MigrationHandler(metaClient)));
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,66 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.common.table.timeline.versioning.clean;
|
||||
|
||||
import java.util.HashMap;
|
||||
import org.apache.hudi.avro.model.HoodieCleanerPlan;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.timeline.versioning.AbstractMigratorBase;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class CleanPlanV1MigrationHandler extends AbstractMigratorBase<HoodieCleanerPlan> {
|
||||
|
||||
public static final Integer VERSION = 1;
|
||||
|
||||
public CleanPlanV1MigrationHandler(HoodieTableMetaClient metaClient) {
|
||||
super(metaClient);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer getManagedVersion() {
|
||||
return VERSION;
|
||||
}
|
||||
|
||||
@Override
|
||||
public HoodieCleanerPlan upgradeFrom(HoodieCleanerPlan plan) {
|
||||
throw new IllegalArgumentException(
|
||||
"This is the lowest version. Plan cannot be any lower version");
|
||||
}
|
||||
|
||||
@Override
|
||||
public HoodieCleanerPlan downgradeFrom(HoodieCleanerPlan plan) {
|
||||
if (metaClient.getTableConfig().getBootstrapBasePath().isPresent()) {
|
||||
throw new IllegalArgumentException(
|
||||
"This version do not support METADATA_ONLY bootstrapped tables. Failed to downgrade.");
|
||||
}
|
||||
Map<String, List<String>> filesPerPartition = plan.getFilePathsToBeDeletedPerPartition().entrySet().stream()
|
||||
.map(e -> {
|
||||
return Pair.of(e.getKey(), e.getValue().stream().map(v -> new Path(v.getFilePath()).getName())
|
||||
.collect(Collectors.toList()));
|
||||
}).collect(Collectors.toMap(Pair::getKey, Pair::getValue));
|
||||
return new HoodieCleanerPlan(plan.getEarliestInstantToRetain(), plan.getPolicy(), filesPerPartition, VERSION,
|
||||
new HashMap<>());
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,64 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.common.table.timeline.versioning.clean;
|
||||
|
||||
import org.apache.hudi.avro.model.HoodieCleanFileInfo;
|
||||
import org.apache.hudi.avro.model.HoodieCleanerPlan;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.timeline.versioning.AbstractMigratorBase;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class CleanPlanV2MigrationHandler extends AbstractMigratorBase<HoodieCleanerPlan> {
|
||||
|
||||
public static final Integer VERSION = 2;
|
||||
|
||||
public CleanPlanV2MigrationHandler(HoodieTableMetaClient metaClient) {
|
||||
super(metaClient);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer getManagedVersion() {
|
||||
return VERSION;
|
||||
}
|
||||
|
||||
@Override
|
||||
public HoodieCleanerPlan upgradeFrom(HoodieCleanerPlan plan) {
|
||||
Map<String, List<HoodieCleanFileInfo>> filePathsPerPartition =
|
||||
plan.getFilesToBeDeletedPerPartition().entrySet().stream().map(e -> Pair.of(e.getKey(), e.getValue().stream()
|
||||
.map(v -> new HoodieCleanFileInfo(
|
||||
new Path(FSUtils.getPartitionPath(metaClient.getBasePath(), e.getKey()), v).toString(), false))
|
||||
.collect(Collectors.toList()))).collect(Collectors.toMap(Pair::getKey, Pair::getValue));
|
||||
return new HoodieCleanerPlan(plan.getEarliestInstantToRetain(), plan.getPolicy(), new HashMap<>(), VERSION,
|
||||
filePathsPerPartition);
|
||||
}
|
||||
|
||||
@Override
|
||||
public HoodieCleanerPlan downgradeFrom(HoodieCleanerPlan input) {
|
||||
throw new IllegalArgumentException(
|
||||
"This is the current highest version. Plan cannot be any higher version");
|
||||
}
|
||||
}
|
||||
@@ -252,7 +252,8 @@ public abstract class IncrementalTimelineSyncFileSystemView extends AbstractTabl
|
||||
}
|
||||
|
||||
/**
|
||||
* Add newly found clean instant.
|
||||
* Add newly found clean instant. Note that cleaner metadata (.clean.completed)
* contains only relative paths, unlike clean plans (.clean.requested), which contain absolute paths.
|
||||
*
|
||||
* @param timeline Timeline
|
||||
* @param instant Clean instant
|
||||
|
||||
@@ -18,16 +18,20 @@
|
||||
|
||||
package org.apache.hudi.common.util;
|
||||
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.hudi.avro.model.HoodieCleanFileInfo;
|
||||
import org.apache.hudi.avro.model.HoodieCleanMetadata;
|
||||
import org.apache.hudi.avro.model.HoodieCleanPartitionMetadata;
|
||||
import org.apache.hudi.avro.model.HoodieCleanerPlan;
|
||||
import org.apache.hudi.common.HoodieCleanStat;
|
||||
import org.apache.hudi.common.model.CleanFileInfo;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
|
||||
import org.apache.hudi.common.table.timeline.versioning.clean.CleanMetadataMigrator;
|
||||
import org.apache.hudi.common.table.timeline.versioning.clean.CleanV1MigrationHandler;
|
||||
import org.apache.hudi.common.table.timeline.versioning.clean.CleanV2MigrationHandler;
|
||||
import org.apache.hudi.common.table.timeline.versioning.clean.CleanMetadataV1MigrationHandler;
|
||||
import org.apache.hudi.common.table.timeline.versioning.clean.CleanMetadataV2MigrationHandler;
|
||||
import org.apache.hudi.common.table.timeline.versioning.clean.CleanPlanMigrator;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
@@ -35,14 +39,16 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public class CleanerUtils {
|
||||
public static final Integer CLEAN_METADATA_VERSION_1 = CleanV1MigrationHandler.VERSION;
|
||||
public static final Integer CLEAN_METADATA_VERSION_2 = CleanV2MigrationHandler.VERSION;
|
||||
public static final Integer CLEAN_METADATA_VERSION_1 = CleanMetadataV1MigrationHandler.VERSION;
|
||||
public static final Integer CLEAN_METADATA_VERSION_2 = CleanMetadataV2MigrationHandler.VERSION;
|
||||
public static final Integer LATEST_CLEAN_METADATA_VERSION = CLEAN_METADATA_VERSION_2;
|
||||
|
||||
public static HoodieCleanMetadata convertCleanMetadata(String startCleanTime,
|
||||
Option<Long> durationInMs,
|
||||
List<HoodieCleanStat> cleanStats) {
|
||||
Map<String, HoodieCleanPartitionMetadata> partitionMetadataMap = new HashMap<>();
|
||||
Map<String, HoodieCleanPartitionMetadata> partitionBootstrapMetadataMap = new HashMap<>();
|
||||
|
||||
int totalDeleted = 0;
|
||||
String earliestCommitToRetain = null;
|
||||
for (HoodieCleanStat stat : cleanStats) {
|
||||
@@ -50,6 +56,13 @@ public class CleanerUtils {
|
||||
new HoodieCleanPartitionMetadata(stat.getPartitionPath(), stat.getPolicy().name(),
|
||||
stat.getDeletePathPatterns(), stat.getSuccessDeleteFiles(), stat.getFailedDeleteFiles());
|
||||
partitionMetadataMap.put(stat.getPartitionPath(), metadata);
|
||||
if ((null != stat.getDeleteBootstrapBasePathPatterns())
|
||||
&& (!stat.getDeleteBootstrapBasePathPatterns().isEmpty())) {
|
||||
HoodieCleanPartitionMetadata bootstrapMetadata = new HoodieCleanPartitionMetadata(stat.getPartitionPath(),
|
||||
stat.getPolicy().name(), stat.getDeleteBootstrapBasePathPatterns(), stat.getSuccessDeleteBootstrapBaseFiles(),
|
||||
stat.getFailedDeleteBootstrapBaseFiles());
|
||||
partitionBootstrapMetadataMap.put(stat.getPartitionPath(), bootstrapMetadata);
|
||||
}
|
||||
totalDeleted += stat.getSuccessDeleteFiles().size();
|
||||
if (earliestCommitToRetain == null) {
|
||||
// This will be the same for all partitions
|
||||
@@ -57,8 +70,8 @@ public class CleanerUtils {
|
||||
}
|
||||
}
|
||||
|
||||
return new HoodieCleanMetadata(startCleanTime,
|
||||
durationInMs.orElseGet(() -> -1L), totalDeleted, earliestCommitToRetain, partitionMetadataMap, CLEAN_METADATA_VERSION_2);
|
||||
return new HoodieCleanMetadata(startCleanTime, durationInMs.orElseGet(() -> -1L), totalDeleted,
|
||||
earliestCommitToRetain, partitionMetadataMap, CLEAN_METADATA_VERSION_2, partitionBootstrapMetadataMap);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -77,7 +90,7 @@ public class CleanerUtils {
|
||||
}
|
||||
|
||||
/**
|
||||
* Get Cleaner Plan corresponding to a clean instant.
|
||||
* Get Latest version of cleaner plan corresponding to a clean instant.
|
||||
* @param metaClient Hoodie Table Meta Client
|
||||
* @param cleanInstant Instant referring to clean action
|
||||
* @return Cleaner plan corresponding to clean instant
|
||||
@@ -85,7 +98,18 @@ public class CleanerUtils {
|
||||
*/
|
||||
public static HoodieCleanerPlan getCleanerPlan(HoodieTableMetaClient metaClient, HoodieInstant cleanInstant)
|
||||
throws IOException {
|
||||
return TimelineMetadataUtils.deserializeAvroMetadata(metaClient.getActiveTimeline().readCleanerInfoAsBytes(cleanInstant).get(),
|
||||
HoodieCleanerPlan.class);
|
||||
CleanPlanMigrator cleanPlanMigrator = new CleanPlanMigrator(metaClient);
|
||||
HoodieCleanerPlan cleanerPlan = TimelineMetadataUtils.deserializeAvroMetadata(
|
||||
metaClient.getActiveTimeline().readCleanerInfoAsBytes(cleanInstant).get(), HoodieCleanerPlan.class);
|
||||
return cleanPlanMigrator.upgradeToLatest(cleanerPlan, cleanerPlan.getVersion());
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert list of cleanFileInfo instances to list of avro-generated HoodieCleanFileInfo instances.
|
||||
* @param cleanFileInfoList
|
||||
* @return
|
||||
*/
|
||||
public static List<HoodieCleanFileInfo> convertToHoodieCleanFileInfoList(List<CleanFileInfo> cleanFileInfoList) {
|
||||
return cleanFileInfoList.stream().map(CleanFileInfo::toHoodieFileCleanInfo).collect(Collectors.toList());
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user