1
0

[HUDI-294] Delete Paths written in Cleaner plan needs to be relative to partition-path (#1062)

[HUDI-294] Delete Paths written in Cleaner plan needs to be relative to partition-path
This commit is contained in:
leesf
2019-12-04 02:11:03 +08:00
committed by Balaji Varadarajan
parent 845e261658
commit 98ab33bb6e
11 changed files with 425 additions and 35 deletions

View File

@@ -32,6 +32,7 @@ import org.apache.hudi.common.table.HoodieTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.util.AvroUtils;
import org.apache.hudi.common.util.CompactionUtils;
import org.apache.hudi.common.util.FSUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.TimelineDiffHelper;
import org.apache.hudi.common.util.TimelineDiffHelper.TimelineDiffResult;
@@ -261,7 +262,13 @@ public abstract class IncrementalTimelineSyncFileSystemView extends AbstractTabl
HoodieCleanMetadata cleanMetadata =
AvroUtils.deserializeHoodieCleanMetadata(timeline.getInstantDetails(instant).get());
cleanMetadata.getPartitionMetadata().entrySet().stream().forEach(entry -> {
removeFileSlicesForPartition(timeline, instant, entry.getKey(), entry.getValue().getSuccessDeleteFiles());
final String basePath = metaClient.getBasePath();
final String partitionPath = entry.getValue().getPartitionPath();
List<String> fullPathList = entry.getValue().getSuccessDeleteFiles()
.stream().map(fileName -> new Path(FSUtils
.getPartitionPath(basePath, partitionPath), fileName).toString())
.collect(Collectors.toList());
removeFileSlicesForPartition(timeline, instant, entry.getKey(), fullPathList);
});
log.info("Done Syncing cleaner instant (" + instant + ")");
}

View File

@@ -19,7 +19,6 @@
package org.apache.hudi.common.util;
import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.avro.model.HoodieCleanPartitionMetadata;
import org.apache.hudi.avro.model.HoodieCleanerPlan;
import org.apache.hudi.avro.model.HoodieCompactionPlan;
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
@@ -27,7 +26,6 @@ import org.apache.hudi.avro.model.HoodieRollbackMetadata;
import org.apache.hudi.avro.model.HoodieRollbackPartitionMetadata;
import org.apache.hudi.avro.model.HoodieSavepointMetadata;
import org.apache.hudi.avro.model.HoodieSavepointPartitionMetadata;
import org.apache.hudi.common.HoodieCleanStat;
import org.apache.hudi.common.HoodieRollbackStat;
import com.google.common.base.Preconditions;
@@ -52,26 +50,6 @@ public class AvroUtils {
private static final Integer DEFAULT_VERSION = 1;
/**
 * Builds a {@link HoodieCleanMetadata} record out of per-partition clean statistics.
 *
 * <p>One {@link HoodieCleanPartitionMetadata} is created per stat and keyed by the stat's
 * partition path. The total number of deleted files is the sum of the sizes of the
 * successfully-deleted lists, and the earliest commit to retain is taken from the first stat
 * that reports a non-null value.
 *
 * @param startCleanTime start time of the clean action, stored as-is in the result
 * @param durationInMs time taken by the clean; {@code -1} is recorded when absent
 * @param cleanStats per-partition statistics of the clean action
 * @return clean metadata stamped with {@code DEFAULT_VERSION}
 */
public static HoodieCleanMetadata convertCleanMetadata(String startCleanTime, Option<Long> durationInMs,
    List<HoodieCleanStat> cleanStats) {
  ImmutableMap.Builder<String, HoodieCleanPartitionMetadata> partitionMetadataBuilder = ImmutableMap.builder();
  int totalDeleted = 0;
  String earliestCommitToRetain = null;
  for (HoodieCleanStat stat : cleanStats) {
    // The last constructor argument is the failed-delete list; it must come from
    // getFailedDeleteFiles(), not from the delete path patterns.
    HoodieCleanPartitionMetadata metadata =
        new HoodieCleanPartitionMetadata(stat.getPartitionPath(), stat.getPolicy().name(),
            stat.getDeletePathPatterns(), stat.getSuccessDeleteFiles(), stat.getFailedDeleteFiles());
    partitionMetadataBuilder.put(stat.getPartitionPath(), metadata);
    totalDeleted += stat.getSuccessDeleteFiles().size();
    if (earliestCommitToRetain == null) {
      // This will be the same for all partitions
      earliestCommitToRetain = stat.getEarliestCommitToRetain();
    }
  }
  return new HoodieCleanMetadata(startCleanTime, durationInMs.orElseGet(() -> -1L), totalDeleted,
      earliestCommitToRetain, partitionMetadataBuilder.build(), DEFAULT_VERSION);
}
public static HoodieRestoreMetadata convertRestoreMetadata(String startRestoreTime, Option<Long> durationInMs,
List<String> commits, Map<String, List<HoodieRollbackStat>> commitToStats) {
ImmutableMap.Builder<String, List<HoodieRollbackMetadata>> commitToStatBuilder = ImmutableMap.builder();

View File

@@ -0,0 +1,60 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.common.util;
import com.google.common.collect.ImmutableMap;
import java.util.List;
import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.avro.model.HoodieCleanPartitionMetadata;
import org.apache.hudi.common.HoodieCleanStat;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.versioning.clean.CleanMetadataMigrator;
import org.apache.hudi.common.versioning.clean.CleanV1MigrationHandler;
import org.apache.hudi.common.versioning.clean.CleanV2MigrationHandler;
/**
 * Static helpers for producing cleaner metadata.
 */
public class CleanerUtils {

  public static final Integer CLEAN_METADATA_VERSION_1 = CleanV1MigrationHandler.VERSION;
  public static final Integer CLEAN_METADATA_VERSION_2 = CleanV2MigrationHandler.VERSION;
  public static final Integer LATEST_CLEAN_METADATA_VERSION = CLEAN_METADATA_VERSION_2;

  /**
   * Assembles clean metadata from per-partition clean statistics.
   *
   * <p>The metadata is first created with {@link #CLEAN_METADATA_VERSION_1} and then handed to
   * {@link CleanMetadataMigrator#upgradeToLatest}, whose result is returned.
   *
   * @param metaClient meta client used to construct the {@link CleanMetadataMigrator}
   * @param startCleanTime start time of the clean action, stored as-is
   * @param durationInMs time taken by the clean; {@code -1} is recorded when absent
   * @param cleanStats per-partition statistics of the clean action
   * @return the metadata returned by the migrator
   */
  public static HoodieCleanMetadata convertCleanMetadata(HoodieTableMetaClient metaClient,
      String startCleanTime, Option<Long> durationInMs, List<HoodieCleanStat> cleanStats) {
    final ImmutableMap.Builder<String, HoodieCleanPartitionMetadata> perPartition = ImmutableMap.builder();
    int deletedFileCount = 0;
    String retainFromCommit = null;

    for (HoodieCleanStat cleanStat : cleanStats) {
      perPartition.put(
          cleanStat.getPartitionPath(),
          new HoodieCleanPartitionMetadata(
              cleanStat.getPartitionPath(),
              cleanStat.getPolicy().name(),
              cleanStat.getDeletePathPatterns(),
              cleanStat.getSuccessDeleteFiles(),
              cleanStat.getFailedDeleteFiles()));
      deletedFileCount += cleanStat.getSuccessDeleteFiles().size();
      // Every partition is expected to report the same value, so the first non-null one wins.
      if (retainFromCommit == null) {
        retainFromCommit = cleanStat.getEarliestCommitToRetain();
      }
    }

    final HoodieCleanMetadata versionOneMetadata = new HoodieCleanMetadata(
        startCleanTime,
        durationInMs.orElseGet(() -> -1L),
        deletedFileCount,
        retainFromCommit,
        perPartition.build(),
        CLEAN_METADATA_VERSION_1);

    return new CleanMetadataMigrator(metaClient)
        .upgradeToLatest(versionOneMetadata, versionOneMetadata.getVersion());
  }
}

View File

@@ -0,0 +1,34 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.common.versioning.clean;
import java.util.Arrays;
import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.versioning.MetadataMigrator;
/**
 * {@link MetadataMigrator} for {@link HoodieCleanMetadata}, registered with the version 1 and
 * version 2 clean migration handlers (in that order).
 */
public class CleanMetadataMigrator extends MetadataMigrator<HoodieCleanMetadata> {

  /**
   * @param metaClient meta client passed to the parent migrator and to each version handler
   */
  public CleanMetadataMigrator(HoodieTableMetaClient metaClient) {
    super(metaClient, Arrays.asList(
        new CleanV1MigrationHandler(metaClient),
        new CleanV2MigrationHandler(metaClient)));
  }
}

View File

@@ -0,0 +1,102 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.common.versioning.clean;
import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.avro.model.HoodieCleanPartitionMetadata;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.util.FSUtils;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.common.versioning.AbstractMigratorBase;
import com.google.common.base.Preconditions;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.hadoop.fs.Path;
public class CleanV1MigrationHandler extends AbstractMigratorBase<HoodieCleanMetadata> {
public static final Integer VERSION = 1;
public CleanV1MigrationHandler(HoodieTableMetaClient metaClient) {
super(metaClient);
}
@Override
public Integer getManagedVersion() {
return VERSION;
}
@Override
public HoodieCleanMetadata upgradeFrom(HoodieCleanMetadata input) {
throw new IllegalArgumentException(
"This is the lowest version. Input cannot be any lower version");
}
@Override
public HoodieCleanMetadata downgradeFrom(HoodieCleanMetadata input) {
Preconditions.checkArgument(input.getVersion() == 2,
"Input version is " + input.getVersion() + ". Must be 2");
final Path basePath = new Path(metaClient.getBasePath());
final Map<String, HoodieCleanPartitionMetadata> partitionMetadataMap = input
.getPartitionMetadata()
.entrySet().stream().map(entry -> {
final String partitionPath = entry.getKey();
final HoodieCleanPartitionMetadata partitionMetadata = entry.getValue();
HoodieCleanPartitionMetadata cleanPartitionMetadata = HoodieCleanPartitionMetadata
.newBuilder()
.setDeletePathPatterns(partitionMetadata.getDeletePathPatterns().stream()
.map(
path -> convertToV1Path(basePath, partitionMetadata.getPartitionPath(), path))
.collect(Collectors.toList()))
.setSuccessDeleteFiles(partitionMetadata.getSuccessDeleteFiles().stream()
.map(
path -> convertToV1Path(basePath, partitionMetadata.getPartitionPath(), path))
.collect(Collectors.toList())).setPartitionPath(partitionPath)
.setFailedDeleteFiles(partitionMetadata.getFailedDeleteFiles().stream()
.map(
path -> convertToV1Path(basePath, partitionMetadata.getPartitionPath(), path))
.collect(Collectors.toList()))
.setPolicy(partitionMetadata.getPolicy()).setPartitionPath(partitionPath)
.build();
return Pair.of(partitionPath, cleanPartitionMetadata);
}).collect(Collectors.toMap(Pair::getKey, Pair::getValue));
HoodieCleanMetadata metadata = HoodieCleanMetadata.newBuilder()
.setEarliestCommitToRetain(input.getEarliestCommitToRetain())
.setStartCleanTime(input.getStartCleanTime())
.setTimeTakenInMillis(input.getTimeTakenInMillis())
.setTotalFilesDeleted(input.getTotalFilesDeleted())
.setPartitionMetadata(partitionMetadataMap)
.setVersion(getManagedVersion()).build();
return metadata;
}
private static String convertToV1Path(Path basePath, String partitionPath, String fileName) {
if ((fileName == null) || (fileName.isEmpty())) {
return fileName;
}
return new Path(FSUtils.getPartitionPath(basePath, partitionPath), fileName).toString();
}
}

View File

@@ -0,0 +1,98 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.common.versioning.clean;
import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.avro.model.HoodieCleanPartitionMetadata;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.common.versioning.AbstractMigratorBase;
import com.google.common.base.Preconditions;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.hadoop.fs.Path;
/**
 * Migration handler for version 2 of {@link HoodieCleanMetadata}.
 *
 * <p>{@link #upgradeFrom} accepts version 1 metadata and reduces every per-partition path entry
 * to its final path component (the file name). Version 2 is the highest version handled here,
 * so {@link #downgradeFrom} always rejects its input.
 */
public class CleanV2MigrationHandler extends AbstractMigratorBase<HoodieCleanMetadata> {

  public static final Integer VERSION = 2;

  public CleanV2MigrationHandler(HoodieTableMetaClient metaClient) {
    super(metaClient);
  }

  @Override
  public Integer getManagedVersion() {
    return VERSION;
  }

  /**
   * Converts version 1 clean metadata into version 2.
   *
   * @param input metadata whose version must be 1
   * @return a new metadata object carrying the same scalar fields, the converted partition
   *         metadata, and this handler's managed version
   * @throws IllegalArgumentException if the input version is not 1
   */
  @Override
  public HoodieCleanMetadata upgradeFrom(HoodieCleanMetadata input) {
    Preconditions.checkArgument(input.getVersion() == 1,
        "Input version is " + input.getVersion() + ". Must be 1");

    Map<String, HoodieCleanPartitionMetadata> partitionMetadataMap = input.getPartitionMetadata()
        .entrySet()
        .stream().map(entry -> {
          final String partitionPath = entry.getKey();
          final HoodieCleanPartitionMetadata partitionMetadata = entry.getValue();
          final List<String> deletePathPatterns = convertToV2Path(
              partitionMetadata.getDeletePathPatterns());
          final List<String> successDeleteFiles = convertToV2Path(
              partitionMetadata.getSuccessDeleteFiles());
          final List<String> failedDeleteFiles = convertToV2Path(
              partitionMetadata.getFailedDeleteFiles());
          final HoodieCleanPartitionMetadata cleanPartitionMetadata = HoodieCleanPartitionMetadata
              .newBuilder().setPolicy(partitionMetadata.getPolicy())
              .setPartitionPath(partitionMetadata.getPartitionPath())
              .setDeletePathPatterns(deletePathPatterns)
              .setSuccessDeleteFiles(successDeleteFiles)
              .setFailedDeleteFiles(failedDeleteFiles).build();
          return Pair.of(partitionPath, cleanPartitionMetadata);
        }).collect(Collectors.toMap(Pair::getKey, Pair::getValue));

    return HoodieCleanMetadata.newBuilder()
        .setEarliestCommitToRetain(input.getEarliestCommitToRetain())
        .setStartCleanTime(input.getStartCleanTime())
        .setTimeTakenInMillis(input.getTimeTakenInMillis())
        .setTotalFilesDeleted(input.getTotalFilesDeleted())
        .setPartitionMetadata(partitionMetadataMap).setVersion(getManagedVersion()).build();
  }

  /**
   * Always fails: there is no higher version to downgrade from.
   *
   * @throws IllegalArgumentException unconditionally
   */
  @Override
  public HoodieCleanMetadata downgradeFrom(HoodieCleanMetadata input) {
    throw new IllegalArgumentException(
        "This is the current highest version. Input cannot be any higher version");
  }

  /** Maps every path in the list to its version 2 form (see {@link #toFileName}). */
  private List<String> convertToV2Path(List<String> paths) {
    return paths.stream().map(CleanV2MigrationHandler::toFileName)
        .collect(Collectors.toList());
  }

  /**
   * Returns the final component of a path. Null and empty entries are returned unchanged,
   * mirroring the guard in the version 1 handler, because {@code new Path(String)} rejects them.
   */
  private static String toFileName(String path) {
    if ((path == null) || (path.isEmpty())) {
      return path;
    }
    return new Path(path).getName();
  }
}

View File

@@ -33,6 +33,7 @@ import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieInstant.State;
import org.apache.hudi.common.util.AvroUtils;
import org.apache.hudi.common.util.CleanerUtils;
import org.apache.hudi.common.util.CompactionUtils;
import org.apache.hudi.common.util.FSUtils;
import org.apache.hudi.common.util.HoodieAvroUtils;
@@ -269,7 +270,8 @@ public class HoodieTestUtils {
.exists();
}
public static void createCleanFiles(String basePath, String commitTime, Configuration configuration)
public static void createCleanFiles(HoodieTableMetaClient metaClient, String basePath,
String commitTime, Configuration configuration)
throws IOException {
Path commitFile = new Path(
basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + HoodieTimeline.makeCleanerFileName(commitTime));
@@ -280,8 +282,9 @@ public class HoodieTestUtils {
DEFAULT_PARTITION_PATHS[rand.nextInt(DEFAULT_PARTITION_PATHS.length)], new ArrayList<>(), new ArrayList<>(),
new ArrayList<>(), commitTime);
// Create the clean metadata
HoodieCleanMetadata cleanMetadata =
AvroUtils.convertCleanMetadata(commitTime, Option.of(0L), Arrays.asList(cleanStats));
CleanerUtils.convertCleanMetadata(metaClient, commitTime, Option.of(0L), Arrays.asList(cleanStats));
// Write empty clean metadata
os.write(AvroUtils.serializeCleanMetadata(cleanMetadata).get());
} finally {
@@ -289,8 +292,9 @@ public class HoodieTestUtils {
}
}
public static void createCleanFiles(String basePath, String commitTime) throws IOException {
createCleanFiles(basePath, commitTime, HoodieTestUtils.getDefaultHadoopConf());
public static void createCleanFiles(HoodieTableMetaClient metaClient,
String basePath, String commitTime) throws IOException {
createCleanFiles(metaClient, basePath, commitTime, HoodieTestUtils.getDefaultHadoopConf());
}
public static String makeTestFileName(String instant) {

View File

@@ -40,6 +40,7 @@ import org.apache.hudi.common.table.SyncableFileSystemView;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieInstant.State;
import org.apache.hudi.common.util.AvroUtils;
import org.apache.hudi.common.util.CleanerUtils;
import org.apache.hudi.common.util.CompactionUtils;
import org.apache.hudi.common.util.FSUtils;
import org.apache.hudi.common.util.Option;
@@ -417,7 +418,8 @@ public class TestIncrementalFSViewSync extends HoodieCommonTestHarness {
new ArrayList<>(), Integer.toString(Integer.parseInt(instant) + 1));
}).collect(Collectors.toList());
HoodieCleanMetadata cleanMetadata = AvroUtils.convertCleanMetadata(cleanInstant, Option.empty(), cleanStats);
HoodieCleanMetadata cleanMetadata = CleanerUtils
.convertCleanMetadata(metaClient, cleanInstant, Option.empty(), cleanStats);
metaClient.getActiveTimeline().saveAsComplete(new HoodieInstant(true, HoodieTimeline.CLEAN_ACTION, cleanInstant),
AvroUtils.serializeCleanMetadata(cleanMetadata));
}