[HUDI-294] Delete Paths written in Cleaner plan needs to be relative to partition-path (#1062)
[HUDI-294] Delete Paths written in Cleaner plan needs to be relative to partition-path
This commit is contained in:
committed by
Balaji Varadarajan
parent
845e261658
commit
98ab33bb6e
@@ -23,11 +23,13 @@ import org.apache.hudi.avro.model.HoodieCleanerPlan;
|
|||||||
import org.apache.hudi.client.embedded.EmbeddedTimelineService;
|
import org.apache.hudi.client.embedded.EmbeddedTimelineService;
|
||||||
import org.apache.hudi.common.HoodieCleanStat;
|
import org.apache.hudi.common.HoodieCleanStat;
|
||||||
import org.apache.hudi.common.model.HoodieRecordPayload;
|
import org.apache.hudi.common.model.HoodieRecordPayload;
|
||||||
|
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||||
import org.apache.hudi.common.table.HoodieTimeline;
|
import org.apache.hudi.common.table.HoodieTimeline;
|
||||||
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
||||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||||
import org.apache.hudi.common.table.timeline.HoodieInstant.State;
|
import org.apache.hudi.common.table.timeline.HoodieInstant.State;
|
||||||
import org.apache.hudi.common.util.AvroUtils;
|
import org.apache.hudi.common.util.AvroUtils;
|
||||||
|
import org.apache.hudi.common.util.CleanerUtils;
|
||||||
import org.apache.hudi.common.util.Option;
|
import org.apache.hudi.common.util.Option;
|
||||||
import org.apache.hudi.config.HoodieWriteConfig;
|
import org.apache.hudi.config.HoodieWriteConfig;
|
||||||
import org.apache.hudi.exception.HoodieIOException;
|
import org.apache.hudi.exception.HoodieIOException;
|
||||||
@@ -166,9 +168,10 @@ public class HoodieCleanClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
logger.info("cleanerElaspsedTime (Minutes): " + durationInMs.get() / (1000 * 60));
|
logger.info("cleanerElaspsedTime (Minutes): " + durationInMs.get() / (1000 * 60));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HoodieTableMetaClient metaClient = createMetaClient(true);
|
||||||
// Create the metadata and save it
|
// Create the metadata and save it
|
||||||
HoodieCleanMetadata metadata =
|
HoodieCleanMetadata metadata =
|
||||||
AvroUtils.convertCleanMetadata(cleanInstant.getTimestamp(), durationInMs, cleanStats);
|
CleanerUtils.convertCleanMetadata(metaClient, cleanInstant.getTimestamp(), durationInMs, cleanStats);
|
||||||
logger.info("Cleaned " + metadata.getTotalFilesDeleted() + " files. Earliest Retained :" + metadata.getEarliestCommitToRetain());
|
logger.info("Cleaned " + metadata.getTotalFilesDeleted() + " files. Earliest Retained :" + metadata.getEarliestCommitToRetain());
|
||||||
metrics.updateCleanMetrics(durationInMs.orElseGet(() -> -1L), metadata.getTotalFilesDeleted());
|
metrics.updateCleanMetrics(durationInMs.orElseGet(() -> -1L), metadata.getTotalFilesDeleted());
|
||||||
|
|
||||||
|
|||||||
@@ -40,11 +40,13 @@ import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
|||||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||||
import org.apache.hudi.common.table.timeline.HoodieInstant.State;
|
import org.apache.hudi.common.table.timeline.HoodieInstant.State;
|
||||||
import org.apache.hudi.common.util.AvroUtils;
|
import org.apache.hudi.common.util.AvroUtils;
|
||||||
|
import org.apache.hudi.common.util.CleanerUtils;
|
||||||
import org.apache.hudi.common.util.CompactionUtils;
|
import org.apache.hudi.common.util.CompactionUtils;
|
||||||
import org.apache.hudi.common.util.ConsistencyGuardConfig;
|
import org.apache.hudi.common.util.ConsistencyGuardConfig;
|
||||||
import org.apache.hudi.common.util.FSUtils;
|
import org.apache.hudi.common.util.FSUtils;
|
||||||
import org.apache.hudi.common.util.Option;
|
import org.apache.hudi.common.util.Option;
|
||||||
import org.apache.hudi.common.util.collection.Pair;
|
import org.apache.hudi.common.util.collection.Pair;
|
||||||
|
import org.apache.hudi.common.versioning.clean.CleanMetadataMigrator;
|
||||||
import org.apache.hudi.config.HoodieCompactionConfig;
|
import org.apache.hudi.config.HoodieCompactionConfig;
|
||||||
import org.apache.hudi.config.HoodieWriteConfig;
|
import org.apache.hudi.config.HoodieWriteConfig;
|
||||||
import org.apache.hudi.index.HoodieIndex;
|
import org.apache.hudi.index.HoodieIndex;
|
||||||
@@ -65,6 +67,7 @@ import java.io.IOException;
|
|||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.nio.file.Paths;
|
import java.nio.file.Paths;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
@@ -75,7 +78,9 @@ import java.util.TreeSet;
|
|||||||
import java.util.function.Predicate;
|
import java.util.function.Predicate;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
import scala.Tuple3;
|
||||||
|
|
||||||
|
import static org.apache.hudi.common.model.HoodieTestUtils.DEFAULT_PARTITION_PATHS;
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertFalse;
|
import static org.junit.Assert.assertFalse;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
@@ -597,6 +602,103 @@ public class TestCleaner extends TestHoodieClientBase {
|
|||||||
file2P0L0, Option.of(2)));
|
file2P0L0, Option.of(2)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUpgradeDowngrade() {
|
||||||
|
String commitTime = "000";
|
||||||
|
|
||||||
|
String partition1 = DEFAULT_PARTITION_PATHS[0];
|
||||||
|
String partition2 = DEFAULT_PARTITION_PATHS[1];
|
||||||
|
|
||||||
|
String fileName1 = "data1_1_000.parquet";
|
||||||
|
String fileName2 = "data2_1_000.parquet";
|
||||||
|
|
||||||
|
String filePath1 = metaClient.getBasePath() + "/" + partition1 + "/" + fileName1;
|
||||||
|
String filePath2 = metaClient.getBasePath() + "/" + partition1 + "/" + fileName2;
|
||||||
|
|
||||||
|
List<String> deletePathPatterns1 = Arrays.asList(filePath1, filePath2);
|
||||||
|
List<String> successDeleteFiles1 = Arrays.asList(filePath1);
|
||||||
|
List<String> failedDeleteFiles1 = Arrays.asList(filePath2);
|
||||||
|
|
||||||
|
// create partition1 clean stat.
|
||||||
|
HoodieCleanStat cleanStat1 = new HoodieCleanStat(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS,
|
||||||
|
partition1, deletePathPatterns1, successDeleteFiles1,
|
||||||
|
failedDeleteFiles1, commitTime);
|
||||||
|
|
||||||
|
List<String> deletePathPatterns2 = new ArrayList<>();
|
||||||
|
List<String> successDeleteFiles2 = new ArrayList<>();
|
||||||
|
List<String> failedDeleteFiles2 = new ArrayList<>();
|
||||||
|
|
||||||
|
// create partition2 empty clean stat.
|
||||||
|
HoodieCleanStat cleanStat2 = new HoodieCleanStat(HoodieCleaningPolicy.KEEP_LATEST_COMMITS,
|
||||||
|
partition2, deletePathPatterns2, successDeleteFiles2,
|
||||||
|
failedDeleteFiles2, commitTime);
|
||||||
|
|
||||||
|
// map with absolute file path.
|
||||||
|
Map<String, Tuple3> oldExpected = new HashMap<>();
|
||||||
|
oldExpected.put(partition1, new Tuple3<>(deletePathPatterns1, successDeleteFiles1, failedDeleteFiles1));
|
||||||
|
oldExpected.put(partition2, new Tuple3<>(deletePathPatterns2, successDeleteFiles2, failedDeleteFiles2));
|
||||||
|
|
||||||
|
// map with relative path.
|
||||||
|
Map<String, Tuple3> newExpected = new HashMap<>();
|
||||||
|
newExpected.put(partition1, new Tuple3<>(Arrays.asList(fileName1, fileName2), Arrays.asList(fileName1), Arrays.asList(fileName2)));
|
||||||
|
newExpected.put(partition2, new Tuple3<>(deletePathPatterns2, successDeleteFiles2, failedDeleteFiles2));
|
||||||
|
|
||||||
|
HoodieCleanMetadata metadata =
|
||||||
|
CleanerUtils.convertCleanMetadata(metaClient, commitTime, Option.of(0L), Arrays.asList(cleanStat1, cleanStat2));
|
||||||
|
|
||||||
|
Assert.assertEquals(CleanerUtils.LATEST_CLEAN_METADATA_VERSION, metadata.getVersion());
|
||||||
|
testCleanMetadataPathEquality(metadata, newExpected);
|
||||||
|
|
||||||
|
CleanMetadataMigrator migrator = new CleanMetadataMigrator(metaClient);
|
||||||
|
HoodieCleanMetadata oldMetadata =
|
||||||
|
migrator.migrateToVersion(metadata, metadata.getVersion(), CleanerUtils.CLEAN_METADATA_VERSION_1);
|
||||||
|
Assert.assertEquals(CleanerUtils.CLEAN_METADATA_VERSION_1, oldMetadata.getVersion());
|
||||||
|
testCleanMetadataEquality(metadata, oldMetadata);
|
||||||
|
testCleanMetadataPathEquality(oldMetadata, oldExpected);
|
||||||
|
|
||||||
|
HoodieCleanMetadata newMetadata = migrator.upgradeToLatest(oldMetadata, oldMetadata.getVersion());
|
||||||
|
Assert.assertEquals(CleanerUtils.LATEST_CLEAN_METADATA_VERSION, newMetadata.getVersion());
|
||||||
|
testCleanMetadataEquality(oldMetadata, newMetadata);
|
||||||
|
testCleanMetadataPathEquality(newMetadata, newExpected);
|
||||||
|
testCleanMetadataPathEquality(oldMetadata, oldExpected);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testCleanMetadataEquality(HoodieCleanMetadata input1, HoodieCleanMetadata input2) {
|
||||||
|
Assert.assertEquals(input1.getEarliestCommitToRetain(), input2.getEarliestCommitToRetain());
|
||||||
|
Assert.assertEquals(input1.getStartCleanTime(), input2.getStartCleanTime());
|
||||||
|
Assert.assertEquals(input1.getTimeTakenInMillis(), input2.getTimeTakenInMillis());
|
||||||
|
Assert.assertEquals(input1.getTotalFilesDeleted(), input2.getTotalFilesDeleted());
|
||||||
|
|
||||||
|
Map<String, HoodieCleanPartitionMetadata> map1 = input1.getPartitionMetadata();
|
||||||
|
Map<String, HoodieCleanPartitionMetadata> map2 = input2.getPartitionMetadata();
|
||||||
|
|
||||||
|
Assert.assertEquals(map1.keySet(), map2.keySet());
|
||||||
|
|
||||||
|
List<String> partitions1 = map1.values().stream().map(m -> m.getPartitionPath()).collect(
|
||||||
|
Collectors.toList());
|
||||||
|
List<String> partitions2 = map2.values().stream().map(m -> m.getPartitionPath()).collect(
|
||||||
|
Collectors.toList());
|
||||||
|
Assert.assertEquals(partitions1, partitions2);
|
||||||
|
|
||||||
|
List<String> policies1 = map1.values().stream().map(m -> m.getPolicy()).collect(Collectors.toList());
|
||||||
|
List<String> policies2 = map2.values().stream().map(m -> m.getPolicy()).collect(Collectors.toList());
|
||||||
|
Assert.assertEquals(policies1, policies2);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testCleanMetadataPathEquality(HoodieCleanMetadata metadata, Map<String, Tuple3> expected) {
|
||||||
|
|
||||||
|
Map<String, HoodieCleanPartitionMetadata> partitionMetadataMap = metadata.getPartitionMetadata();
|
||||||
|
|
||||||
|
for (Map.Entry<String, HoodieCleanPartitionMetadata> entry : partitionMetadataMap.entrySet()) {
|
||||||
|
String partitionPath = entry.getKey();
|
||||||
|
HoodieCleanPartitionMetadata partitionMetadata = entry.getValue();
|
||||||
|
|
||||||
|
Assert.assertEquals(expected.get(partitionPath)._1(), partitionMetadata.getDeletePathPatterns());
|
||||||
|
Assert.assertEquals(expected.get(partitionPath)._2(), partitionMetadata.getSuccessDeleteFiles());
|
||||||
|
Assert.assertEquals(expected.get(partitionPath)._3(), partitionMetadata.getFailedDeleteFiles());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test HoodieTable.clean() Cleaning by commit logic for MOR table with Log files
|
* Test HoodieTable.clean() Cleaning by commit logic for MOR table with Log files
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -141,13 +141,13 @@ public class TestHoodieCommitArchiveLog extends HoodieClientTestHarness {
|
|||||||
|
|
||||||
assertEquals("Loaded 6 commits and the count should match", 6, timeline.countInstants());
|
assertEquals("Loaded 6 commits and the count should match", 6, timeline.countInstants());
|
||||||
|
|
||||||
HoodieTestUtils.createCleanFiles(basePath, "100", dfs.getConf());
|
HoodieTestUtils.createCleanFiles(metaClient, basePath, "100", dfs.getConf());
|
||||||
HoodieTestUtils.createInflightCleanFiles(basePath, dfs.getConf(), "101");
|
HoodieTestUtils.createInflightCleanFiles(basePath, dfs.getConf(), "101");
|
||||||
HoodieTestUtils.createCleanFiles(basePath, "101", dfs.getConf());
|
HoodieTestUtils.createCleanFiles(metaClient, basePath, "101", dfs.getConf());
|
||||||
HoodieTestUtils.createCleanFiles(basePath, "102", dfs.getConf());
|
HoodieTestUtils.createCleanFiles(metaClient, basePath, "102", dfs.getConf());
|
||||||
HoodieTestUtils.createCleanFiles(basePath, "103", dfs.getConf());
|
HoodieTestUtils.createCleanFiles(metaClient, basePath, "103", dfs.getConf());
|
||||||
HoodieTestUtils.createCleanFiles(basePath, "104", dfs.getConf());
|
HoodieTestUtils.createCleanFiles(metaClient, basePath, "104", dfs.getConf());
|
||||||
HoodieTestUtils.createCleanFiles(basePath, "105", dfs.getConf());
|
HoodieTestUtils.createCleanFiles(metaClient, basePath, "105", dfs.getConf());
|
||||||
HoodieTestUtils.createInflightCleanFiles(basePath, dfs.getConf(), "106", "107");
|
HoodieTestUtils.createInflightCleanFiles(basePath, dfs.getConf(), "106", "107");
|
||||||
|
|
||||||
// reload the timeline and get all the commmits before archive
|
// reload the timeline and get all the commmits before archive
|
||||||
|
|||||||
@@ -32,6 +32,7 @@ import org.apache.hudi.common.table.HoodieTimeline;
|
|||||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||||
import org.apache.hudi.common.util.AvroUtils;
|
import org.apache.hudi.common.util.AvroUtils;
|
||||||
import org.apache.hudi.common.util.CompactionUtils;
|
import org.apache.hudi.common.util.CompactionUtils;
|
||||||
|
import org.apache.hudi.common.util.FSUtils;
|
||||||
import org.apache.hudi.common.util.Option;
|
import org.apache.hudi.common.util.Option;
|
||||||
import org.apache.hudi.common.util.TimelineDiffHelper;
|
import org.apache.hudi.common.util.TimelineDiffHelper;
|
||||||
import org.apache.hudi.common.util.TimelineDiffHelper.TimelineDiffResult;
|
import org.apache.hudi.common.util.TimelineDiffHelper.TimelineDiffResult;
|
||||||
@@ -261,7 +262,13 @@ public abstract class IncrementalTimelineSyncFileSystemView extends AbstractTabl
|
|||||||
HoodieCleanMetadata cleanMetadata =
|
HoodieCleanMetadata cleanMetadata =
|
||||||
AvroUtils.deserializeHoodieCleanMetadata(timeline.getInstantDetails(instant).get());
|
AvroUtils.deserializeHoodieCleanMetadata(timeline.getInstantDetails(instant).get());
|
||||||
cleanMetadata.getPartitionMetadata().entrySet().stream().forEach(entry -> {
|
cleanMetadata.getPartitionMetadata().entrySet().stream().forEach(entry -> {
|
||||||
removeFileSlicesForPartition(timeline, instant, entry.getKey(), entry.getValue().getSuccessDeleteFiles());
|
final String basePath = metaClient.getBasePath();
|
||||||
|
final String partitionPath = entry.getValue().getPartitionPath();
|
||||||
|
List<String> fullPathList = entry.getValue().getSuccessDeleteFiles()
|
||||||
|
.stream().map(fileName -> new Path(FSUtils
|
||||||
|
.getPartitionPath(basePath, partitionPath), fileName).toString())
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
removeFileSlicesForPartition(timeline, instant, entry.getKey(), fullPathList);
|
||||||
});
|
});
|
||||||
log.info("Done Syncing cleaner instant (" + instant + ")");
|
log.info("Done Syncing cleaner instant (" + instant + ")");
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -19,7 +19,6 @@
|
|||||||
package org.apache.hudi.common.util;
|
package org.apache.hudi.common.util;
|
||||||
|
|
||||||
import org.apache.hudi.avro.model.HoodieCleanMetadata;
|
import org.apache.hudi.avro.model.HoodieCleanMetadata;
|
||||||
import org.apache.hudi.avro.model.HoodieCleanPartitionMetadata;
|
|
||||||
import org.apache.hudi.avro.model.HoodieCleanerPlan;
|
import org.apache.hudi.avro.model.HoodieCleanerPlan;
|
||||||
import org.apache.hudi.avro.model.HoodieCompactionPlan;
|
import org.apache.hudi.avro.model.HoodieCompactionPlan;
|
||||||
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
|
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
|
||||||
@@ -27,7 +26,6 @@ import org.apache.hudi.avro.model.HoodieRollbackMetadata;
|
|||||||
import org.apache.hudi.avro.model.HoodieRollbackPartitionMetadata;
|
import org.apache.hudi.avro.model.HoodieRollbackPartitionMetadata;
|
||||||
import org.apache.hudi.avro.model.HoodieSavepointMetadata;
|
import org.apache.hudi.avro.model.HoodieSavepointMetadata;
|
||||||
import org.apache.hudi.avro.model.HoodieSavepointPartitionMetadata;
|
import org.apache.hudi.avro.model.HoodieSavepointPartitionMetadata;
|
||||||
import org.apache.hudi.common.HoodieCleanStat;
|
|
||||||
import org.apache.hudi.common.HoodieRollbackStat;
|
import org.apache.hudi.common.HoodieRollbackStat;
|
||||||
|
|
||||||
import com.google.common.base.Preconditions;
|
import com.google.common.base.Preconditions;
|
||||||
@@ -52,26 +50,6 @@ public class AvroUtils {
|
|||||||
|
|
||||||
private static final Integer DEFAULT_VERSION = 1;
|
private static final Integer DEFAULT_VERSION = 1;
|
||||||
|
|
||||||
public static HoodieCleanMetadata convertCleanMetadata(String startCleanTime, Option<Long> durationInMs,
|
|
||||||
List<HoodieCleanStat> cleanStats) {
|
|
||||||
ImmutableMap.Builder<String, HoodieCleanPartitionMetadata> partitionMetadataBuilder = ImmutableMap.builder();
|
|
||||||
int totalDeleted = 0;
|
|
||||||
String earliestCommitToRetain = null;
|
|
||||||
for (HoodieCleanStat stat : cleanStats) {
|
|
||||||
HoodieCleanPartitionMetadata metadata =
|
|
||||||
new HoodieCleanPartitionMetadata(stat.getPartitionPath(), stat.getPolicy().name(),
|
|
||||||
stat.getDeletePathPatterns(), stat.getSuccessDeleteFiles(), stat.getDeletePathPatterns());
|
|
||||||
partitionMetadataBuilder.put(stat.getPartitionPath(), metadata);
|
|
||||||
totalDeleted += stat.getSuccessDeleteFiles().size();
|
|
||||||
if (earliestCommitToRetain == null) {
|
|
||||||
// This will be the same for all partitions
|
|
||||||
earliestCommitToRetain = stat.getEarliestCommitToRetain();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return new HoodieCleanMetadata(startCleanTime, durationInMs.orElseGet(() -> -1L), totalDeleted,
|
|
||||||
earliestCommitToRetain, partitionMetadataBuilder.build(), DEFAULT_VERSION);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static HoodieRestoreMetadata convertRestoreMetadata(String startRestoreTime, Option<Long> durationInMs,
|
public static HoodieRestoreMetadata convertRestoreMetadata(String startRestoreTime, Option<Long> durationInMs,
|
||||||
List<String> commits, Map<String, List<HoodieRollbackStat>> commitToStats) {
|
List<String> commits, Map<String, List<HoodieRollbackStat>> commitToStats) {
|
||||||
ImmutableMap.Builder<String, List<HoodieRollbackMetadata>> commitToStatBuilder = ImmutableMap.builder();
|
ImmutableMap.Builder<String, List<HoodieRollbackMetadata>> commitToStatBuilder = ImmutableMap.builder();
|
||||||
|
|||||||
@@ -0,0 +1,60 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hudi.common.util;
|
||||||
|
|
||||||
|
import com.google.common.collect.ImmutableMap;
|
||||||
|
import java.util.List;
|
||||||
|
import org.apache.hudi.avro.model.HoodieCleanMetadata;
|
||||||
|
import org.apache.hudi.avro.model.HoodieCleanPartitionMetadata;
|
||||||
|
import org.apache.hudi.common.HoodieCleanStat;
|
||||||
|
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||||
|
import org.apache.hudi.common.versioning.clean.CleanMetadataMigrator;
|
||||||
|
import org.apache.hudi.common.versioning.clean.CleanV1MigrationHandler;
|
||||||
|
import org.apache.hudi.common.versioning.clean.CleanV2MigrationHandler;
|
||||||
|
|
||||||
|
public class CleanerUtils {
|
||||||
|
public static final Integer CLEAN_METADATA_VERSION_1 = CleanV1MigrationHandler.VERSION;
|
||||||
|
public static final Integer CLEAN_METADATA_VERSION_2 = CleanV2MigrationHandler.VERSION;
|
||||||
|
public static final Integer LATEST_CLEAN_METADATA_VERSION = CLEAN_METADATA_VERSION_2;
|
||||||
|
|
||||||
|
public static HoodieCleanMetadata convertCleanMetadata(HoodieTableMetaClient metaClient,
|
||||||
|
String startCleanTime, Option<Long> durationInMs, List<HoodieCleanStat> cleanStats) {
|
||||||
|
ImmutableMap.Builder<String, HoodieCleanPartitionMetadata> partitionMetadataBuilder = ImmutableMap.builder();
|
||||||
|
int totalDeleted = 0;
|
||||||
|
String earliestCommitToRetain = null;
|
||||||
|
for (HoodieCleanStat stat : cleanStats) {
|
||||||
|
HoodieCleanPartitionMetadata metadata =
|
||||||
|
new HoodieCleanPartitionMetadata(stat.getPartitionPath(), stat.getPolicy().name(),
|
||||||
|
stat.getDeletePathPatterns(), stat.getSuccessDeleteFiles(), stat.getFailedDeleteFiles());
|
||||||
|
partitionMetadataBuilder.put(stat.getPartitionPath(), metadata);
|
||||||
|
totalDeleted += stat.getSuccessDeleteFiles().size();
|
||||||
|
if (earliestCommitToRetain == null) {
|
||||||
|
// This will be the same for all partitions
|
||||||
|
earliestCommitToRetain = stat.getEarliestCommitToRetain();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
HoodieCleanMetadata metadata = new HoodieCleanMetadata(startCleanTime,
|
||||||
|
durationInMs.orElseGet(() -> -1L), totalDeleted, earliestCommitToRetain,
|
||||||
|
partitionMetadataBuilder.build(), CLEAN_METADATA_VERSION_1);
|
||||||
|
|
||||||
|
CleanMetadataMigrator metadataMigrator = new CleanMetadataMigrator(metaClient);
|
||||||
|
return metadataMigrator.upgradeToLatest(metadata, metadata.getVersion());
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,34 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hudi.common.versioning.clean;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import org.apache.hudi.avro.model.HoodieCleanMetadata;
|
||||||
|
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||||
|
import org.apache.hudi.common.versioning.MetadataMigrator;
|
||||||
|
|
||||||
|
public class CleanMetadataMigrator extends MetadataMigrator<HoodieCleanMetadata> {
|
||||||
|
|
||||||
|
public CleanMetadataMigrator(HoodieTableMetaClient metaClient) {
|
||||||
|
super(metaClient,
|
||||||
|
Arrays
|
||||||
|
.asList(new CleanV1MigrationHandler(metaClient),
|
||||||
|
new CleanV2MigrationHandler(metaClient)));
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,102 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hudi.common.versioning.clean;
|
||||||
|
|
||||||
|
import org.apache.hudi.avro.model.HoodieCleanMetadata;
|
||||||
|
import org.apache.hudi.avro.model.HoodieCleanPartitionMetadata;
|
||||||
|
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||||
|
import org.apache.hudi.common.util.FSUtils;
|
||||||
|
import org.apache.hudi.common.util.collection.Pair;
|
||||||
|
import org.apache.hudi.common.versioning.AbstractMigratorBase;
|
||||||
|
|
||||||
|
import com.google.common.base.Preconditions;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
|
||||||
|
public class CleanV1MigrationHandler extends AbstractMigratorBase<HoodieCleanMetadata> {
|
||||||
|
|
||||||
|
public static final Integer VERSION = 1;
|
||||||
|
|
||||||
|
public CleanV1MigrationHandler(HoodieTableMetaClient metaClient) {
|
||||||
|
super(metaClient);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Integer getManagedVersion() {
|
||||||
|
return VERSION;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public HoodieCleanMetadata upgradeFrom(HoodieCleanMetadata input) {
|
||||||
|
throw new IllegalArgumentException(
|
||||||
|
"This is the lowest version. Input cannot be any lower version");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public HoodieCleanMetadata downgradeFrom(HoodieCleanMetadata input) {
|
||||||
|
Preconditions.checkArgument(input.getVersion() == 2,
|
||||||
|
"Input version is " + input.getVersion() + ". Must be 2");
|
||||||
|
final Path basePath = new Path(metaClient.getBasePath());
|
||||||
|
|
||||||
|
final Map<String, HoodieCleanPartitionMetadata> partitionMetadataMap = input
|
||||||
|
.getPartitionMetadata()
|
||||||
|
.entrySet().stream().map(entry -> {
|
||||||
|
final String partitionPath = entry.getKey();
|
||||||
|
final HoodieCleanPartitionMetadata partitionMetadata = entry.getValue();
|
||||||
|
|
||||||
|
HoodieCleanPartitionMetadata cleanPartitionMetadata = HoodieCleanPartitionMetadata
|
||||||
|
.newBuilder()
|
||||||
|
.setDeletePathPatterns(partitionMetadata.getDeletePathPatterns().stream()
|
||||||
|
.map(
|
||||||
|
path -> convertToV1Path(basePath, partitionMetadata.getPartitionPath(), path))
|
||||||
|
.collect(Collectors.toList()))
|
||||||
|
.setSuccessDeleteFiles(partitionMetadata.getSuccessDeleteFiles().stream()
|
||||||
|
.map(
|
||||||
|
path -> convertToV1Path(basePath, partitionMetadata.getPartitionPath(), path))
|
||||||
|
.collect(Collectors.toList())).setPartitionPath(partitionPath)
|
||||||
|
.setFailedDeleteFiles(partitionMetadata.getFailedDeleteFiles().stream()
|
||||||
|
.map(
|
||||||
|
path -> convertToV1Path(basePath, partitionMetadata.getPartitionPath(), path))
|
||||||
|
.collect(Collectors.toList()))
|
||||||
|
.setPolicy(partitionMetadata.getPolicy()).setPartitionPath(partitionPath)
|
||||||
|
.build();
|
||||||
|
return Pair.of(partitionPath, cleanPartitionMetadata);
|
||||||
|
}).collect(Collectors.toMap(Pair::getKey, Pair::getValue));
|
||||||
|
|
||||||
|
HoodieCleanMetadata metadata = HoodieCleanMetadata.newBuilder()
|
||||||
|
.setEarliestCommitToRetain(input.getEarliestCommitToRetain())
|
||||||
|
.setStartCleanTime(input.getStartCleanTime())
|
||||||
|
.setTimeTakenInMillis(input.getTimeTakenInMillis())
|
||||||
|
.setTotalFilesDeleted(input.getTotalFilesDeleted())
|
||||||
|
.setPartitionMetadata(partitionMetadataMap)
|
||||||
|
.setVersion(getManagedVersion()).build();
|
||||||
|
|
||||||
|
return metadata;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String convertToV1Path(Path basePath, String partitionPath, String fileName) {
|
||||||
|
if ((fileName == null) || (fileName.isEmpty())) {
|
||||||
|
return fileName;
|
||||||
|
}
|
||||||
|
|
||||||
|
return new Path(FSUtils.getPartitionPath(basePath, partitionPath), fileName).toString();
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,98 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hudi.common.versioning.clean;
|
||||||
|
|
||||||
|
import org.apache.hudi.avro.model.HoodieCleanMetadata;
|
||||||
|
import org.apache.hudi.avro.model.HoodieCleanPartitionMetadata;
|
||||||
|
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||||
|
import org.apache.hudi.common.util.collection.Pair;
|
||||||
|
import org.apache.hudi.common.versioning.AbstractMigratorBase;
|
||||||
|
|
||||||
|
import com.google.common.base.Preconditions;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
|
||||||
|
public class CleanV2MigrationHandler extends AbstractMigratorBase<HoodieCleanMetadata> {
|
||||||
|
|
||||||
|
public static final Integer VERSION = 2;
|
||||||
|
|
||||||
|
public CleanV2MigrationHandler(HoodieTableMetaClient metaClient) {
|
||||||
|
super(metaClient);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Integer getManagedVersion() {
|
||||||
|
return VERSION;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public HoodieCleanMetadata upgradeFrom(HoodieCleanMetadata input) {
|
||||||
|
Preconditions.checkArgument(input.getVersion() == 1,
|
||||||
|
"Input version is " + input.getVersion() + ". Must be 1");
|
||||||
|
HoodieCleanMetadata metadata = new HoodieCleanMetadata();
|
||||||
|
metadata.setEarliestCommitToRetain(input.getEarliestCommitToRetain());
|
||||||
|
metadata.setTimeTakenInMillis(input.getTimeTakenInMillis());
|
||||||
|
metadata.setStartCleanTime(input.getStartCleanTime());
|
||||||
|
metadata.setTotalFilesDeleted(input.getTotalFilesDeleted());
|
||||||
|
metadata.setVersion(getManagedVersion());
|
||||||
|
|
||||||
|
Map<String, HoodieCleanPartitionMetadata> partitionMetadataMap = input.getPartitionMetadata()
|
||||||
|
.entrySet()
|
||||||
|
.stream().map(entry -> {
|
||||||
|
final String partitionPath = entry.getKey();
|
||||||
|
final HoodieCleanPartitionMetadata partitionMetadata = entry.getValue();
|
||||||
|
|
||||||
|
final List<String> deletePathPatterns = convertToV2Path(
|
||||||
|
partitionMetadata.getDeletePathPatterns());
|
||||||
|
final List<String> successDeleteFiles = convertToV2Path(
|
||||||
|
partitionMetadata.getSuccessDeleteFiles());
|
||||||
|
final List<String> failedDeleteFiles = convertToV2Path(
|
||||||
|
partitionMetadata.getFailedDeleteFiles());
|
||||||
|
|
||||||
|
final HoodieCleanPartitionMetadata cleanPartitionMetadata = HoodieCleanPartitionMetadata
|
||||||
|
.newBuilder().setPolicy(partitionMetadata.getPolicy())
|
||||||
|
.setPartitionPath(partitionMetadata.getPartitionPath())
|
||||||
|
.setDeletePathPatterns(deletePathPatterns)
|
||||||
|
.setSuccessDeleteFiles(successDeleteFiles)
|
||||||
|
.setFailedDeleteFiles(failedDeleteFiles).build();
|
||||||
|
|
||||||
|
return Pair.of(partitionPath, cleanPartitionMetadata);
|
||||||
|
}).collect(Collectors.toMap(Pair::getKey, Pair::getValue));
|
||||||
|
|
||||||
|
return HoodieCleanMetadata.newBuilder()
|
||||||
|
.setEarliestCommitToRetain(input.getEarliestCommitToRetain())
|
||||||
|
.setStartCleanTime(input.getStartCleanTime())
|
||||||
|
.setTimeTakenInMillis(input.getTimeTakenInMillis())
|
||||||
|
.setTotalFilesDeleted(input.getTotalFilesDeleted())
|
||||||
|
.setPartitionMetadata(partitionMetadataMap).setVersion(getManagedVersion()).build();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public HoodieCleanMetadata downgradeFrom(HoodieCleanMetadata input) {
|
||||||
|
throw new IllegalArgumentException(
|
||||||
|
"This is the current highest version. Input cannot be any higher version");
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<String> convertToV2Path(List<String> paths) {
|
||||||
|
return paths.stream().map(path -> new Path(path).getName())
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -33,6 +33,7 @@ import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
|||||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||||
import org.apache.hudi.common.table.timeline.HoodieInstant.State;
|
import org.apache.hudi.common.table.timeline.HoodieInstant.State;
|
||||||
import org.apache.hudi.common.util.AvroUtils;
|
import org.apache.hudi.common.util.AvroUtils;
|
||||||
|
import org.apache.hudi.common.util.CleanerUtils;
|
||||||
import org.apache.hudi.common.util.CompactionUtils;
|
import org.apache.hudi.common.util.CompactionUtils;
|
||||||
import org.apache.hudi.common.util.FSUtils;
|
import org.apache.hudi.common.util.FSUtils;
|
||||||
import org.apache.hudi.common.util.HoodieAvroUtils;
|
import org.apache.hudi.common.util.HoodieAvroUtils;
|
||||||
@@ -269,7 +270,8 @@ public class HoodieTestUtils {
|
|||||||
.exists();
|
.exists();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void createCleanFiles(String basePath, String commitTime, Configuration configuration)
|
public static void createCleanFiles(HoodieTableMetaClient metaClient, String basePath,
|
||||||
|
String commitTime, Configuration configuration)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
Path commitFile = new Path(
|
Path commitFile = new Path(
|
||||||
basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + HoodieTimeline.makeCleanerFileName(commitTime));
|
basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + HoodieTimeline.makeCleanerFileName(commitTime));
|
||||||
@@ -280,8 +282,9 @@ public class HoodieTestUtils {
|
|||||||
DEFAULT_PARTITION_PATHS[rand.nextInt(DEFAULT_PARTITION_PATHS.length)], new ArrayList<>(), new ArrayList<>(),
|
DEFAULT_PARTITION_PATHS[rand.nextInt(DEFAULT_PARTITION_PATHS.length)], new ArrayList<>(), new ArrayList<>(),
|
||||||
new ArrayList<>(), commitTime);
|
new ArrayList<>(), commitTime);
|
||||||
// Create the clean metadata
|
// Create the clean metadata
|
||||||
|
|
||||||
HoodieCleanMetadata cleanMetadata =
|
HoodieCleanMetadata cleanMetadata =
|
||||||
AvroUtils.convertCleanMetadata(commitTime, Option.of(0L), Arrays.asList(cleanStats));
|
CleanerUtils.convertCleanMetadata(metaClient, commitTime, Option.of(0L), Arrays.asList(cleanStats));
|
||||||
// Write empty clean metadata
|
// Write empty clean metadata
|
||||||
os.write(AvroUtils.serializeCleanMetadata(cleanMetadata).get());
|
os.write(AvroUtils.serializeCleanMetadata(cleanMetadata).get());
|
||||||
} finally {
|
} finally {
|
||||||
@@ -289,8 +292,9 @@ public class HoodieTestUtils {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void createCleanFiles(String basePath, String commitTime) throws IOException {
|
public static void createCleanFiles(HoodieTableMetaClient metaClient,
|
||||||
createCleanFiles(basePath, commitTime, HoodieTestUtils.getDefaultHadoopConf());
|
String basePath, String commitTime) throws IOException {
|
||||||
|
createCleanFiles(metaClient, basePath, commitTime, HoodieTestUtils.getDefaultHadoopConf());
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String makeTestFileName(String instant) {
|
public static String makeTestFileName(String instant) {
|
||||||
|
|||||||
@@ -40,6 +40,7 @@ import org.apache.hudi.common.table.SyncableFileSystemView;
|
|||||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||||
import org.apache.hudi.common.table.timeline.HoodieInstant.State;
|
import org.apache.hudi.common.table.timeline.HoodieInstant.State;
|
||||||
import org.apache.hudi.common.util.AvroUtils;
|
import org.apache.hudi.common.util.AvroUtils;
|
||||||
|
import org.apache.hudi.common.util.CleanerUtils;
|
||||||
import org.apache.hudi.common.util.CompactionUtils;
|
import org.apache.hudi.common.util.CompactionUtils;
|
||||||
import org.apache.hudi.common.util.FSUtils;
|
import org.apache.hudi.common.util.FSUtils;
|
||||||
import org.apache.hudi.common.util.Option;
|
import org.apache.hudi.common.util.Option;
|
||||||
@@ -417,7 +418,8 @@ public class TestIncrementalFSViewSync extends HoodieCommonTestHarness {
|
|||||||
new ArrayList<>(), Integer.toString(Integer.parseInt(instant) + 1));
|
new ArrayList<>(), Integer.toString(Integer.parseInt(instant) + 1));
|
||||||
}).collect(Collectors.toList());
|
}).collect(Collectors.toList());
|
||||||
|
|
||||||
HoodieCleanMetadata cleanMetadata = AvroUtils.convertCleanMetadata(cleanInstant, Option.empty(), cleanStats);
|
HoodieCleanMetadata cleanMetadata = CleanerUtils
|
||||||
|
.convertCleanMetadata(metaClient, cleanInstant, Option.empty(), cleanStats);
|
||||||
metaClient.getActiveTimeline().saveAsComplete(new HoodieInstant(true, HoodieTimeline.CLEAN_ACTION, cleanInstant),
|
metaClient.getActiveTimeline().saveAsComplete(new HoodieInstant(true, HoodieTimeline.CLEAN_ACTION, cleanInstant),
|
||||||
AvroUtils.serializeCleanMetadata(cleanMetadata));
|
AvroUtils.serializeCleanMetadata(cleanMetadata));
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user