1
0

Changing Day based compaction strategy to be IO agnostic

This commit is contained in:
Nishith Agarwal
2018-05-23 14:26:11 -07:00
committed by vinoth chandar
parent 3da063f83b
commit a6fe96fdfe
4 changed files with 55 additions and 13 deletions

View File

@@ -104,6 +104,10 @@ public class HoodieCompactionConfig extends DefaultHoodieConfig {
private static final String DEFAULT_CLEANER_COMMITS_RETAINED = "24";
private static final String DEFAULT_MAX_COMMITS_TO_KEEP = String.valueOf(128);
private static final String DEFAULT_MIN_COMMITS_TO_KEEP = String.valueOf(96);
public static final String TARGET_PARTITIONS_PER_DAYBASED_COMPACTION_PROP = "hoodie.compaction.daybased.target"
+ ".partitions";
// Default number of partitions targeted per day-based compaction run
public static final String DEFAULT_TARGET_PARTITIONS_PER_DAYBASED_COMPACTION = String.valueOf(10);
private HoodieCompactionConfig(Properties props) {
super(props);
@@ -230,6 +234,12 @@ public class HoodieCompactionConfig extends DefaultHoodieConfig {
return this;
}
/**
 * Records the partition target used by day-based compaction.
 *
 * @param targetPartitionsPerCompaction value stored, as a string, under
 *        {@code TARGET_PARTITIONS_PER_DAYBASED_COMPACTION_PROP}
 * @return this builder, to allow call chaining
 */
public Builder withTargetPartitionsPerDayBasedCompaction(int targetPartitionsPerCompaction) {
  final String partitionTarget = Integer.toString(targetPartitionsPerCompaction);
  props.setProperty(TARGET_PARTITIONS_PER_DAYBASED_COMPACTION_PROP, partitionTarget);
  return this;
}
public HoodieCompactionConfig build() {
HoodieCompactionConfig config = new HoodieCompactionConfig(props);
setDefaultOnCondition(props, !props.containsKey(AUTO_CLEAN_PROP), AUTO_CLEAN_PROP,
@@ -269,6 +279,8 @@ public class HoodieCompactionConfig extends DefaultHoodieConfig {
COMPACTION_LAZY_BLOCK_READ_ENABLED_PROP, DEFAULT_COMPACTION_LAZY_BLOCK_READ_ENABLED);
setDefaultOnCondition(props, !props.containsKey(COMPACTION_REVERSE_LOG_READ_ENABLED_PROP),
COMPACTION_REVERSE_LOG_READ_ENABLED_PROP, DEFAULT_COMPACTION_REVERSE_LOG_READ_ENABLED);
setDefaultOnCondition(props, !props.containsKey(TARGET_PARTITIONS_PER_DAYBASED_COMPACTION_PROP),
TARGET_PARTITIONS_PER_DAYBASED_COMPACTION_PROP, DEFAULT_TARGET_PARTITIONS_PER_DAYBASED_COMPACTION);
HoodieCleaningPolicy.valueOf(props.getProperty(CLEANER_POLICY_PROP));
Preconditions.checkArgument(Integer.parseInt(props.getProperty(MAX_COMMITS_TO_KEEP)) > Integer

View File

@@ -237,6 +237,11 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
return props.getProperty(HoodieCompactionConfig.PAYLOAD_CLASS_PROP);
}
/**
 * Reads the day-based compaction partition target from the config properties.
 *
 * @return the value stored under
 *         {@code HoodieCompactionConfig.TARGET_PARTITIONS_PER_DAYBASED_COMPACTION_PROP}, parsed as an int
 */
public int getTargetPartitionsPerDayBasedCompaction() {
  final String rawTarget =
      props.getProperty(HoodieCompactionConfig.TARGET_PARTITIONS_PER_DAYBASED_COMPACTION_PROP);
  return Integer.parseInt(rawTarget);
}
/**
* index properties
**/

View File

@@ -17,6 +17,7 @@
package com.uber.hoodie.io.compact.strategy;
import com.google.common.annotations.VisibleForTesting;
import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.exception.HoodieException;
import com.uber.hoodie.io.compact.CompactionOperation;
@@ -26,39 +27,46 @@ import java.util.Comparator;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.stream.Collectors;
/**
* This strategy orders compactions in reverse order of creation of Hive Partitions. It compacts
* data in the latest partitions first, then older ones, capped at the configured number of target partitions.
*/
public class DayBasedCompactionStrategy extends BoundedIOCompactionStrategy {
public class DayBasedCompactionStrategy extends CompactionStrategy {
// For now, use SimpleDateFormat as default partition format
private static String datePartitionFormat = "yyyy/MM/dd";
// Sorts compaction in LastInFirstCompacted order
private static Comparator<CompactionOperation> comparator = (CompactionOperation leftC,
CompactionOperation rightC) -> {
private static Comparator<String> comparator = (String leftPartition,
String rightPartition) -> {
try {
Date left = new SimpleDateFormat(datePartitionFormat, Locale.ENGLISH)
.parse(leftC.getPartitionPath());
.parse(leftPartition);
Date right = new SimpleDateFormat(datePartitionFormat, Locale.ENGLISH)
.parse(rightC.getPartitionPath());
.parse(rightPartition);
return left.after(right) ? -1 : right.after(left) ? 1 : 0;
} catch (ParseException e) {
throw new HoodieException("Invalid Partition Date Format", e);
}
};
public Comparator<CompactionOperation> getComparator() {
@VisibleForTesting
public Comparator<String> getComparator() {
return comparator;
}
@Override
public List<CompactionOperation> orderAndFilter(HoodieWriteConfig writeConfig,
List<CompactionOperation> operations) {
// Iterate through the operations and accept operations as long as we are within the IO limit
return super.orderAndFilter(writeConfig,
operations.stream().sorted(comparator).collect(Collectors.toList()));
// Iterate through the operations and accept operations as long as we are within the configured target partitions
// limit
List<CompactionOperation> filteredList = operations.stream()
.collect(Collectors.groupingBy(CompactionOperation::getPartitionPath)).entrySet().stream()
.sorted(Map.Entry.comparingByKey(comparator)).limit(writeConfig.getTargetPartitionsPerDayBasedCompaction())
.flatMap(e -> e.getValue().stream())
.collect(Collectors.toList());
return filteredList;
}
}