1
0

[HUDI-159] Redesigning bundles for lighter-weight integrations

- Documented principles applied for redesign at packaging/README.md
 - No longer depends on incl commons-codec, commons-io, commons-pool, commons-dbcp, commons-lang, commons-logging, avro-mapred
 - Introduce new FileIOUtils & added checkstyle rule for illegal import of above
 - Parquet, Avro dependencies moved to provided scope to enable being picked up from Hive/Spark/Presto instead
 - Pickup jackson jars for Hive sync tool from HIVE_HOME & unbundling jackson everywhere
 - Remove hive-jdbc standalone jar from being bundled in Spark/Hive/Utilities bundles
 - 6.5x reduced number of classes across bundles
This commit is contained in:
vinoth chandar
2019-09-02 16:15:55 -07:00
committed by Balaji Varadarajan
parent 0e6f078ec4
commit 7a973a6944
60 changed files with 689 additions and 1380 deletions

View File

@@ -18,13 +18,14 @@
package org.apache.hudi.io.compact.strategy;
import com.google.common.annotations.VisibleForTesting;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Comparator;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.commons.lang3.time.DateUtils;
import org.apache.hudi.avro.model.HoodieCompactionOperation;
import org.apache.hudi.avro.model.HoodieCompactionPlan;
import org.apache.hudi.config.HoodieWriteConfig;
@@ -45,8 +46,8 @@ public class BoundedPartitionAwareCompactionStrategy extends DayBasedCompactionS
public List<HoodieCompactionOperation> orderAndFilter(HoodieWriteConfig writeConfig,
List<HoodieCompactionOperation> operations, List<HoodieCompactionPlan> pendingCompactionPlans) {
// The earliest partition to compact - current day minus the target partitions limit
String earliestPartitionPathToCompact = dateFormat.format(DateUtils.addDays(new Date(), -1 * writeConfig
.getTargetPartitionsPerDayBasedCompaction()));
String earliestPartitionPathToCompact = dateFormat.format(
getDateAtOffsetFromToday(-1 * writeConfig.getTargetPartitionsPerDayBasedCompaction()));
// Filter out all partitions greater than earliestPartitionPathToCompact
List<HoodieCompactionOperation> eligibleCompactionOperations = operations.stream()
.collect(Collectors.groupingBy(HoodieCompactionOperation::getPartitionPath)).entrySet().stream()
@@ -61,8 +62,8 @@ public class BoundedPartitionAwareCompactionStrategy extends DayBasedCompactionS
@Override
public List<String> filterPartitionPaths(HoodieWriteConfig writeConfig, List<String> partitionPaths) {
// The earliest partition to compact - current day minus the target partitions limit
String earliestPartitionPathToCompact = dateFormat.format(DateUtils.addDays(new Date(), -1 * writeConfig
.getTargetPartitionsPerDayBasedCompaction()));
String earliestPartitionPathToCompact = dateFormat.format(
getDateAtOffsetFromToday(-1 * writeConfig.getTargetPartitionsPerDayBasedCompaction()));
// Get all partitions and sort them
List<String> filteredPartitionPaths = partitionPaths.stream().map(partition -> partition.replace("/", "-"))
.sorted(Comparator.reverseOrder()).map(partitionPath -> partitionPath.replace("-", "/"))
@@ -70,4 +71,11 @@ public class BoundedPartitionAwareCompactionStrategy extends DayBasedCompactionS
.collect(Collectors.toList());
return filteredPartitionPaths;
}
@VisibleForTesting
public static Date getDateAtOffsetFromToday(int offset) {
Calendar calendar = Calendar.getInstance();
calendar.add(Calendar.DATE, offset);
return calendar.getTime();
}
}

View File

@@ -22,7 +22,6 @@ import com.codahale.metrics.Gauge;
import com.codahale.metrics.MetricRegistry;
import com.google.common.io.Closeables;
import java.io.Closeable;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieException;
import org.apache.log4j.LogManager;
@@ -37,9 +36,9 @@ public class Metrics {
private static volatile boolean initialized = false;
private static Metrics metrics = null;
private final MetricRegistry registry;
private MetricsReporter reporter = null;
private MetricsReporter reporter;
private Metrics(HoodieWriteConfig metricConfig) throws ConfigurationException {
private Metrics(HoodieWriteConfig metricConfig) {
registry = new MetricRegistry();
reporter = MetricsReporterFactory.createReporter(metricConfig, registry);
@@ -72,7 +71,7 @@ public class Metrics {
}
try {
metrics = new Metrics(metricConfig);
} catch (ConfigurationException e) {
} catch (Exception e) {
throw new HoodieException(e);
}
initialized = true;