From 244d47494e2d4d5b3ca60e460e1feb9351fb8e69 Mon Sep 17 00:00:00 2001 From: rolandjohann Date: Wed, 20 May 2020 13:22:35 +0200 Subject: [PATCH] [HUDI-888] fix NullPointerException in HoodieCompactor (#1622) --- .../apache/hudi/cli/commands/SparkMain.java | 2 +- .../hudi/utilities/HoodieCompactor.java | 25 +++++++++++++------ 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java index a8c2e72a1..b2871e41a 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java @@ -270,7 +270,7 @@ public class SparkMain { cfg.propsFilePath = propsFilePath; cfg.configs = configs; jsc.getConf().set("spark.executor.memory", sparkMemory); - return new HoodieCompactor(cfg).compact(jsc, retry); + return new HoodieCompactor(jsc, cfg).compact(retry); } private static int deduplicatePartitionPath(JavaSparkContext jsc, String duplicatedPartitionPath, diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactor.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactor.java index 10be77b2f..88db12dcb 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactor.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactor.java @@ -43,11 +43,22 @@ public class HoodieCompactor { private final Config cfg; private transient FileSystem fs; private TypedProperties props; + private final JavaSparkContext jsc; - public HoodieCompactor(Config cfg) { + public HoodieCompactor(JavaSparkContext jsc, Config cfg) { this.cfg = cfg; - this.props = cfg.propsFilePath == null ? UtilHelpers.buildProperties(cfg.configs) - : UtilHelpers.readConfig(fs, new Path(cfg.propsFilePath), cfg.configs).getConfig(); + this.jsc = jsc; + this.props = cfg.propsFilePath == null + ? UtilHelpers.buildProperties(cfg.configs) + : readConfigFromFileSystem(jsc, cfg); + } + + private TypedProperties readConfigFromFileSystem(JavaSparkContext jsc, Config cfg) { + final FileSystem fs = FSUtils.getFs(cfg.basePath, jsc.hadoopConfiguration()); + + return UtilHelpers + .readConfig(fs, new Path(cfg.propsFilePath), cfg.configs) + .getConfig(); } public static class Config implements Serializable { @@ -90,12 +101,12 @@ public class HoodieCompactor { cmd.usage(); System.exit(1); } - HoodieCompactor compactor = new HoodieCompactor(cfg); - compactor.compact(UtilHelpers.buildSparkContext("compactor-" + cfg.tableName, cfg.sparkMaster, cfg.sparkMemory), - cfg.retry); + final JavaSparkContext jsc = UtilHelpers.buildSparkContext("compactor-" + cfg.tableName, cfg.sparkMaster, cfg.sparkMemory); + HoodieCompactor compactor = new HoodieCompactor(jsc, cfg); + compactor.compact(cfg.retry); } - public int compact(JavaSparkContext jsc, int retry) { + public int compact(int retry) { this.fs = FSUtils.getFs(cfg.basePath, jsc.hadoopConfiguration()); int ret = -1; try {