1
0

Simplify and fix CLI to schedule and run compactions

This commit is contained in:
Balaji Varadarajan
2018-09-06 12:02:09 -07:00
committed by vinoth chandar
parent fad4b513ea
commit e2dee68ccd
4 changed files with 27 additions and 40 deletions

View File

@@ -34,7 +34,7 @@ import org.apache.spark.api.java.JavaSparkContext;
public class HoodieCompactor {
-private static volatile Logger logger = LogManager.getLogger(HDFSParquetImporter.class);
+private static volatile Logger logger = LogManager.getLogger(HoodieCompactor.class);
private final Config cfg;
private transient FileSystem fs;
@@ -51,12 +51,6 @@ public class HoodieCompactor {
@Parameter(names = {"--instant-time",
"-sp"}, description = "Compaction Instant time", required = true)
public String compactionInstantTime = null;
-@Parameter(names = {"--row-key-field",
-"-rk"}, description = "Row key field name", required = true)
-public String rowKey = null;
-@Parameter(names = {"--partition-key-field",
-"-pk"}, description = "Partition key field name", required = true)
-public String partitionKey = null;
@Parameter(names = {"--parallelism",
"-pl"}, description = "Parallelism for hoodie insert", required = true)
public int parallelism = 1;
@@ -120,8 +114,7 @@ public class HoodieCompactor {
private int doSchedule(JavaSparkContext jsc) throws Exception {
//Get schema.
-String schemaStr = UtilHelpers.parseSchema(fs, cfg.schemaFile);
-HoodieWriteClient client = UtilHelpers.createHoodieClient(jsc, cfg.basePath, schemaStr, cfg.parallelism,
+HoodieWriteClient client = UtilHelpers.createHoodieClient(jsc, cfg.basePath, "", cfg.parallelism,
Optional.of(cfg.strategyClassName));
client.scheduleCompactionAtInstant(cfg.compactionInstantTime, Optional.empty());
return 0;