1
0

[HUDI-699] Fix CompactionCommand and add unit test for CompactionCommand (#2325)

This commit is contained in:
hongdd
2021-04-08 15:35:33 +08:00
committed by GitHub
parent 18459d4045
commit ecdbd2517f
11 changed files with 725 additions and 84 deletions

View File

@@ -143,4 +143,25 @@ public class HoodieTableHeaderFields {
public static final String HEADER_TOTAL_ROLLBACK_BLOCKS = "Total Rollback Blocks";
public static final String HEADER_TOTAL_LOG_RECORDS = "Total Log Records";
public static final String HEADER_TOTAL_UPDATED_RECORDS_COMPACTED = "Total Updated Records Compacted";
/**
* Fields of Compaction.
*
* Column headers used by the tables that CompactionCommand prints. Composite headers are
* built by prefixing the shared "Instant Time" and "File Path" fragments defined here.
*/
public static final String HEADER_INSTANT_BLANK_TIME = "Instant Time";
public static final String HEADER_FILE_PATH = "File Path";
public static final String HEADER_COMPACTION_INSTANT_TIME = "Compaction " + HEADER_INSTANT_BLANK_TIME;
public static final String HEADER_STATE = "State";
public static final String HEADER_TOTAL_FILES_TO_BE_COMPACTED = "Total FileIds to be Compacted";
public static final String HEADER_EXTRA_METADATA = "Extra Metadata";
public static final String HEADER_DATA_FILE_PATH = "Data " + HEADER_FILE_PATH;
// HEADER_DELTA_FILES is declared outside the lines shown in this hunk.
public static final String HEADER_TOTAL_DELTA_FILES = "Total " + HEADER_DELTA_FILES;
// Keeps the literal "getMetrics" column title that CompactionCommand previously hard-coded.
public static final String HEADER_METRICS = "getMetrics";
public static final String HEADER_BASE_INSTANT_TIME = "Base " + HEADER_INSTANT_BLANK_TIME;
public static final String HEADER_BASE_DATA_FILE = "Base Data File";
public static final String HEADER_VALID = "Valid";
public static final String HEADER_ERROR = "Error";
public static final String HEADER_SOURCE_FILE_PATH = "Source " + HEADER_FILE_PATH;
public static final String HEADER_DESTINATION_FILE_PATH = "Destination " + HEADER_FILE_PATH;
public static final String HEADER_RENAME_EXECUTED = "Rename Executed?";
public static final String HEADER_RENAME_SUCCEEDED = "Rename Succeeded?";
}

View File

@@ -22,6 +22,7 @@ import org.apache.hudi.avro.model.HoodieCompactionOperation;
import org.apache.hudi.avro.model.HoodieCompactionPlan;
import org.apache.hudi.cli.HoodieCLI;
import org.apache.hudi.cli.HoodiePrintHelper;
import org.apache.hudi.cli.HoodieTableHeaderFields;
import org.apache.hudi.cli.TableHeader;
import org.apache.hudi.cli.commands.SparkMain.SparkCommand;
import org.apache.hudi.cli.utils.CommitUtil;
@@ -97,8 +98,7 @@ public class CompactionCommand implements CommandMarker {
@CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
@CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
@CliOption(key = {"headeronly"}, help = "Print Header Only",
unspecifiedDefaultValue = "false") final boolean headerOnly)
throws IOException {
unspecifiedDefaultValue = "false") final boolean headerOnly) {
HoodieTableMetaClient client = checkAndGetMetaClient();
HoodieActiveTimeline activeTimeline = client.getActiveTimeline();
return printAllCompactions(activeTimeline,
@@ -139,8 +139,7 @@ public class CompactionCommand implements CommandMarker {
@CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
@CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
@CliOption(key = {"headeronly"}, help = "Print Header Only",
unspecifiedDefaultValue = "false") final boolean headerOnly)
throws Exception {
unspecifiedDefaultValue = "false") final boolean headerOnly) {
if (StringUtils.isNullOrEmpty(startTs)) {
startTs = CommitUtil.getTimeDaysAgo(10);
}
@@ -150,7 +149,7 @@ public class CompactionCommand implements CommandMarker {
HoodieTableMetaClient client = checkAndGetMetaClient();
HoodieArchivedTimeline archivedTimeline = client.getArchivedTimeline();
archivedTimeline.loadInstantDetailsInMemory(startTs, endTs);
archivedTimeline.loadCompactionDetailsInMemory(startTs, endTs);
try {
return printAllCompactions(archivedTimeline,
compactionPlanReader(this::readCompactionPlanForArchivedTimeline, archivedTimeline),
@@ -175,25 +174,25 @@ public class CompactionCommand implements CommandMarker {
HoodieArchivedTimeline archivedTimeline = client.getArchivedTimeline();
HoodieInstant instant = new HoodieInstant(HoodieInstant.State.COMPLETED,
HoodieTimeline.COMPACTION_ACTION, compactionInstantTime);
String startTs = CommitUtil.addHours(compactionInstantTime, -1);
String endTs = CommitUtil.addHours(compactionInstantTime, 1);
try {
archivedTimeline.loadInstantDetailsInMemory(startTs, endTs);
HoodieCompactionPlan compactionPlan = TimelineMetadataUtils.deserializeCompactionPlan(
archivedTimeline.getInstantDetails(instant).get());
archivedTimeline.loadCompactionDetailsInMemory(compactionInstantTime);
HoodieCompactionPlan compactionPlan = TimelineMetadataUtils.deserializeAvroRecordMetadata(
archivedTimeline.getInstantDetails(instant).get(), HoodieCompactionPlan.getClassSchema());
return printCompaction(compactionPlan, sortByField, descending, limit, headerOnly);
} finally {
archivedTimeline.clearInstantDetailsFromMemory(startTs, endTs);
archivedTimeline.clearInstantDetailsFromMemory(compactionInstantTime);
}
}
@CliCommand(value = "compaction schedule", help = "Schedule Compaction")
public String scheduleCompact(@CliOption(key = "sparkMemory", unspecifiedDefaultValue = "1G",
help = "Spark executor memory") final String sparkMemory,
@CliOption(key = "propsFilePath", help = "path to properties file on localfs or dfs with configurations for hoodie client for compacting",
unspecifiedDefaultValue = "") final String propsFilePath,
@CliOption(key = "hoodieConfigs", help = "Any configuration that can be set in the properties file can be passed here in the form of an array",
unspecifiedDefaultValue = "") final String[] configs) throws Exception {
@CliOption(key = "propsFilePath", help = "path to properties file on localfs or dfs with configurations for hoodie client for compacting",
unspecifiedDefaultValue = "") final String propsFilePath,
@CliOption(key = "hoodieConfigs", help = "Any configuration that can be set in the properties file can be passed here in the form of an array",
unspecifiedDefaultValue = "") final String[] configs,
@CliOption(key = "sparkMaster", unspecifiedDefaultValue = "local", help = "Spark Master") String master)
throws Exception {
HoodieTableMetaClient client = checkAndGetMetaClient();
boolean initialized = HoodieCLI.initConf();
HoodieCLI.initFS(initialized);
@@ -204,8 +203,9 @@ public class CompactionCommand implements CommandMarker {
String sparkPropertiesPath =
Utils.getDefaultPropertiesFile(scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties()));
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
sparkLauncher.addAppArgs(SparkCommand.COMPACT_SCHEDULE.toString(), client.getBasePath(),
client.getTableConfig().getTableName(), compactionInstantTime, sparkMemory, propsFilePath);
String cmd = SparkCommand.COMPACT_SCHEDULE.toString();
sparkLauncher.addAppArgs(cmd, master, sparkMemory, client.getBasePath(),
client.getTableConfig().getTableName(), compactionInstantTime, propsFilePath);
UtilHelpers.validateAndAddProperties(configs, sparkLauncher);
Process process = sparkLauncher.launch();
InputStreamConsumer.captureOutput(process);
@@ -222,6 +222,8 @@ public class CompactionCommand implements CommandMarker {
help = "Parallelism for hoodie compaction") final String parallelism,
@CliOption(key = "schemaFilePath", mandatory = true,
help = "Path for Avro schema file") final String schemaFilePath,
@CliOption(key = "sparkMaster", unspecifiedDefaultValue = "local",
help = "Spark Master") String master,
@CliOption(key = "sparkMemory", unspecifiedDefaultValue = "4G",
help = "Spark executor memory") final String sparkMemory,
@CliOption(key = "retry", unspecifiedDefaultValue = "1", help = "Number of retries") final String retry,
@@ -249,9 +251,9 @@ public class CompactionCommand implements CommandMarker {
String sparkPropertiesPath =
Utils.getDefaultPropertiesFile(scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties()));
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
sparkLauncher.addAppArgs(SparkCommand.COMPACT_RUN.toString(), client.getBasePath(),
sparkLauncher.addAppArgs(SparkCommand.COMPACT_RUN.toString(), master, sparkMemory, client.getBasePath(),
client.getTableConfig().getTableName(), compactionInstantTime, parallelism, schemaFilePath,
sparkMemory, retry, propsFilePath);
retry, propsFilePath);
UtilHelpers.validateAndAddProperties(configs, sparkLauncher);
Process process = sparkLauncher.launch();
InputStreamConsumer.captureOutput(process);
@@ -279,15 +281,15 @@ public class CompactionCommand implements CommandMarker {
.filter(pair -> pair.getRight() != null)
.collect(Collectors.toList());
Set<HoodieInstant> committedInstants = timeline.getCommitTimeline().filterCompletedInstants()
.getInstants().collect(Collectors.toSet());
Set<String> committedInstants = timeline.getCommitTimeline().filterCompletedInstants()
.getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toSet());
List<Comparable[]> rows = new ArrayList<>();
for (Pair<HoodieInstant, HoodieCompactionPlan> compactionPlan : compactionPlans) {
HoodieCompactionPlan plan = compactionPlan.getRight();
HoodieInstant instant = compactionPlan.getLeft();
final HoodieInstant.State state;
if (committedInstants.contains(instant)) {
if (committedInstants.contains(instant.getTimestamp())) {
state = HoodieInstant.State.COMPLETED;
} else {
state = instant.getState();
@@ -304,10 +306,12 @@ public class CompactionCommand implements CommandMarker {
}
Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
TableHeader header = new TableHeader().addTableHeaderField("Compaction Instant Time").addTableHeaderField("State")
.addTableHeaderField("Total FileIds to be Compacted");
TableHeader header = new TableHeader()
.addTableHeaderField(HoodieTableHeaderFields.HEADER_COMPACTION_INSTANT_TIME)
.addTableHeaderField(HoodieTableHeaderFields.HEADER_STATE)
.addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_FILES_TO_BE_COMPACTED);
if (includeExtraMetadata) {
header = header.addTableHeaderField("Extra Metadata");
header = header.addTableHeaderField(HoodieTableHeaderFields.HEADER_EXTRA_METADATA);
}
return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit, headerOnly, rows);
}
@@ -326,14 +330,17 @@ public class CompactionCommand implements CommandMarker {
/**
* Reads the compaction plan stored for {@code instant} in the archived timeline.
*
* NOTE(review): this span is a rendered diff hunk, so removed and added lines appear together
* without markers. The description below follows what are presumably the added lines (the
* inflight check, deserializeAvroRecordMetadata and HoodieException) - confirm against the
* committed file.
*
* @param archivedTimeline archived timeline whose instant details are read
* @param instant instant whose compaction plan is requested
* @return the deserialized plan when the instant is an inflight compaction, otherwise {@code null}
* @throws HoodieException wrapping any exception raised while reading or deserializing the details
*/
private HoodieCompactionPlan readCompactionPlanForArchivedTimeline(HoodieArchivedTimeline archivedTimeline,
HoodieInstant instant) {
if (!HoodieTimeline.COMPACTION_ACTION.equals(instant.getAction())) {
return null;
} else {
// keep only inflight compaction instants; any other instant yields null
if (HoodieTimeline.COMPACTION_ACTION.equals(instant.getAction())
&& HoodieInstant.State.INFLIGHT.equals(instant.getState())) {
try {
return TimelineMetadataUtils.deserializeCompactionPlan(archivedTimeline.getInstantDetails(instant).get());
} catch (IOException e) {
throw new HoodieIOException(e.getMessage(), e);
return TimelineMetadataUtils.deserializeAvroRecordMetadata(archivedTimeline.getInstantDetails(instant).get(),
HoodieCompactionPlan.getClassSchema());
} catch (Exception e) {
throw new HoodieException(e.getMessage(), e);
}
} else {
return null;
}
}
@@ -362,7 +369,7 @@ public class CompactionCommand implements CommandMarker {
}
}
private String printCompaction(HoodieCompactionPlan compactionPlan,
protected String printCompaction(HoodieCompactionPlan compactionPlan,
String sortByField,
boolean descending,
int limit,
@@ -376,9 +383,13 @@ public class CompactionCommand implements CommandMarker {
}
Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
TableHeader header = new TableHeader().addTableHeaderField("Partition Path").addTableHeaderField("File Id")
.addTableHeaderField("Base Instant").addTableHeaderField("Data File Path")
.addTableHeaderField("Total Delta Files").addTableHeaderField("getMetrics");
TableHeader header = new TableHeader()
.addTableHeaderField(HoodieTableHeaderFields.HEADER_PARTITION_PATH)
.addTableHeaderField(HoodieTableHeaderFields.HEADER_FILE_ID)
.addTableHeaderField(HoodieTableHeaderFields.HEADER_BASE_INSTANT)
.addTableHeaderField(HoodieTableHeaderFields.HEADER_DATA_FILE_PATH)
.addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_DELTA_FILES)
.addTableHeaderField(HoodieTableHeaderFields.HEADER_METRICS);
return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit, headerOnly, rows);
}
@@ -404,7 +415,7 @@ public class CompactionCommand implements CommandMarker {
public String validateCompaction(
@CliOption(key = "instant", mandatory = true, help = "Compaction Instant") String compactionInstant,
@CliOption(key = {"parallelism"}, unspecifiedDefaultValue = "3", help = "Parallelism") String parallelism,
@CliOption(key = "sparkMaster", unspecifiedDefaultValue = "", help = "Spark Master ") String master,
@CliOption(key = "sparkMaster", unspecifiedDefaultValue = "local", help = "Spark Master") String master,
@CliOption(key = "sparkMemory", unspecifiedDefaultValue = "2G", help = "executor memory") String sparkMemory,
@CliOption(key = {"limit"}, help = "Limit commits", unspecifiedDefaultValue = "-1") Integer limit,
@CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") String sortByField,
@@ -444,9 +455,13 @@ public class CompactionCommand implements CommandMarker {
});
Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
TableHeader header = new TableHeader().addTableHeaderField("File Id").addTableHeaderField("Base Instant Time")
.addTableHeaderField("Base Data File").addTableHeaderField("Num Delta Files").addTableHeaderField("Valid")
.addTableHeaderField("Error");
TableHeader header = new TableHeader()
.addTableHeaderField(HoodieTableHeaderFields.HEADER_FILE_ID)
.addTableHeaderField(HoodieTableHeaderFields.HEADER_BASE_INSTANT_TIME)
.addTableHeaderField(HoodieTableHeaderFields.HEADER_BASE_DATA_FILE)
.addTableHeaderField(HoodieTableHeaderFields.HEADER_NUM_DELTA_FILES)
.addTableHeaderField(HoodieTableHeaderFields.HEADER_VALID)
.addTableHeaderField(HoodieTableHeaderFields.HEADER_ERROR);
output = message + HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit,
headerOnly, rows);
@@ -463,7 +478,7 @@ public class CompactionCommand implements CommandMarker {
public String unscheduleCompaction(
@CliOption(key = "instant", mandatory = true, help = "Compaction Instant") String compactionInstant,
@CliOption(key = {"parallelism"}, unspecifiedDefaultValue = "3", help = "Parallelism") String parallelism,
@CliOption(key = "sparkMaster", unspecifiedDefaultValue = "", help = "Spark Master ") String master,
@CliOption(key = "sparkMaster", unspecifiedDefaultValue = "local", help = "Spark Master") String master,
@CliOption(key = "sparkMemory", unspecifiedDefaultValue = "2G", help = "executor memory") String sparkMemory,
@CliOption(key = {"skipValidation"}, help = "skip validation", unspecifiedDefaultValue = "false") boolean skipV,
@CliOption(key = {"dryRun"}, help = "Dry Run Mode", unspecifiedDefaultValue = "false") boolean dryRun,
@@ -508,7 +523,8 @@ public class CompactionCommand implements CommandMarker {
@CliCommand(value = "compaction unscheduleFileId", help = "UnSchedule Compaction for a fileId")
public String unscheduleCompactFile(
@CliOption(key = "fileId", mandatory = true, help = "File Id") final String fileId,
@CliOption(key = "sparkMaster", unspecifiedDefaultValue = "", help = "Spark Master ") String master,
@CliOption(key = "partitionPath", mandatory = true, help = "partition path") final String partitionPath,
@CliOption(key = "sparkMaster", unspecifiedDefaultValue = "local", help = "Spark Master") String master,
@CliOption(key = "sparkMemory", unspecifiedDefaultValue = "2G", help = "executor memory") String sparkMemory,
@CliOption(key = {"skipValidation"}, help = "skip validation", unspecifiedDefaultValue = "false") boolean skipV,
@CliOption(key = {"dryRun"}, help = "Dry Run Mode", unspecifiedDefaultValue = "false") boolean dryRun,
@@ -529,7 +545,7 @@ public class CompactionCommand implements CommandMarker {
.getDefaultPropertiesFile(scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties()));
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
sparkLauncher.addAppArgs(SparkCommand.COMPACT_UNSCHEDULE_FILE.toString(), master, sparkMemory, client.getBasePath(),
fileId, outputPathStr, "1", Boolean.valueOf(skipV).toString(),
fileId, partitionPath, outputPathStr, "1", Boolean.valueOf(skipV).toString(),
Boolean.valueOf(dryRun).toString());
Process process = sparkLauncher.launch();
InputStreamConsumer.captureOutput(process);
@@ -554,7 +570,7 @@ public class CompactionCommand implements CommandMarker {
public String repairCompaction(
@CliOption(key = "instant", mandatory = true, help = "Compaction Instant") String compactionInstant,
@CliOption(key = {"parallelism"}, unspecifiedDefaultValue = "3", help = "Parallelism") String parallelism,
@CliOption(key = "sparkMaster", unspecifiedDefaultValue = "", help = "Spark Master ") String master,
@CliOption(key = "sparkMaster", unspecifiedDefaultValue = "local", help = "Spark Master") String master,
@CliOption(key = "sparkMemory", unspecifiedDefaultValue = "2G", help = "executor memory") String sparkMemory,
@CliOption(key = {"dryRun"}, help = "Dry Run Mode", unspecifiedDefaultValue = "false") boolean dryRun,
@CliOption(key = {"limit"}, help = "Limit commits", unspecifiedDefaultValue = "-1") Integer limit,
@@ -616,9 +632,13 @@ public class CompactionCommand implements CommandMarker {
});
Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
TableHeader header = new TableHeader().addTableHeaderField("File Id").addTableHeaderField("Source File Path")
.addTableHeaderField("Destination File Path").addTableHeaderField("Rename Executed?")
.addTableHeaderField("Rename Succeeded?").addTableHeaderField("Error");
TableHeader header = new TableHeader()
.addTableHeaderField(HoodieTableHeaderFields.HEADER_FILE_ID)
.addTableHeaderField(HoodieTableHeaderFields.HEADER_SOURCE_FILE_PATH)
.addTableHeaderField(HoodieTableHeaderFields.HEADER_DESTINATION_FILE_PATH)
.addTableHeaderField(HoodieTableHeaderFields.HEADER_RENAME_EXECUTED)
.addTableHeaderField(HoodieTableHeaderFields.HEADER_RENAME_SUCCEEDED)
.addTableHeaderField(HoodieTableHeaderFields.HEADER_ERROR);
return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit, headerOnly, rows);
} else {

View File

@@ -79,9 +79,7 @@ public class SparkMain {
SparkCommand cmd = SparkCommand.valueOf(command);
JavaSparkContext jsc = sparkMasterContained(cmd)
? SparkUtil.initJavaSparkConf("hoodie-cli-" + command, Option.of(args[1]), Option.of(args[2]))
: SparkUtil.initJavaSparkConf("hoodie-cli-" + command);
JavaSparkContext jsc = SparkUtil.initJavaSparkConf("hoodie-cli-" + command, Option.of(args[1]), Option.of(args[2]));
int returnCode = 0;
try {
switch (cmd) {
@@ -112,29 +110,29 @@ public class SparkMain {
Integer.parseInt(args[9]), args[10], Integer.parseInt(args[11]), propsFilePath, configs);
break;
case COMPACT_RUN:
// Argument layout after this change, matching the addAppArgs call in CompactionCommand:
// args[0]=command, [1]=sparkMaster, [2]=sparkMemory, [3]=basePath, [4]=tableName,
// [5]=compactionInstant, [6]=parallelism, [7]=schemaFilePath, [8]=retry, [9]=propsFilePath.
// Any entries from index 10 onwards are treated as hoodie configs.
assert (args.length >= 9);
assert (args.length >= 10);
propsFilePath = null;
if (!StringUtils.isNullOrEmpty(args[8])) {
propsFilePath = args[8];
if (!StringUtils.isNullOrEmpty(args[9])) {
propsFilePath = args[9];
}
configs = new ArrayList<>();
if (args.length > 9) {
if (args.length > 10) {
// Configs start after propsFilePath (args[9]). Starting the slice at 9 would pass the
// properties file path in as the first config entry.
configs.addAll(Arrays.asList(args).subList(10, args.length));
}
returnCode = compact(jsc, args[1], args[2], args[3], Integer.parseInt(args[4]), args[5], args[6],
Integer.parseInt(args[7]), false, propsFilePath, configs);
returnCode = compact(jsc, args[3], args[4], args[5], Integer.parseInt(args[6]), args[7],
Integer.parseInt(args[8]), false, propsFilePath, configs);
break;
case COMPACT_SCHEDULE:
assert (args.length >= 6);
assert (args.length >= 7);
propsFilePath = null;
if (!StringUtils.isNullOrEmpty(args[5])) {
propsFilePath = args[5];
if (!StringUtils.isNullOrEmpty(args[6])) {
propsFilePath = args[6];
}
configs = new ArrayList<>();
if (args.length > 6) {
configs.addAll(Arrays.asList(args).subList(6, args.length));
if (args.length > 7) {
configs.addAll(Arrays.asList(args).subList(7, args.length));
}
returnCode = compact(jsc, args[1], args[2], args[3], 1, "", args[4], 0, true, propsFilePath, configs);
returnCode = compact(jsc, args[3], args[4], args[5], 1, "", 0, true, propsFilePath, configs);
break;
case COMPACT_VALIDATE:
assert (args.length == 7);
@@ -148,9 +146,9 @@ public class SparkMain {
returnCode = 0;
break;
case COMPACT_UNSCHEDULE_FILE:
assert (args.length == 9);
doCompactUnscheduleFile(jsc, args[3], args[4], args[5], Integer.parseInt(args[6]),
Boolean.parseBoolean(args[7]), Boolean.parseBoolean(args[8]));
assert (args.length == 10);
doCompactUnscheduleFile(jsc, args[3], args[4], args[5], args[6], Integer.parseInt(args[7]),
Boolean.parseBoolean(args[8]), Boolean.parseBoolean(args[9]));
returnCode = 0;
break;
case COMPACT_UNSCHEDULE_PLAN:
@@ -209,14 +207,6 @@ public class SparkMain {
System.exit(returnCode);
}
/**
* Tells whether {@code command} is one of the commands enumerated in the list below.
*
* The caller visible in this diff uses the result to decide whether args[1] and args[2] are
* forwarded to SparkUtil.initJavaSparkConf.
* NOTE(review): the hunk header (-209,14 +207,6) suggests this helper is removed by the
* commit - confirm before relying on it.
*
* @param command the Spark command being dispatched
* @return {@code true} if the command appears in the list, {@code false} otherwise
*/
private static boolean sparkMasterContained(SparkCommand command) {
List<SparkCommand> masterContained = Arrays.asList(SparkCommand.COMPACT_VALIDATE, SparkCommand.COMPACT_REPAIR,
SparkCommand.COMPACT_UNSCHEDULE_PLAN, SparkCommand.COMPACT_UNSCHEDULE_FILE, SparkCommand.CLEAN,
SparkCommand.IMPORT, SparkCommand.UPSERT, SparkCommand.DEDUPLICATE, SparkCommand.SAVEPOINT,
SparkCommand.DELETE_SAVEPOINT, SparkCommand.ROLLBACK_TO_SAVEPOINT, SparkCommand.ROLLBACK, SparkCommand.BOOTSTRAP);
return masterContained.contains(command);
}
protected static void clean(JavaSparkContext jsc, String basePath, String propsFilePath,
List<String> configs) {
HoodieCleaner.Config cfg = new HoodieCleaner.Config();
@@ -280,13 +270,14 @@ public class SparkMain {
new HoodieCompactionAdminTool(cfg).run(jsc);
}
private static void doCompactUnscheduleFile(JavaSparkContext jsc, String basePath, String fileId, String outputPath,
int parallelism, boolean skipValidation, boolean dryRun)
private static void doCompactUnscheduleFile(JavaSparkContext jsc, String basePath, String fileId, String partitionPath,
String outputPath, int parallelism, boolean skipValidation, boolean dryRun)
throws Exception {
HoodieCompactionAdminTool.Config cfg = new HoodieCompactionAdminTool.Config();
cfg.basePath = basePath;
cfg.operation = Operation.UNSCHEDULE_FILE;
cfg.outputPath = outputPath;
cfg.partitionPath = partitionPath;
cfg.fileId = fileId;
cfg.parallelism = parallelism;
cfg.dryRun = dryRun;
@@ -295,7 +286,7 @@ public class SparkMain {
}
private static int compact(JavaSparkContext jsc, String basePath, String tableName, String compactionInstant,
int parallelism, String schemaFile, String sparkMemory, int retry, boolean schedule, String propsFilePath,
int parallelism, String schemaFile, int retry, boolean schedule, String propsFilePath,
List<String> configs) {
HoodieCompactor.Config cfg = new HoodieCompactor.Config();
cfg.basePath = basePath;
@@ -308,7 +299,6 @@ public class SparkMain {
cfg.runSchedule = schedule;
cfg.propsFilePath = propsFilePath;
cfg.configs = configs;
jsc.getConf().set("spark.executor.memory", sparkMemory);
return new HoodieCompactor(jsc, cfg).compact(retry);
}