[HUDI-699] Fix CompactionCommand and add unit test for CompactionCommand (#2325)
HoodieTableHeaderFields.java:
@@ -143,4 +143,25 @@ public class HoodieTableHeaderFields {
   public static final String HEADER_TOTAL_ROLLBACK_BLOCKS = "Total Rollback Blocks";
   public static final String HEADER_TOTAL_LOG_RECORDS = "Total Log Records";
   public static final String HEADER_TOTAL_UPDATED_RECORDS_COMPACTED = "Total Updated Records Compacted";
+
+  /**
+   * Fields of Compaction.
+   */
+  public static final String HEADER_INSTANT_BLANK_TIME = "Instant Time";
+  public static final String HEADER_FILE_PATH = "File Path";
+  public static final String HEADER_COMPACTION_INSTANT_TIME = "Compaction " + HEADER_INSTANT_BLANK_TIME;
+  public static final String HEADER_STATE = "State";
+  public static final String HEADER_TOTAL_FILES_TO_BE_COMPACTED = "Total FileIds to be Compacted";
+  public static final String HEADER_EXTRA_METADATA = "Extra Metadata";
+  public static final String HEADER_DATA_FILE_PATH = "Data " + HEADER_FILE_PATH;
+  public static final String HEADER_TOTAL_DELTA_FILES = "Total " + HEADER_DELTA_FILES;
+  public static final String HEADER_METRICS = "getMetrics";
+  public static final String HEADER_BASE_INSTANT_TIME = "Base " + HEADER_INSTANT_BLANK_TIME;
+  public static final String HEADER_BASE_DATA_FILE = "Base Data File";
+  public static final String HEADER_VALID = "Valid";
+  public static final String HEADER_ERROR = "Error";
+  public static final String HEADER_SOURCE_FILE_PATH = "Source " + HEADER_FILE_PATH;
+  public static final String HEADER_DESTINATION_FILE_PATH = "Destination " + HEADER_FILE_PATH;
+  public static final String HEADER_RENAME_EXECUTED = "Rename Executed?";
+  public static final String HEADER_RENAME_SUCCEEDED = "Rename Succeeded?";
 }
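Note: centralizing these header strings lets the new unit test assert against the same constants that CompactionCommand renders, instead of re-typing string literals in the test. A minimal sketch of the idea (the test wiring below is illustrative, not the committed test code):

    // Java: build the expected header exactly the way the command does, so a
    // renamed column can only ever break one constant, not scattered literals.
    TableHeader expected = new TableHeader()
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_COMPACTION_INSTANT_TIME)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_STATE)
        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_FILES_TO_BE_COMPACTED);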
CompactionCommand.java:
@@ -22,6 +22,7 @@ import org.apache.hudi.avro.model.HoodieCompactionOperation;
 import org.apache.hudi.avro.model.HoodieCompactionPlan;
 import org.apache.hudi.cli.HoodieCLI;
 import org.apache.hudi.cli.HoodiePrintHelper;
+import org.apache.hudi.cli.HoodieTableHeaderFields;
 import org.apache.hudi.cli.TableHeader;
 import org.apache.hudi.cli.commands.SparkMain.SparkCommand;
 import org.apache.hudi.cli.utils.CommitUtil;
@@ -97,8 +98,7 @@ public class CompactionCommand implements CommandMarker {
       @CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
       @CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
       @CliOption(key = {"headeronly"}, help = "Print Header Only",
-          unspecifiedDefaultValue = "false") final boolean headerOnly)
-      throws IOException {
+          unspecifiedDefaultValue = "false") final boolean headerOnly) {
     HoodieTableMetaClient client = checkAndGetMetaClient();
     HoodieActiveTimeline activeTimeline = client.getActiveTimeline();
     return printAllCompactions(activeTimeline,
@@ -139,8 +139,7 @@ public class CompactionCommand implements CommandMarker {
       @CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
       @CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
       @CliOption(key = {"headeronly"}, help = "Print Header Only",
-          unspecifiedDefaultValue = "false") final boolean headerOnly)
-      throws Exception {
+          unspecifiedDefaultValue = "false") final boolean headerOnly) {
     if (StringUtils.isNullOrEmpty(startTs)) {
       startTs = CommitUtil.getTimeDaysAgo(10);
     }
@@ -150,7 +149,7 @@ public class CompactionCommand implements CommandMarker {

     HoodieTableMetaClient client = checkAndGetMetaClient();
     HoodieArchivedTimeline archivedTimeline = client.getArchivedTimeline();
-    archivedTimeline.loadInstantDetailsInMemory(startTs, endTs);
+    archivedTimeline.loadCompactionDetailsInMemory(startTs, endTs);
     try {
       return printAllCompactions(archivedTimeline,
           compactionPlanReader(this::readCompactionPlanForArchivedTimeline, archivedTimeline),
@@ -175,25 +174,25 @@ public class CompactionCommand implements CommandMarker {
     HoodieArchivedTimeline archivedTimeline = client.getArchivedTimeline();
     HoodieInstant instant = new HoodieInstant(HoodieInstant.State.COMPLETED,
         HoodieTimeline.COMPACTION_ACTION, compactionInstantTime);
-    String startTs = CommitUtil.addHours(compactionInstantTime, -1);
-    String endTs = CommitUtil.addHours(compactionInstantTime, 1);
     try {
-      archivedTimeline.loadInstantDetailsInMemory(startTs, endTs);
-      HoodieCompactionPlan compactionPlan = TimelineMetadataUtils.deserializeCompactionPlan(
-          archivedTimeline.getInstantDetails(instant).get());
+      archivedTimeline.loadCompactionDetailsInMemory(compactionInstantTime);
+      HoodieCompactionPlan compactionPlan = TimelineMetadataUtils.deserializeAvroRecordMetadata(
+          archivedTimeline.getInstantDetails(instant).get(), HoodieCompactionPlan.getClassSchema());
       return printCompaction(compactionPlan, sortByField, descending, limit, headerOnly);
     } finally {
-      archivedTimeline.clearInstantDetailsFromMemory(startTs, endTs);
+      archivedTimeline.clearInstantDetailsFromMemory(compactionInstantTime);
     }
   }

   @CliCommand(value = "compaction schedule", help = "Schedule Compaction")
   public String scheduleCompact(@CliOption(key = "sparkMemory", unspecifiedDefaultValue = "1G",
       help = "Spark executor memory") final String sparkMemory,
-      @CliOption(key = "propsFilePath", help = "path to properties file on localfs or dfs with configurations for hoodie client for compacting",
-          unspecifiedDefaultValue = "") final String propsFilePath,
-      @CliOption(key = "hoodieConfigs", help = "Any configuration that can be set in the properties file can be passed here in the form of an array",
-          unspecifiedDefaultValue = "") final String[] configs) throws Exception {
+      @CliOption(key = "propsFilePath", help = "path to properties file on localfs or dfs with configurations for hoodie client for compacting",
+          unspecifiedDefaultValue = "") final String propsFilePath,
+      @CliOption(key = "hoodieConfigs", help = "Any configuration that can be set in the properties file can be passed here in the form of an array",
+          unspecifiedDefaultValue = "") final String[] configs,
+      @CliOption(key = "sparkMaster", unspecifiedDefaultValue = "local", help = "Spark Master") String master)
+      throws Exception {
     HoodieTableMetaClient client = checkAndGetMetaClient();
     boolean initialized = HoodieCLI.initConf();
     HoodieCLI.initFS(initialized);
@@ -204,8 +203,9 @@ public class CompactionCommand implements CommandMarker {
     String sparkPropertiesPath =
         Utils.getDefaultPropertiesFile(scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties()));
     SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
-    sparkLauncher.addAppArgs(SparkCommand.COMPACT_SCHEDULE.toString(), client.getBasePath(),
-        client.getTableConfig().getTableName(), compactionInstantTime, sparkMemory, propsFilePath);
+    String cmd = SparkCommand.COMPACT_SCHEDULE.toString();
+    sparkLauncher.addAppArgs(cmd, master, sparkMemory, client.getBasePath(),
+        client.getTableConfig().getTableName(), compactionInstantTime, propsFilePath);
     UtilHelpers.validateAndAddProperties(configs, sparkLauncher);
     Process process = sparkLauncher.launch();
     InputStreamConsumer.captureOutput(process);
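Note: the launcher now emits a fixed positional layout — command name first, then Spark master and executor memory, then the command-specific arguments — which is the contract SparkMain indexes against (see the SparkMain hunks below). For "compaction schedule" the resulting app args are:

    // args[0] = COMPACT_SCHEDULE   args[1] = master       args[2] = sparkMemory
    // args[3] = basePath           args[4] = tableName    args[5] = compactionInstantTime
    // args[6] = propsFilePath      args[7..] = hoodieConfigs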
@@ -222,6 +222,8 @@ public class CompactionCommand implements CommandMarker {
           help = "Parallelism for hoodie compaction") final String parallelism,
       @CliOption(key = "schemaFilePath", mandatory = true,
           help = "Path for Avro schema file") final String schemaFilePath,
+      @CliOption(key = "sparkMaster", unspecifiedDefaultValue = "local",
+          help = "Spark Master") String master,
       @CliOption(key = "sparkMemory", unspecifiedDefaultValue = "4G",
           help = "Spark executor memory") final String sparkMemory,
       @CliOption(key = "retry", unspecifiedDefaultValue = "1", help = "Number of retries") final String retry,
@@ -249,9 +251,9 @@ public class CompactionCommand implements CommandMarker {
     String sparkPropertiesPath =
         Utils.getDefaultPropertiesFile(scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties()));
     SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
-    sparkLauncher.addAppArgs(SparkCommand.COMPACT_RUN.toString(), client.getBasePath(),
+    sparkLauncher.addAppArgs(SparkCommand.COMPACT_RUN.toString(), master, sparkMemory, client.getBasePath(),
         client.getTableConfig().getTableName(), compactionInstantTime, parallelism, schemaFilePath,
-        sparkMemory, retry, propsFilePath);
+        retry, propsFilePath);
     UtilHelpers.validateAndAddProperties(configs, sparkLauncher);
     Process process = sparkLauncher.launch();
     InputStreamConsumer.captureOutput(process);
@@ -279,15 +281,15 @@ public class CompactionCommand implements CommandMarker {
         .filter(pair -> pair.getRight() != null)
         .collect(Collectors.toList());

-    Set<HoodieInstant> committedInstants = timeline.getCommitTimeline().filterCompletedInstants()
-        .getInstants().collect(Collectors.toSet());
+    Set<String> committedInstants = timeline.getCommitTimeline().filterCompletedInstants()
+        .getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toSet());

     List<Comparable[]> rows = new ArrayList<>();
     for (Pair<HoodieInstant, HoodieCompactionPlan> compactionPlan : compactionPlans) {
       HoodieCompactionPlan plan = compactionPlan.getRight();
       HoodieInstant instant = compactionPlan.getLeft();
       final HoodieInstant.State state;
-      if (committedInstants.contains(instant)) {
+      if (committedInstants.contains(instant.getTimestamp())) {
         state = HoodieInstant.State.COMPLETED;
       } else {
         state = instant.getState();
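Note: the Set is keyed by timestamp because HoodieInstant equality also covers action and state. A finished compaction surfaces on the commit timeline as a COMPLETED commit instant, while the instant paired with its plan is still the compaction-action instant, so object equality never matched and rows were never shown as COMPLETED. A minimal sketch (the timestamp value is illustrative):

    // Same logical compaction, two HoodieInstant representations:
    HoodieInstant planInstant = new HoodieInstant(HoodieInstant.State.REQUESTED,
        HoodieTimeline.COMPACTION_ACTION, "20201201120000");
    HoodieInstant committed = new HoodieInstant(HoodieInstant.State.COMPLETED,
        HoodieTimeline.COMMIT_ACTION, "20201201120000");
    // planInstant.equals(committed) -> false, but the timestamps agree:
    planInstant.getTimestamp().equals(committed.getTimestamp()); // true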
@@ -304,10 +306,12 @@ public class CompactionCommand implements CommandMarker {
     }

     Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
-    TableHeader header = new TableHeader().addTableHeaderField("Compaction Instant Time").addTableHeaderField("State")
-        .addTableHeaderField("Total FileIds to be Compacted");
+    TableHeader header = new TableHeader()
+        .addTableHeaderField(HoodieTableHeaderFields.HEADER_COMPACTION_INSTANT_TIME)
+        .addTableHeaderField(HoodieTableHeaderFields.HEADER_STATE)
+        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_FILES_TO_BE_COMPACTED);
     if (includeExtraMetadata) {
-      header = header.addTableHeaderField("Extra Metadata");
+      header = header.addTableHeaderField(HoodieTableHeaderFields.HEADER_EXTRA_METADATA);
     }
     return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit, headerOnly, rows);
   }
@@ -326,14 +330,17 @@ public class CompactionCommand implements CommandMarker {

   private HoodieCompactionPlan readCompactionPlanForArchivedTimeline(HoodieArchivedTimeline archivedTimeline,
                                                                      HoodieInstant instant) {
-    if (!HoodieTimeline.COMPACTION_ACTION.equals(instant.getAction())) {
-      return null;
-    } else {
+    // filter inflight compaction
+    if (HoodieTimeline.COMPACTION_ACTION.equals(instant.getAction())
+        && HoodieInstant.State.INFLIGHT.equals(instant.getState())) {
       try {
-        return TimelineMetadataUtils.deserializeCompactionPlan(archivedTimeline.getInstantDetails(instant).get());
-      } catch (IOException e) {
-        throw new HoodieIOException(e.getMessage(), e);
+        return TimelineMetadataUtils.deserializeAvroRecordMetadata(archivedTimeline.getInstantDetails(instant).get(),
+            HoodieCompactionPlan.getClassSchema());
+      } catch (Exception e) {
+        throw new HoodieException(e.getMessage(), e);
       }
+    } else {
+      return null;
     }
   }

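Note: archived instants carry their metadata as plain serialized Avro records, so the plan is decoded against HoodieCompactionPlan's own schema via deserializeAvroRecordMetadata; deserializeCompactionPlan expects the active timeline's serialization and fails on archived bytes, hence also the wider catch. Roughly, a schema-driven decode looks like this (an illustrative sketch of standard Avro usage, not Hudi's exact internals):

    // byte[] bytes = archivedTimeline.getInstantDetails(instant).get();
    SpecificDatumReader<HoodieCompactionPlan> reader =
        new SpecificDatumReader<>(HoodieCompactionPlan.getClassSchema());
    BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(bytes, null);
    HoodieCompactionPlan plan = reader.read(null, decoder);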
@@ -362,7 +369,7 @@ public class CompactionCommand implements CommandMarker {
     }
   }

-  private String printCompaction(HoodieCompactionPlan compactionPlan,
+  protected String printCompaction(HoodieCompactionPlan compactionPlan,
                                  String sortByField,
                                  boolean descending,
                                  int limit,
@@ -376,9 +383,13 @@ public class CompactionCommand implements CommandMarker {
     }

     Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
-    TableHeader header = new TableHeader().addTableHeaderField("Partition Path").addTableHeaderField("File Id")
-        .addTableHeaderField("Base Instant").addTableHeaderField("Data File Path")
-        .addTableHeaderField("Total Delta Files").addTableHeaderField("getMetrics");
+    TableHeader header = new TableHeader()
+        .addTableHeaderField(HoodieTableHeaderFields.HEADER_PARTITION_PATH)
+        .addTableHeaderField(HoodieTableHeaderFields.HEADER_FILE_ID)
+        .addTableHeaderField(HoodieTableHeaderFields.HEADER_BASE_INSTANT)
+        .addTableHeaderField(HoodieTableHeaderFields.HEADER_DATA_FILE_PATH)
+        .addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_DELTA_FILES)
+        .addTableHeaderField(HoodieTableHeaderFields.HEADER_METRICS);
     return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit, headerOnly, rows);
   }
@@ -404,7 +415,7 @@ public class CompactionCommand implements CommandMarker {
   public String validateCompaction(
       @CliOption(key = "instant", mandatory = true, help = "Compaction Instant") String compactionInstant,
       @CliOption(key = {"parallelism"}, unspecifiedDefaultValue = "3", help = "Parallelism") String parallelism,
-      @CliOption(key = "sparkMaster", unspecifiedDefaultValue = "", help = "Spark Master ") String master,
+      @CliOption(key = "sparkMaster", unspecifiedDefaultValue = "local", help = "Spark Master") String master,
       @CliOption(key = "sparkMemory", unspecifiedDefaultValue = "2G", help = "executor memory") String sparkMemory,
       @CliOption(key = {"limit"}, help = "Limit commits", unspecifiedDefaultValue = "-1") Integer limit,
       @CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") String sortByField,
@@ -444,9 +455,13 @@ public class CompactionCommand implements CommandMarker {
       });

       Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
-      TableHeader header = new TableHeader().addTableHeaderField("File Id").addTableHeaderField("Base Instant Time")
-          .addTableHeaderField("Base Data File").addTableHeaderField("Num Delta Files").addTableHeaderField("Valid")
-          .addTableHeaderField("Error");
+      TableHeader header = new TableHeader()
+          .addTableHeaderField(HoodieTableHeaderFields.HEADER_FILE_ID)
+          .addTableHeaderField(HoodieTableHeaderFields.HEADER_BASE_INSTANT_TIME)
+          .addTableHeaderField(HoodieTableHeaderFields.HEADER_BASE_DATA_FILE)
+          .addTableHeaderField(HoodieTableHeaderFields.HEADER_NUM_DELTA_FILES)
+          .addTableHeaderField(HoodieTableHeaderFields.HEADER_VALID)
+          .addTableHeaderField(HoodieTableHeaderFields.HEADER_ERROR);

       output = message + HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit,
           headerOnly, rows);
@@ -463,7 +478,7 @@ public class CompactionCommand implements CommandMarker {
   public String unscheduleCompaction(
       @CliOption(key = "instant", mandatory = true, help = "Compaction Instant") String compactionInstant,
       @CliOption(key = {"parallelism"}, unspecifiedDefaultValue = "3", help = "Parallelism") String parallelism,
-      @CliOption(key = "sparkMaster", unspecifiedDefaultValue = "", help = "Spark Master ") String master,
+      @CliOption(key = "sparkMaster", unspecifiedDefaultValue = "local", help = "Spark Master") String master,
       @CliOption(key = "sparkMemory", unspecifiedDefaultValue = "2G", help = "executor memory") String sparkMemory,
       @CliOption(key = {"skipValidation"}, help = "skip validation", unspecifiedDefaultValue = "false") boolean skipV,
       @CliOption(key = {"dryRun"}, help = "Dry Run Mode", unspecifiedDefaultValue = "false") boolean dryRun,
@@ -508,7 +523,8 @@ public class CompactionCommand implements CommandMarker {
   @CliCommand(value = "compaction unscheduleFileId", help = "UnSchedule Compaction for a fileId")
   public String unscheduleCompactFile(
       @CliOption(key = "fileId", mandatory = true, help = "File Id") final String fileId,
-      @CliOption(key = "sparkMaster", unspecifiedDefaultValue = "", help = "Spark Master ") String master,
+      @CliOption(key = "partitionPath", mandatory = true, help = "partition path") final String partitionPath,
+      @CliOption(key = "sparkMaster", unspecifiedDefaultValue = "local", help = "Spark Master") String master,
       @CliOption(key = "sparkMemory", unspecifiedDefaultValue = "2G", help = "executor memory") String sparkMemory,
       @CliOption(key = {"skipValidation"}, help = "skip validation", unspecifiedDefaultValue = "false") boolean skipV,
       @CliOption(key = {"dryRun"}, help = "Dry Run Mode", unspecifiedDefaultValue = "false") boolean dryRun,
@@ -529,7 +545,7 @@ public class CompactionCommand implements CommandMarker {
         .getDefaultPropertiesFile(scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties()));
     SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
     sparkLauncher.addAppArgs(SparkCommand.COMPACT_UNSCHEDULE_FILE.toString(), master, sparkMemory, client.getBasePath(),
-        fileId, outputPathStr, "1", Boolean.valueOf(skipV).toString(),
+        fileId, partitionPath, outputPathStr, "1", Boolean.valueOf(skipV).toString(),
         Boolean.valueOf(dryRun).toString());
     Process process = sparkLauncher.launch();
     InputStreamConsumer.captureOutput(process);
@@ -554,7 +570,7 @@ public class CompactionCommand implements CommandMarker {
   public String repairCompaction(
       @CliOption(key = "instant", mandatory = true, help = "Compaction Instant") String compactionInstant,
       @CliOption(key = {"parallelism"}, unspecifiedDefaultValue = "3", help = "Parallelism") String parallelism,
-      @CliOption(key = "sparkMaster", unspecifiedDefaultValue = "", help = "Spark Master ") String master,
+      @CliOption(key = "sparkMaster", unspecifiedDefaultValue = "local", help = "Spark Master") String master,
       @CliOption(key = "sparkMemory", unspecifiedDefaultValue = "2G", help = "executor memory") String sparkMemory,
       @CliOption(key = {"dryRun"}, help = "Dry Run Mode", unspecifiedDefaultValue = "false") boolean dryRun,
       @CliOption(key = {"limit"}, help = "Limit commits", unspecifiedDefaultValue = "-1") Integer limit,
@@ -616,9 +632,13 @@ public class CompactionCommand implements CommandMarker {
       });

       Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
-      TableHeader header = new TableHeader().addTableHeaderField("File Id").addTableHeaderField("Source File Path")
-          .addTableHeaderField("Destination File Path").addTableHeaderField("Rename Executed?")
-          .addTableHeaderField("Rename Succeeded?").addTableHeaderField("Error");
+      TableHeader header = new TableHeader()
+          .addTableHeaderField(HoodieTableHeaderFields.HEADER_FILE_ID)
+          .addTableHeaderField(HoodieTableHeaderFields.HEADER_SOURCE_FILE_PATH)
+          .addTableHeaderField(HoodieTableHeaderFields.HEADER_DESTINATION_FILE_PATH)
+          .addTableHeaderField(HoodieTableHeaderFields.HEADER_RENAME_EXECUTED)
+          .addTableHeaderField(HoodieTableHeaderFields.HEADER_RENAME_SUCCEEDED)
+          .addTableHeaderField(HoodieTableHeaderFields.HEADER_ERROR);

       return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit, headerOnly, rows);
     } else {
SparkMain.java:
@@ -79,9 +79,7 @@ public class SparkMain {

     SparkCommand cmd = SparkCommand.valueOf(command);

-    JavaSparkContext jsc = sparkMasterContained(cmd)
-        ? SparkUtil.initJavaSparkConf("hoodie-cli-" + command, Option.of(args[1]), Option.of(args[2]))
-        : SparkUtil.initJavaSparkConf("hoodie-cli-" + command);
+    JavaSparkContext jsc = SparkUtil.initJavaSparkConf("hoodie-cli-" + command, Option.of(args[1]), Option.of(args[2]));
     int returnCode = 0;
     try {
       switch (cmd) {
@@ -112,29 +110,29 @@ public class SparkMain {
             Integer.parseInt(args[9]), args[10], Integer.parseInt(args[11]), propsFilePath, configs);
         break;
       case COMPACT_RUN:
-        assert (args.length >= 9);
+        assert (args.length >= 10);
         propsFilePath = null;
-        if (!StringUtils.isNullOrEmpty(args[8])) {
-          propsFilePath = args[8];
+        if (!StringUtils.isNullOrEmpty(args[9])) {
+          propsFilePath = args[9];
         }
         configs = new ArrayList<>();
-        if (args.length > 9) {
+        if (args.length > 10) {
           configs.addAll(Arrays.asList(args).subList(9, args.length));
         }
-        returnCode = compact(jsc, args[1], args[2], args[3], Integer.parseInt(args[4]), args[5], args[6],
-            Integer.parseInt(args[7]), false, propsFilePath, configs);
+        returnCode = compact(jsc, args[3], args[4], args[5], Integer.parseInt(args[6]), args[7],
+            Integer.parseInt(args[8]), false, propsFilePath, configs);
         break;
       case COMPACT_SCHEDULE:
-        assert (args.length >= 6);
+        assert (args.length >= 7);
         propsFilePath = null;
-        if (!StringUtils.isNullOrEmpty(args[5])) {
-          propsFilePath = args[5];
+        if (!StringUtils.isNullOrEmpty(args[6])) {
+          propsFilePath = args[6];
         }
         configs = new ArrayList<>();
-        if (args.length > 6) {
-          configs.addAll(Arrays.asList(args).subList(6, args.length));
+        if (args.length > 7) {
+          configs.addAll(Arrays.asList(args).subList(7, args.length));
         }
-        returnCode = compact(jsc, args[1], args[2], args[3], 1, "", args[4], 0, true, propsFilePath, configs);
+        returnCode = compact(jsc, args[3], args[4], args[5], 1, "", 0, true, propsFilePath, configs);
         break;
       case COMPACT_VALIDATE:
         assert (args.length == 7);
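Note: with master and memory occupying args[1] and args[2], every positional index in these cases shifts, and sparkMemory leaves the per-case argument list entirely (it is consumed before the context is created). The mapping, with the old index in parentheses:

    // COMPACT_RUN: args[3] basePath (1), args[4] tableName (2), args[5] instant (3),
    //              args[6] parallelism (4), args[7] schemaFilePath (5),
    //              args[8] retry (7), args[9] propsFilePath (8), args[10..] configs (9..)
    //              sparkMemory was args[6]; it is now args[2].
    // COMPACT_SCHEDULE: args[3] basePath (1), args[4] tableName (2), args[5] instant (3),
    //                   args[6] propsFilePath (5), args[7..] configs (6..)
    //                   sparkMemory was args[4]; it is now args[2].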
@@ -148,9 +146,9 @@ public class SparkMain {
         returnCode = 0;
         break;
       case COMPACT_UNSCHEDULE_FILE:
-        assert (args.length == 9);
-        doCompactUnscheduleFile(jsc, args[3], args[4], args[5], Integer.parseInt(args[6]),
-            Boolean.parseBoolean(args[7]), Boolean.parseBoolean(args[8]));
+        assert (args.length == 10);
+        doCompactUnscheduleFile(jsc, args[3], args[4], args[5], args[6], Integer.parseInt(args[7]),
+            Boolean.parseBoolean(args[8]), Boolean.parseBoolean(args[9]));
         returnCode = 0;
         break;
       case COMPACT_UNSCHEDULE_PLAN:
@@ -209,14 +207,6 @@ public class SparkMain {
     System.exit(returnCode);
   }

-  private static boolean sparkMasterContained(SparkCommand command) {
-    List<SparkCommand> masterContained = Arrays.asList(SparkCommand.COMPACT_VALIDATE, SparkCommand.COMPACT_REPAIR,
-        SparkCommand.COMPACT_UNSCHEDULE_PLAN, SparkCommand.COMPACT_UNSCHEDULE_FILE, SparkCommand.CLEAN,
-        SparkCommand.IMPORT, SparkCommand.UPSERT, SparkCommand.DEDUPLICATE, SparkCommand.SAVEPOINT,
-        SparkCommand.DELETE_SAVEPOINT, SparkCommand.ROLLBACK_TO_SAVEPOINT, SparkCommand.ROLLBACK, SparkCommand.BOOTSTRAP);
-    return masterContained.contains(command);
-  }
-
   protected static void clean(JavaSparkContext jsc, String basePath, String propsFilePath,
       List<String> configs) {
     HoodieCleaner.Config cfg = new HoodieCleaner.Config();
@@ -280,13 +270,14 @@ public class SparkMain {
     new HoodieCompactionAdminTool(cfg).run(jsc);
   }

-  private static void doCompactUnscheduleFile(JavaSparkContext jsc, String basePath, String fileId, String outputPath,
-      int parallelism, boolean skipValidation, boolean dryRun)
+  private static void doCompactUnscheduleFile(JavaSparkContext jsc, String basePath, String fileId, String partitionPath,
+      String outputPath, int parallelism, boolean skipValidation, boolean dryRun)
       throws Exception {
     HoodieCompactionAdminTool.Config cfg = new HoodieCompactionAdminTool.Config();
     cfg.basePath = basePath;
     cfg.operation = Operation.UNSCHEDULE_FILE;
     cfg.outputPath = outputPath;
+    cfg.partitionPath = partitionPath;
     cfg.fileId = fileId;
     cfg.parallelism = parallelism;
     cfg.dryRun = dryRun;
@@ -295,7 +286,7 @@ public class SparkMain {
   }

   private static int compact(JavaSparkContext jsc, String basePath, String tableName, String compactionInstant,
-      int parallelism, String schemaFile, String sparkMemory, int retry, boolean schedule, String propsFilePath,
+      int parallelism, String schemaFile, int retry, boolean schedule, String propsFilePath,
       List<String> configs) {
     HoodieCompactor.Config cfg = new HoodieCompactor.Config();
     cfg.basePath = basePath;
@@ -308,7 +299,6 @@ public class SparkMain {
     cfg.runSchedule = schedule;
     cfg.propsFilePath = propsFilePath;
     cfg.configs = configs;
-    jsc.getConf().set("spark.executor.memory", sparkMemory);
     return new HoodieCompactor(jsc, cfg).compact(retry);
   }

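Note: dropping jsc.getConf().set("spark.executor.memory", sparkMemory) here is not just cleanup. SparkContext snapshots its SparkConf at construction (getConf() returns a clone), so mutating the conf of a live context never reaches the executors. The memory setting now arrives as args[2] and is applied by SparkUtil.initJavaSparkConf before the JavaSparkContext exists, which is the only point where it can still take effect.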