1
0

[HUDI-716] Exception: Not an Avro data file when running HoodieCleanClient.runClean (#1432)

This commit is contained in:
lamber-ken
2020-03-30 13:19:17 -05:00
committed by GitHub
parent 9f51b99174
commit dbc9acd23a
5 changed files with 78 additions and 1 deletions

View File

@@ -26,8 +26,11 @@ import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodiePartitionMetadata;
import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.util.CleanerUtils;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;
import org.apache.spark.launcher.SparkLauncher;
import org.springframework.shell.core.CommandMarker;
import org.springframework.shell.core.annotation.CliCommand;
@@ -51,6 +54,8 @@ import static org.apache.hudi.common.table.HoodieTableMetaClient.METAFOLDER_NAME
@Component
public class RepairsCommand implements CommandMarker {
private static final Logger LOG = Logger.getLogger(RepairsCommand.class);
@CliCommand(value = "repair deduplicate",
help = "De-duplicate a partition path contains duplicates & produce repaired files to replace with")
public String deduplicate(
@@ -137,4 +142,20 @@ public class RepairsCommand implements CommandMarker {
}
return HoodiePrintHelper.print(new String[] {"Property", "Old Value", "New Value"}, rows);
}
/**
 * CLI command that removes pending clean instants whose plan file cannot be read.
 *
 * <p>For every inflight or requested instant on the cleaner timeline, this attempts to load
 * the cleaner plan through {@code CleanerUtils.getCleanerPlan}. If that load throws an
 * {@link IOException}, the instant file is deleted from the table's meta path.
 *
 * <p>The scan is limited to the cleaner timeline on purpose: the readability probe parses the
 * instant as a cleaner plan, so running it against other pending actions (commits,
 * compactions, ...) would presumably fail and cause their instant files to be deleted as well.
 */
@CliCommand(value = "repair corrupted clean files", help = "repair corrupted clean files")
public void removeCorruptedPendingCleanAction() {
  HoodieTableMetaClient client = HoodieCLI.getTableMetaClient();
  // Reuse the meta client fetched above and only look at pending *clean* instants.
  client.getActiveTimeline().getCleanerTimeline().filterInflightsAndRequested().getInstants().forEach(instant -> {
    try {
      // The returned plan is not needed; the call only serves as a readability check.
      CleanerUtils.getCleanerPlan(client, instant);
    } catch (IOException e) {
      // Keep the cause in the log so the reason for the removal can be diagnosed later.
      LOG.warn("try to remove corrupted instant file: " + instant, e);
      FSUtils.deleteInstantFile(client.getFs(), client.getMetaPath(), instant);
    }
  });
}
}