1
0

[HUDI-2675] Fix the exception 'Not an Avro data file' when archive and clean (#4016)

This commit is contained in:
董可伦
2021-12-30 11:53:17 +08:00
committed by GitHub
parent 674c149234
commit 436becf3ea
11 changed files with 209 additions and 78 deletions

View File

@@ -190,12 +190,36 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
}
}
/**
 * Removes the meta file of an instant that carries no content, if that file is present.
 *
 * <p>The instant must satisfy {@code isEmpty(instant)}; the precondition is enforced through
 * {@code ValidationUtils.checkArgument} before any deletion is attempted.
 *
 * @param instant the instant whose (empty) meta file should be removed
 */
public void deleteEmptyInstantIfExists(HoodieInstant instant) {
  final boolean hasNoContent = isEmpty(instant);
  ValidationUtils.checkArgument(hasNoContent);
  deleteInstantFileIfExists(instant);
}
/**
 * Deletes the meta file of a requested compaction instant.
 *
 * <p>Two preconditions are checked, in this order, via {@code ValidationUtils.checkArgument}:
 * the instant reports {@code isRequested()}, and its action equals
 * {@code HoodieTimeline.COMPACTION_ACTION}.
 *
 * @param instant the requested compaction instant to delete
 */
public void deleteCompactionRequested(HoodieInstant instant) {
  final boolean inRequestedState = instant.isRequested();
  ValidationUtils.checkArgument(inRequestedState);

  final boolean isCompactionAction = Objects.equals(instant.getAction(), HoodieTimeline.COMPACTION_ACTION);
  ValidationUtils.checkArgument(isCompactionAction);

  deleteInstantFile(instant);
}
/**
 * Deletes the meta file backing the given instant when it exists.
 *
 * <p>A missing file is only logged as a warning. A file that exists but whose delete call
 * returns {@code false} results in a {@code HoodieIOException}, as does any {@code IOException}
 * raised by the file system calls.
 *
 * @param instant the instant whose meta file should be removed
 */
private void deleteInstantFileIfExists(HoodieInstant instant) {
  LOG.info("Deleting instant " + instant);
  Path metaFilePath = new Path(metaClient.getMetaPath(), instant.getFileName());
  try {
    // Nothing to delete: report it and leave quietly.
    if (!metaClient.getFs().exists(metaFilePath)) {
      LOG.warn("The commit " + metaFilePath + " to remove does not exist");
      return;
    }
    // Non-recursive delete; a false return means the file is still there.
    if (!metaClient.getFs().delete(metaFilePath, false)) {
      throw new HoodieIOException("Could not delete instant " + instant);
    }
    LOG.info("Removed instant " + instant);
  } catch (IOException e) {
    throw new HoodieIOException("Could not remove inflight commit " + metaFilePath, e);
  }
}
private void deleteInstantFile(HoodieInstant instant) {
LOG.info("Deleting instant " + instant);
Path inFlightCommitFilePath = new Path(metaClient.getMetaPath(), instant.getFileName());

View File

@@ -123,7 +123,7 @@ public class HoodieDefaultTimeline implements HoodieTimeline {
// Builds a new HoodieDefaultTimeline from the instants whose action equals REPLACE_COMMIT_ACTION
// and whose isCompleted() is true, handing over this timeline's `details`.
@Override
public HoodieTimeline getCompletedReplaceTimeline() {
return new HoodieDefaultTimeline(
// NOTE(review): the next two lines differ only in the completion filter (lambda vs. method
// reference) and look like the before/after sides of a diff — confirm only one is present in the file.
instants.stream().filter(s -> s.getAction().equals(REPLACE_COMMIT_ACTION)).filter(s -> s.isCompleted()), details);
instants.stream().filter(s -> s.getAction().equals(REPLACE_COMMIT_ACTION)).filter(HoodieInstant::isCompleted), details);
}
@Override
@@ -374,6 +374,11 @@ public class HoodieDefaultTimeline implements HoodieTimeline {
return operationType.isPresent() && WriteOperationType.DELETE_PARTITION.equals(operationType.get());
}
/**
 * Tells whether the given instant has zero-length details.
 *
 * <p>The details are fetched with {@code getInstantDetails(instant)} and unwrapped with
 * {@code get()}, so the details are expected to be present.
 *
 * @param instant the instant to inspect
 * @return {@code true} when the instant's details byte array has length 0
 */
@Override
public boolean isEmpty(HoodieInstant instant) {
  final byte[] content = getInstantDetails(instant).get();
  return content.length == 0;
}
@Override
public String toString() {
return this.getClass().getName() + ": " + instants.stream().map(Object::toString).collect(Collectors.joining(","));

View File

@@ -289,6 +289,8 @@ public interface HoodieTimeline extends Serializable {
*/
Option<byte[]> getInstantDetails(HoodieInstant instant);
/**
 * Check whether the given instant is empty.
 * NOTE(review): HoodieDefaultTimeline implements this as "the instant details have length 0";
 * confirm that any other implementer uses the same definition.
 *
 * @param instant the instant to inspect
 * @return true if the instant is considered empty
 */
boolean isEmpty(HoodieInstant instant);
/**
* Check WriteOperationType is DeletePartition.
*/

View File

@@ -19,6 +19,7 @@
package org.apache.hudi.common.testutils;
import org.apache.directory.api.util.Strings;
import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.avro.model.HoodieCleanerPlan;
import org.apache.hudi.avro.model.HoodieCompactionPlan;
@@ -219,14 +220,26 @@ public class FileCreateUtils {
createMetaFile(basePath, instantTime, HoodieTimeline.CLEAN_EXTENSION, serializeCleanMetadata(metadata).get());
}
/**
 * Creates a meta file with {@code HoodieTimeline.CLEAN_EXTENSION} for the given instant time.
 *
 * @param basePath    base path handed to {@code createMetaFile}
 * @param instantTime instant time handed to {@code createMetaFile}
 * @param metadata    clean metadata to serialize; not serialized when {@code isEmpty} is true
 * @param isEmpty     when true the file content is {@code Strings.EMPTY_BYTES} instead of the serialized metadata
 * @throws IOException declared by the method; presumably propagated from serialization or {@code createMetaFile}
 */
public static void createCleanFile(String basePath, String instantTime, HoodieCleanMetadata metadata, boolean isEmpty) throws IOException {
  final byte[] content;
  if (isEmpty) {
    content = Strings.EMPTY_BYTES;
  } else {
    content = serializeCleanMetadata(metadata).get();
  }
  createMetaFile(basePath, instantTime, HoodieTimeline.CLEAN_EXTENSION, content);
}
/**
 * Creates a meta file with {@code HoodieTimeline.REQUESTED_CLEAN_EXTENSION} holding the serialized cleaner plan.
 *
 * @param basePath    base path handed to {@code createMetaFile}
 * @param instantTime instant time handed to {@code createMetaFile}
 * @param cleanerPlan cleaner plan to serialize as the file content
 * @throws IOException declared by the method; presumably propagated from serialization or {@code createMetaFile}
 */
public static void createRequestedCleanFile(String basePath, String instantTime, HoodieCleanerPlan cleanerPlan) throws IOException {
  final byte[] planBytes = serializeCleanerPlan(cleanerPlan).get();
  createMetaFile(basePath, instantTime, HoodieTimeline.REQUESTED_CLEAN_EXTENSION, planBytes);
}
/**
 * Creates a meta file with {@code HoodieTimeline.REQUESTED_CLEAN_EXTENSION}, optionally with empty content.
 *
 * @param basePath    base path handed to {@code createMetaFile}
 * @param instantTime instant time handed to {@code createMetaFile}
 * @param cleanerPlan cleaner plan to serialize; not serialized when {@code isEmpty} is true
 * @param isEmpty     when true the file content is {@code Strings.EMPTY_BYTES} instead of the serialized plan
 * @throws IOException declared by the method; presumably propagated from serialization or {@code createMetaFile}
 */
public static void createRequestedCleanFile(String basePath, String instantTime, HoodieCleanerPlan cleanerPlan, boolean isEmpty) throws IOException {
  final byte[] content;
  if (isEmpty) {
    content = Strings.EMPTY_BYTES;
  } else {
    content = serializeCleanerPlan(cleanerPlan).get();
  }
  createMetaFile(basePath, instantTime, HoodieTimeline.REQUESTED_CLEAN_EXTENSION, content);
}
/**
 * Creates a meta file with {@code HoodieTimeline.INFLIGHT_CLEAN_EXTENSION} holding the serialized cleaner plan.
 *
 * @param basePath    base path handed to {@code createMetaFile}
 * @param instantTime instant time handed to {@code createMetaFile}
 * @param cleanerPlan cleaner plan to serialize as the file content
 * @throws IOException declared by the method; presumably propagated from serialization or {@code createMetaFile}
 */
public static void createInflightCleanFile(String basePath, String instantTime, HoodieCleanerPlan cleanerPlan) throws IOException {
  final byte[] planBytes = serializeCleanerPlan(cleanerPlan).get();
  createMetaFile(basePath, instantTime, HoodieTimeline.INFLIGHT_CLEAN_EXTENSION, planBytes);
}
/**
 * Creates a meta file with {@code HoodieTimeline.INFLIGHT_CLEAN_EXTENSION}, optionally with empty content.
 *
 * @param basePath    base path handed to {@code createMetaFile}
 * @param instantTime instant time handed to {@code createMetaFile}
 * @param cleanerPlan cleaner plan to serialize; not serialized when {@code isEmpty} is true
 * @param isEmpty     when true the file content is {@code Strings.EMPTY_BYTES} instead of the serialized plan
 * @throws IOException declared by the method; presumably propagated from serialization or {@code createMetaFile}
 */
public static void createInflightCleanFile(String basePath, String instantTime, HoodieCleanerPlan cleanerPlan, boolean isEmpty) throws IOException {
  final byte[] content;
  if (isEmpty) {
    content = Strings.EMPTY_BYTES;
  } else {
    content = serializeCleanerPlan(cleanerPlan).get();
  }
  createMetaFile(basePath, instantTime, HoodieTimeline.INFLIGHT_CLEAN_EXTENSION, content);
}
/**
 * Creates a meta file with {@code HoodieTimeline.REQUESTED_ROLLBACK_EXTENSION} holding the serialized rollback plan.
 *
 * @param basePath    base path handed to {@code createMetaFile}
 * @param instantTime instant time handed to {@code createMetaFile}
 * @param plan        rollback plan to serialize as the file content
 * @throws IOException declared by the method; presumably propagated from serialization or {@code createMetaFile}
 */
public static void createRequestedRollbackFile(String basePath, String instantTime, HoodieRollbackPlan plan) throws IOException {
  final byte[] planBytes = serializeRollbackPlan(plan).get();
  createMetaFile(basePath, instantTime, HoodieTimeline.REQUESTED_ROLLBACK_EXTENSION, planBytes);
}
@@ -235,8 +248,8 @@ public class FileCreateUtils {
createMetaFile(basePath, instantTime, HoodieTimeline.INFLIGHT_ROLLBACK_EXTENSION);
}
// Creates a meta file with HoodieTimeline.ROLLBACK_EXTENSION for the given instant time.
// NOTE(review): this span shows two signature/body pairs back to back — one without and one with the
// `isEmpty` flag. They look like the before/after sides of a diff; confirm which form the file holds.
public static void createRollbackFile(String basePath, String instantTime, HoodieRollbackMetadata hoodieRollbackMetadata) throws IOException {
// Variant without the flag: the content is always the serialized rollback metadata.
createMetaFile(basePath, instantTime, HoodieTimeline.ROLLBACK_EXTENSION, serializeRollbackMetadata(hoodieRollbackMetadata).get());
public static void createRollbackFile(String basePath, String instantTime, HoodieRollbackMetadata hoodieRollbackMetadata, boolean isEmpty) throws IOException {
// Variant with the flag: Strings.EMPTY_BYTES is written when isEmpty is true, otherwise the serialized metadata.
createMetaFile(basePath, instantTime, HoodieTimeline.ROLLBACK_EXTENSION, isEmpty ? Strings.EMPTY_BYTES : serializeRollbackMetadata(hoodieRollbackMetadata).get());
}
public static void createRestoreFile(String basePath, String instantTime, HoodieRestoreMetadata hoodieRestoreMetadata) throws IOException {

View File

@@ -279,9 +279,13 @@ public class HoodieTestTable {
}
// Adds a clean instant (requested, inflight and clean meta files) at the given instant time.
// NOTE(review): the three create*CleanFile calls followed by a delegating `return` look like the
// before/after sides of a diff (inline body replaced by delegation with isEmpty = false) —
// confirm which statements the file actually contains.
public HoodieTestTable addClean(String instantTime, HoodieCleanerPlan cleanerPlan, HoodieCleanMetadata metadata) throws IOException {
createRequestedCleanFile(basePath, instantTime, cleanerPlan);
createInflightCleanFile(basePath, instantTime, cleanerPlan);
createCleanFile(basePath, instantTime, metadata);
// Delegates to the four-argument overload with isEmpty = false.
return addClean(instantTime, cleanerPlan, metadata, false);
}
/**
 * Adds a clean instant by creating its requested, inflight and clean meta files, then records
 * {@code instantTime} as the current instant time.
 *
 * @param instantTime instant time of the clean action
 * @param cleanerPlan cleaner plan passed to the requested and inflight file creators
 * @param metadata    clean metadata passed to the clean file creator
 * @param isEmpty     forwarded unchanged to all three file creators
 * @return this test table, to allow call chaining
 * @throws IOException if any of the file creators throws it
 */
public HoodieTestTable addClean(String instantTime, HoodieCleanerPlan cleanerPlan, HoodieCleanMetadata metadata, boolean isEmpty) throws IOException {
  // Files are written in timeline order: requested -> inflight -> clean.
  createRequestedCleanFile(this.basePath, instantTime, cleanerPlan, isEmpty);
  createInflightCleanFile(this.basePath, instantTime, cleanerPlan, isEmpty);
  createCleanFile(this.basePath, instantTime, metadata, isEmpty);

  this.currentInstantTime = instantTime;
  return this;
}
@@ -324,8 +328,12 @@ public class HoodieTestTable {
}
/**
 * Adds a rollback instant by delegating to the three-argument overload with the empty-file
 * flag set to {@code false}.
 *
 * @param instantTime      instant time of the rollback action
 * @param rollbackMetadata rollback metadata forwarded to the overload
 * @return whatever the three-argument overload returns
 * @throws IOException if the overload throws it
 */
public HoodieTestTable addRollback(String instantTime, HoodieRollbackMetadata rollbackMetadata) throws IOException {
  final boolean writeEmptyFile = false;
  return addRollback(instantTime, rollbackMetadata, writeEmptyFile);
}
// Adds a rollback instant: creates the inflight rollback file and the rollback file, then records
// instantTime as the current instant time and returns this table for chaining.
public HoodieTestTable addRollback(String instantTime, HoodieRollbackMetadata rollbackMetadata, boolean isEmpty) throws IOException {
createInflightRollbackFile(basePath, instantTime);
// NOTE(review): the next two calls differ only in the trailing `isEmpty` argument and look like the
// before/after sides of a diff — confirm only the isEmpty-aware call is present in the file.
createRollbackFile(basePath, instantTime, rollbackMetadata);
createRollbackFile(basePath, instantTime, rollbackMetadata, isEmpty);
currentInstantTime = instantTime;
return this;
}
@@ -1015,7 +1023,7 @@ public class HoodieTestTable {
* @param tableType - Hudi table type
* @param commitTime - Write commit time
* @param partitionToFilesNameLengthMap - Map of partition names to its list of files and their lengths
* @return Test tabke state for the requested partitions and files
* @return Test table state for the requested partitions and files
*/
private static HoodieTestTableState getTestTableStateWithPartitionFileInfo(WriteOperationType operationType,
HoodieTableType tableType,