[HUDI-2792] Configure metadata payload consistency check (#4035)
- Relax the metadata payload consistency check so that spurious deletes (e.g. those produced by failed and retried Spark tasks) can be tolerated instead of always failing the read
This commit is contained in:
committed by
GitHub
parent
83f8ed2ae3
commit
a9bd20804b
@@ -138,6 +138,13 @@ public final class HoodieMetadataConfig extends HoodieConfig {
|
||||
.sinceVersion("0.10.0")
|
||||
.withDocumentation("When enabled, populates all meta fields. When disabled, no meta fields are populated.");
|
||||
|
||||
public static final ConfigProperty<Boolean> IGNORE_SPURIOUS_DELETES = ConfigProperty
|
||||
.key("_" + METADATA_PREFIX + ".ignore.spurious.deletes")
|
||||
.defaultValue(true)
|
||||
.sinceVersion("0.10.10")
|
||||
.withDocumentation("There are cases when extra files are requested to be deleted from metadata table which was never added before. This config"
|
||||
+ "determines how to handle such spurious deletes");
|
||||
|
||||
// Private constructor: code outside this class cannot instantiate the config directly.
private HoodieMetadataConfig() {
|
||||
super();
|
||||
}
|
||||
@@ -174,6 +181,10 @@ public final class HoodieMetadataConfig extends HoodieConfig {
|
||||
return getBooleanOrDefault(HoodieMetadataConfig.POPULATE_META_FIELDS);
|
||||
}
|
||||
|
||||
public boolean ignoreSpuriousDeletes() {
|
||||
return getBoolean(IGNORE_SPURIOUS_DELETES);
|
||||
}
|
||||
|
||||
public static class Builder {
|
||||
|
||||
private EngineType engineType = EngineType.SPARK;
|
||||
@@ -252,6 +263,11 @@ public final class HoodieMetadataConfig extends HoodieConfig {
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder ignoreSpuriousDeletes(boolean validateMetadataPayloadConsistency) {
|
||||
metadataConfig.setValue(IGNORE_SPURIOUS_DELETES, String.valueOf(validateMetadataPayloadConsistency));
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withEngineType(EngineType engineType) {
|
||||
this.engineType = engineType;
|
||||
return this;
|
||||
|
||||
@@ -156,11 +156,7 @@ public abstract class BaseTableMetadata implements HoodieTableMetadata {
|
||||
|
||||
List<String> partitions = Collections.emptyList();
|
||||
if (hoodieRecord.isPresent()) {
|
||||
if (!hoodieRecord.get().getData().getDeletions().isEmpty()) {
|
||||
throw new HoodieMetadataException("Metadata partition list record is inconsistent: "
|
||||
+ hoodieRecord.get().getData());
|
||||
}
|
||||
|
||||
mayBeHandleSpuriousDeletes(hoodieRecord, "\"all partitions\"");
|
||||
partitions = hoodieRecord.get().getData().getFilenames();
|
||||
// Partition-less tables have a single empty partition
|
||||
if (partitions.contains(NON_PARTITIONED_NAME)) {
|
||||
@@ -190,10 +186,7 @@ public abstract class BaseTableMetadata implements HoodieTableMetadata {
|
||||
|
||||
FileStatus[] statuses = {};
|
||||
if (hoodieRecord.isPresent()) {
|
||||
if (!hoodieRecord.get().getData().getDeletions().isEmpty()) {
|
||||
throw new HoodieMetadataException("Metadata record for partition " + partitionName + " is inconsistent: "
|
||||
+ hoodieRecord.get().getData());
|
||||
}
|
||||
mayBeHandleSpuriousDeletes(hoodieRecord, partitionName);
|
||||
statuses = hoodieRecord.get().getData().getFileStatuses(hadoopConf.get(), partitionPath);
|
||||
}
|
||||
|
||||
@@ -228,10 +221,7 @@ public abstract class BaseTableMetadata implements HoodieTableMetadata {
|
||||
|
||||
for (Pair<String, Option<HoodieRecord<HoodieMetadataPayload>>> entry: partitionsFileStatus) {
|
||||
if (entry.getValue().isPresent()) {
|
||||
if (!entry.getValue().get().getData().getDeletions().isEmpty()) {
|
||||
throw new HoodieMetadataException("Metadata record for partition " + entry.getKey() + " is inconsistent: "
|
||||
+ entry.getValue().get().getData());
|
||||
}
|
||||
mayBeHandleSpuriousDeletes(entry.getValue(), entry.getKey());
|
||||
result.put(partitionInfo.get(entry.getKey()).toString(), entry.getValue().get().getData().getFileStatuses(hadoopConf.get(), partitionInfo.get(entry.getKey())));
|
||||
}
|
||||
}
|
||||
@@ -240,6 +230,23 @@ public abstract class BaseTableMetadata implements HoodieTableMetadata {
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* May be handle spurious deletes. Depending on config, throw an exception or log a warn msg.
|
||||
* @param hoodieRecord instance of {@link HoodieRecord} of interest.
|
||||
* @param partitionName partition name of interest.
|
||||
*/
|
||||
private void mayBeHandleSpuriousDeletes(Option<HoodieRecord<HoodieMetadataPayload>> hoodieRecord, String partitionName) {
|
||||
if (!hoodieRecord.get().getData().getDeletions().isEmpty()) {
|
||||
if (!metadataConfig.ignoreSpuriousDeletes()) {
|
||||
throw new HoodieMetadataException("Metadata record for " + partitionName + " is inconsistent: "
|
||||
+ hoodieRecord.get().getData());
|
||||
} else {
|
||||
LOG.warn("Metadata record for " + partitionName + " encountered some files to be deleted which was not added before. "
|
||||
+ "Ignoring the spurious deletes as the `" + HoodieMetadataConfig.IGNORE_SPURIOUS_DELETES.key() + "` config is set to false");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected abstract Option<HoodieRecord<HoodieMetadataPayload>> getRecordByKey(String key, String partitionName);
|
||||
|
||||
protected abstract List<Pair<String, Option<HoodieRecord<HoodieMetadataPayload>>>> getRecordsByKeys(List<String> key, String partitionName);
|
||||
|
||||
@@ -704,6 +704,25 @@ public class HoodieTestTable {
|
||||
return addRollback(commitTime, rollbackMetadata);
|
||||
}
|
||||
|
||||
public HoodieTestTable doRollbackWithExtraFiles(String commitTimeToRollback, String commitTime, Map<String, List<String>> extraFiles) throws Exception {
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
Option<HoodieCommitMetadata> commitMetadata = getMetadataForInstant(commitTimeToRollback);
|
||||
if (!commitMetadata.isPresent()) {
|
||||
throw new IllegalArgumentException("Instant to rollback not present in timeline: " + commitTimeToRollback);
|
||||
}
|
||||
Map<String, List<String>> partitionFiles = getPartitionFiles(commitMetadata.get());
|
||||
for (Map.Entry<String, List<String>> entry : partitionFiles.entrySet()) {
|
||||
deleteFilesInPartition(entry.getKey(), entry.getValue());
|
||||
}
|
||||
for (Map.Entry<String, List<String>> entry: extraFiles.entrySet()) {
|
||||
if (partitionFiles.containsKey(entry.getKey())) {
|
||||
partitionFiles.get(entry.getKey()).addAll(entry.getValue());
|
||||
}
|
||||
}
|
||||
HoodieRollbackMetadata rollbackMetadata = getRollbackMetadata(commitTimeToRollback, partitionFiles);
|
||||
return addRollback(commitTime, rollbackMetadata);
|
||||
}
|
||||
|
||||
public HoodieTestTable doRestore(String commitToRestoreTo, String restoreTime) throws Exception {
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
List<HoodieInstant> commitsToRollback = metaClient.getActiveTimeline().getCommitsTimeline()
|
||||
|
||||
Reference in New Issue
Block a user