1
0

[HUDI-2792] Configure metadata payload consistency check (#4035)

- Relax the metadata payload consistency check to tolerate Spark task failures that produce spurious deletes
This commit is contained in:
Sivabalan Narayanan
2021-11-24 21:56:31 -05:00
committed by GitHub
parent 83f8ed2ae3
commit a9bd20804b
9 changed files with 125 additions and 43 deletions

View File

@@ -138,6 +138,13 @@ public final class HoodieMetadataConfig extends HoodieConfig {
.sinceVersion("0.10.0")
.withDocumentation("When enabled, populates all meta fields. When disabled, no meta fields are populated.");
// Controls whether spurious deletes (delete requests for files that were never
// added to the metadata table, e.g. from retried/failed Spark tasks) are ignored
// or treated as a fatal inconsistency.
// NOTE(review): the leading "_" in the key presumably marks this as an
// internal/advanced config — confirm against the project's key conventions.
public static final ConfigProperty<Boolean> IGNORE_SPURIOUS_DELETES = ConfigProperty
.key("_" + METADATA_PREFIX + ".ignore.spurious.deletes")
.defaultValue(true)
.sinceVersion("0.10.10")
// BUG FIX: trailing space added — the old concatenation rendered "configdetermines".
.withDocumentation("There are cases when extra files are requested to be deleted from metadata table which was never added before. This config "
+ "determines how to handle such spurious deletes");
// Private: instances are created via the nested {@code Builder}, matching the
// builder pattern used by the other Hoodie config classes in this file.
private HoodieMetadataConfig() {
super();
}
@@ -174,6 +181,10 @@ public final class HoodieMetadataConfig extends HoodieConfig {
return getBooleanOrDefault(HoodieMetadataConfig.POPULATE_META_FIELDS);
}
/**
 * Returns whether spurious deletes in metadata payloads should be ignored
 * (logged) rather than treated as a fatal inconsistency.
 *
 * <p>Uses {@code getBooleanOrDefault} — consistent with the sibling accessor
 * {@code populateMetaFields()} above — so the declared default ({@code true})
 * is applied even when the key was never explicitly set.
 */
public boolean ignoreSpuriousDeletes() {
return getBooleanOrDefault(IGNORE_SPURIOUS_DELETES);
}
public static class Builder {
private EngineType engineType = EngineType.SPARK;
@@ -252,6 +263,11 @@ public final class HoodieMetadataConfig extends HoodieConfig {
return this;
}
/**
 * Sets whether spurious deletes in metadata payloads are ignored (logged)
 * instead of raising an exception.
 *
 * @param ignoreSpuriousDeletes true to ignore spurious deletes, false to fail on them.
 * @return this builder for chaining.
 */
// Parameter renamed from "validateMetadataPayloadConsistency": the old name had
// the OPPOSITE sense of the method/config (true meant "ignore", not "validate").
// A Java parameter rename is source- and binary-compatible for callers.
public Builder ignoreSpuriousDeletes(boolean ignoreSpuriousDeletes) {
metadataConfig.setValue(IGNORE_SPURIOUS_DELETES, String.valueOf(ignoreSpuriousDeletes));
return this;
}
public Builder withEngineType(EngineType engineType) {
this.engineType = engineType;
return this;

View File

@@ -156,11 +156,7 @@ public abstract class BaseTableMetadata implements HoodieTableMetadata {
List<String> partitions = Collections.emptyList();
if (hoodieRecord.isPresent()) {
if (!hoodieRecord.get().getData().getDeletions().isEmpty()) {
throw new HoodieMetadataException("Metadata partition list record is inconsistent: "
+ hoodieRecord.get().getData());
}
mayBeHandleSpuriousDeletes(hoodieRecord, "\"all partitions\"");
partitions = hoodieRecord.get().getData().getFilenames();
// Partition-less tables have a single empty partition
if (partitions.contains(NON_PARTITIONED_NAME)) {
@@ -190,10 +186,7 @@ public abstract class BaseTableMetadata implements HoodieTableMetadata {
FileStatus[] statuses = {};
if (hoodieRecord.isPresent()) {
if (!hoodieRecord.get().getData().getDeletions().isEmpty()) {
throw new HoodieMetadataException("Metadata record for partition " + partitionName + " is inconsistent: "
+ hoodieRecord.get().getData());
}
mayBeHandleSpuriousDeletes(hoodieRecord, partitionName);
statuses = hoodieRecord.get().getData().getFileStatuses(hadoopConf.get(), partitionPath);
}
@@ -228,10 +221,7 @@ public abstract class BaseTableMetadata implements HoodieTableMetadata {
for (Pair<String, Option<HoodieRecord<HoodieMetadataPayload>>> entry: partitionsFileStatus) {
if (entry.getValue().isPresent()) {
if (!entry.getValue().get().getData().getDeletions().isEmpty()) {
throw new HoodieMetadataException("Metadata record for partition " + entry.getKey() + " is inconsistent: "
+ entry.getValue().get().getData());
}
mayBeHandleSpuriousDeletes(entry.getValue(), entry.getKey());
result.put(partitionInfo.get(entry.getKey()).toString(), entry.getValue().get().getData().getFileStatuses(hadoopConf.get(), partitionInfo.get(entry.getKey())));
}
}
@@ -240,6 +230,23 @@ public abstract class BaseTableMetadata implements HoodieTableMetadata {
return result;
}
/**
 * Handles spurious deletes found in a metadata payload: depending on
 * the ignore-spurious-deletes config, either throws a
 * {@link HoodieMetadataException} (config false) or logs a warning and
 * continues (config true).
 *
 * @param hoodieRecord instance of {@link HoodieRecord} of interest; callers
 *                     guarantee it is present before invoking this method.
 * @param partitionName partition name of interest (used only in messages).
 */
private void mayBeHandleSpuriousDeletes(Option<HoodieRecord<HoodieMetadataPayload>> hoodieRecord, String partitionName) {
if (!hoodieRecord.get().getData().getDeletions().isEmpty()) {
if (!metadataConfig.ignoreSpuriousDeletes()) {
throw new HoodieMetadataException("Metadata record for " + partitionName + " is inconsistent: "
+ hoodieRecord.get().getData());
} else {
// BUG FIX: this branch runs only when ignoreSpuriousDeletes() is TRUE,
// but the old message claimed the config "is set to false".
LOG.warn("Metadata record for " + partitionName + " encountered some files to be deleted which was not added before. "
+ "Ignoring the spurious deletes as the `" + HoodieMetadataConfig.IGNORE_SPURIOUS_DELETES.key() + "` config is set to true");
}
}
}
protected abstract Option<HoodieRecord<HoodieMetadataPayload>> getRecordByKey(String key, String partitionName);
protected abstract List<Pair<String, Option<HoodieRecord<HoodieMetadataPayload>>>> getRecordsByKeys(List<String> key, String partitionName);

View File

@@ -704,6 +704,25 @@ public class HoodieTestTable {
return addRollback(commitTime, rollbackMetadata);
}
/**
 * Rolls back the given instant while injecting extra (spurious) file names into
 * the rollback metadata — test helper for exercising spurious-delete handling.
 * Extra files are merged only into partitions the rolled-back commit actually
 * touched; entries for unknown partitions are silently dropped.
 *
 * @param commitTimeToRollback instant whose commit is being rolled back.
 * @param commitTime instant time to record for the rollback itself.
 * @param extraFiles partition -> additional file names to append.
 * @return this table for chaining.
 * @throws IllegalArgumentException if the instant to roll back is not in the timeline.
 */
public HoodieTestTable doRollbackWithExtraFiles(String commitTimeToRollback, String commitTime, Map<String, List<String>> extraFiles) throws Exception {
metaClient = HoodieTableMetaClient.reload(metaClient);
Option<HoodieCommitMetadata> metadataToRollback = getMetadataForInstant(commitTimeToRollback);
if (!metadataToRollback.isPresent()) {
throw new IllegalArgumentException("Instant to rollback not present in timeline: " + commitTimeToRollback);
}
Map<String, List<String>> filesPerPartition = getPartitionFiles(metadataToRollback.get());
// Physically remove the files the rolled-back commit had written.
for (Map.Entry<String, List<String>> partitionAndFiles : filesPerPartition.entrySet()) {
deleteFilesInPartition(partitionAndFiles.getKey(), partitionAndFiles.getValue());
}
// Append the spurious files to matching partitions before building the metadata.
extraFiles.forEach((partition, spuriousFiles) -> {
if (filesPerPartition.containsKey(partition)) {
filesPerPartition.get(partition).addAll(spuriousFiles);
}
});
HoodieRollbackMetadata rollbackMetadata = getRollbackMetadata(commitTimeToRollback, filesPerPartition);
return addRollback(commitTime, rollbackMetadata);
}
public HoodieTestTable doRestore(String commitToRestoreTo, String restoreTime) throws Exception {
metaClient = HoodieTableMetaClient.reload(metaClient);
List<HoodieInstant> commitsToRollback = metaClient.getActiveTimeline().getCommitsTimeline()