1
0

[HUDI-3731] Fixing Column Stats Index record Merging sequence missing columnName (#5159)

* Added `DataSkippingFailureMode` to control how Data Skipping (DS) handles failures in the flow: either "strict", where an exception is thrown, or "fallback", where it simply falls back to a full scan

* Make sure tests execute in `DataSkippingFailureMode.Strict`

* Fixed Column Stats Index record merging sequence missing `columnName`
This commit is contained in:
Alexey Kudinkin
2022-03-29 08:39:56 -07:00
committed by GitHub
parent 1b2fb71afc
commit fcb003ec76
5 changed files with 58 additions and 21 deletions

View File

@@ -321,9 +321,11 @@ public class HoodieMetadataPayload implements HoodieRecordPayload<HoodieMetadata
/**
 * Combines the column stats record carried by {@code previousRecord} with the one
 * carried by this payload.
 *
 * <p>Both payloads must have column stats metadata present; the actual merging
 * (including validation that both records refer to the same file and column) is
 * delegated to {@link HoodieTableMetadataUtil#mergeColumnStats}.
 *
 * @param previousRecord payload holding the previously written column stats record
 * @return the merged column stats record
 */
private HoodieMetadataColumnStats combineColumnStatsMetadata(HoodieMetadataPayload previousRecord) {
  checkArgument(previousRecord.getColumnStatMetadata().isPresent());
  checkArgument(getColumnStatMetadata().isPresent());

  HoodieMetadataColumnStats previousColStatsRecord = previousRecord.getColumnStatMetadata().get();
  HoodieMetadataColumnStats newColumnStatsRecord = getColumnStatMetadata().get();

  return HoodieTableMetadataUtil.mergeColumnStats(previousColStatsRecord, newColumnStatsRecord);
}
@Override

View File

@@ -90,6 +90,7 @@ import static org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.TOTAL
import static org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.TOTAL_UNCOMPRESSED_SIZE;
import static org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.VALUE_COUNT;
import static org.apache.hudi.common.util.StringUtils.isNullOrEmpty;
import static org.apache.hudi.common.util.ValidationUtils.checkArgument;
import static org.apache.hudi.metadata.HoodieTableMetadata.EMPTY_PARTITION_NAME;
import static org.apache.hudi.metadata.HoodieTableMetadata.NON_PARTITIONED_NAME;
@@ -935,20 +936,25 @@ public class HoodieTableMetadataUtil {
return Arrays.asList(tableConfig.getRecordKeyFields().get());
}
public static HoodieMetadataColumnStats mergeColumnStats(HoodieMetadataColumnStats oldColumnStats, HoodieMetadataColumnStats newColumnStats) {
ValidationUtils.checkArgument(oldColumnStats.getFileName().equals(newColumnStats.getFileName()));
if (newColumnStats.getIsDeleted()) {
return newColumnStats;
public static HoodieMetadataColumnStats mergeColumnStats(HoodieMetadataColumnStats prevColumnStatsRecord,
HoodieMetadataColumnStats newColumnStatsRecord) {
checkArgument(prevColumnStatsRecord.getFileName().equals(newColumnStatsRecord.getFileName()));
checkArgument(prevColumnStatsRecord.getColumnName().equals(newColumnStatsRecord.getColumnName()));
if (newColumnStatsRecord.getIsDeleted()) {
return newColumnStatsRecord;
}
return HoodieMetadataColumnStats.newBuilder()
.setFileName(newColumnStats.getFileName())
.setMinValue(Stream.of(oldColumnStats.getMinValue(), newColumnStats.getMinValue()).filter(Objects::nonNull).min(Comparator.naturalOrder()).orElse(null))
.setMaxValue(Stream.of(oldColumnStats.getMinValue(), newColumnStats.getMinValue()).filter(Objects::nonNull).max(Comparator.naturalOrder()).orElse(null))
.setValueCount(oldColumnStats.getValueCount() + newColumnStats.getValueCount())
.setNullCount(oldColumnStats.getNullCount() + newColumnStats.getNullCount())
.setTotalSize(oldColumnStats.getTotalSize() + newColumnStats.getTotalSize())
.setTotalUncompressedSize(oldColumnStats.getTotalUncompressedSize() + newColumnStats.getTotalUncompressedSize())
.setIsDeleted(newColumnStats.getIsDeleted())
.setFileName(newColumnStatsRecord.getFileName())
.setColumnName(newColumnStatsRecord.getColumnName())
.setMinValue(Stream.of(prevColumnStatsRecord.getMinValue(), newColumnStatsRecord.getMinValue()).filter(Objects::nonNull).min(Comparator.naturalOrder()).orElse(null))
.setMaxValue(Stream.of(prevColumnStatsRecord.getMinValue(), newColumnStatsRecord.getMinValue()).filter(Objects::nonNull).max(Comparator.naturalOrder()).orElse(null))
.setValueCount(prevColumnStatsRecord.getValueCount() + newColumnStatsRecord.getValueCount())
.setNullCount(prevColumnStatsRecord.getNullCount() + newColumnStatsRecord.getNullCount())
.setTotalSize(prevColumnStatsRecord.getTotalSize() + newColumnStatsRecord.getTotalSize())
.setTotalUncompressedSize(prevColumnStatsRecord.getTotalUncompressedSize() + newColumnStatsRecord.getTotalUncompressedSize())
.setIsDeleted(newColumnStatsRecord.getIsDeleted())
.build();
}