1
0

Fixes HUDI-38: Reduce memory overhead of WriteStatus

- For implicit indexes (e.g BloomIndex), don't buffer up written records
 - By default, only collect 10% of failing records to avoid OOMs
 - Improves debuggability via above, since data errors can now show up in collect()
 - Unit tests & fixing subclasses & adjusting tests
This commit is contained in:
Vinoth Chandar
2019-03-26 14:31:19 -07:00
committed by vinoth chandar
parent e56c1612e4
commit f1410bfdcd
9 changed files with 112 additions and 23 deletions

View File

@@ -61,7 +61,9 @@ public abstract class HoodieIOHandle<T extends HoodieRecordPayload> {
this.originalSchema = new Schema.Parser().parse(config.getSchema());
this.writerSchema = createHoodieWriteSchema(originalSchema);
this.timer = new HoodieTimer().startTimer();
this.writeStatus = ReflectionUtils.loadClass(config.getWriteStatusClassName());
this.writeStatus = (WriteStatus) ReflectionUtils.loadClass(config.getWriteStatusClassName(),
!hoodieTable.getIndex().isImplicitWithStorage(),
config.getWriteStatusFailureFraction());
}
/**