1
0

Fixes HUDI-38: Reduce memory overhead of WriteStatus

- For implicit indexes (e.g. BloomIndex), don't buffer up written records
 - By default, only collect 10% of failing records to avoid OOMs
 - Improves debuggability as a result, since data errors can now show up in collect()
 - Adds unit tests, fixes subclasses, and adjusts existing tests
This commit is contained in:
Vinoth Chandar
2019-03-26 14:31:19 -07:00
committed by vinoth chandar
parent e56c1612e4
commit f1410bfdcd
9 changed files with 112 additions and 23 deletions

View File

@@ -406,21 +406,21 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
}
/**
 * Reads the {@code HoodieMemoryConfig.MAX_MEMORY_FOR_COMPACTION_PROP} property from
 * {@code props} and converts its string value with {@link Long#valueOf(String)}.
 *
 * <p>NOTE(review): this span contains two consecutive {@code return} statements that evaluate
 * the same expression. It looks like a diff rendering with the +/- markers stripped, where the
 * multi-line form is presumably the removed version and the single-line form the added one;
 * confirm against the actual file before treating this as compilable code.
 *
 * @return the property value converted to a {@code Long}
 * @throws NumberFormatException if the property value cannot be parsed as a long
 */
public Long getMaxMemoryPerCompaction() {
// Multi-line form of the return statement.
return Long
.valueOf(
props.getProperty(HoodieMemoryConfig.MAX_MEMORY_FOR_COMPACTION_PROP));
// Single-line form of the same return statement.
return Long.valueOf(props.getProperty(HoodieMemoryConfig.MAX_MEMORY_FOR_COMPACTION_PROP));
}
/**
 * Reads the {@code HoodieMemoryConfig.MAX_DFS_STREAM_BUFFER_SIZE_PROP} property from
 * {@code props} and converts its string value with {@link Integer#valueOf(String)}; the
 * boxed result is unboxed to satisfy the primitive {@code int} return type.
 *
 * <p>NOTE(review): this span contains two consecutive {@code return} statements that evaluate
 * the same expression. It looks like a diff rendering with the +/- markers stripped, where the
 * multi-line form is presumably the removed version and the single-line form the added one;
 * confirm against the actual file before treating this as compilable code.
 *
 * @return the property value converted to an {@code int}
 * @throws NumberFormatException if the property value cannot be parsed as an integer
 */
public int getMaxDFSStreamBufferSize() {
// Multi-line form of the return statement.
return Integer
.valueOf(
props.getProperty(HoodieMemoryConfig.MAX_DFS_STREAM_BUFFER_SIZE_PROP));
// Single-line form of the same return statement.
return Integer.valueOf(props.getProperty(HoodieMemoryConfig.MAX_DFS_STREAM_BUFFER_SIZE_PROP));
}
/**
 * Looks up the {@code HoodieMemoryConfig.SPILLABLE_MAP_BASE_PATH_PROP} property.
 *
 * @return the string that {@code props.getProperty} yields for that key, passed through
 *     unchanged
 */
public String getSpillableMapBasePath() {
  final String spillableMapBasePath =
      props.getProperty(HoodieMemoryConfig.SPILLABLE_MAP_BASE_PATH_PROP);
  return spillableMapBasePath;
}
/**
 * Reads the {@code HoodieMemoryConfig.WRITESTATUS_FAILURE_FRACTION_PROP} property from
 * {@code props} and parses it as a primitive {@code double}.
 *
 * @return the property value parsed as a {@code double}
 * @throws NullPointerException if {@code props.getProperty} returns {@code null} for the key
 * @throws NumberFormatException if the property value is not a parsable double
 */
public double getWriteStatusFailureFraction() {
  // parseDouble yields the primitive directly; Double.valueOf would allocate a boxed Double
  // only to unbox it again for the primitive return type. Both throw the same exceptions.
  return Double.parseDouble(
      props.getProperty(HoodieMemoryConfig.WRITESTATUS_FAILURE_FRACTION_PROP));
}
public static class Builder {
private final Properties props = new Properties();
@@ -428,7 +428,6 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
private boolean isStorageConfigSet = false;
private boolean isCompactionConfigSet = false;
private boolean isMetricsConfigSet = false;
private boolean isAutoCommit = true;
private boolean isMemoryConfigSet = false;
public Builder fromFile(File propertiesFile) throws IOException {