1
0

Fixes HUDI-38: Reduce memory overhead of WriteStatus

- For implicit indexes (e.g BloomIndex), don't buffer up written records
 - By default, only collect 10% of failing records to avoid OOMs
 - Improves debuggability via above, since data errors can now show up in collect()
 - Unit tests & fixing subclasses & adjusting tests
This commit is contained in:
Vinoth Chandar
2019-03-26 14:31:19 -07:00
committed by vinoth chandar
parent e56c1612e4
commit f1410bfdcd
9 changed files with 112 additions and 23 deletions

View File

@@ -24,6 +24,7 @@ import static org.junit.Assert.assertTrue;
import com.uber.hoodie.common.HoodieCleanStat;
import com.uber.hoodie.common.HoodieClientTestUtils;
import com.uber.hoodie.common.HoodieTestDataGenerator;
import com.uber.hoodie.common.TestRawTripPayload;
import com.uber.hoodie.common.model.HoodiePartitionMetadata;
import com.uber.hoodie.common.model.HoodieRecord;
import com.uber.hoodie.common.model.HoodieTableType;
@@ -144,6 +145,7 @@ public class TestHoodieClientBase implements Serializable {
return HoodieWriteConfig.newBuilder().withPath(basePath).withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)
.withParallelism(2, 2)
.withBulkInsertParallelism(2).withFinalizeWriteParallelism(2)
.withWriteStatusClass(TestRawTripPayload.MetadataMergeWriteStatus.class)
.withConsistencyCheckEnabled(true)
.withCompactionConfig(HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1024 * 1024).build())
.withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1024 * 1024).build())