Fixes HUDI-38: Reduce memory overhead of WriteStatus
- For implicit indexes (e.g. BloomIndex), don't buffer up written records
- By default, only collect 10% of failing records to avoid OOMs
- Improves debuggability via the above, since data errors can now show up in collect()
- Adds unit tests, fixes subclasses, and adjusts existing tests
This commit is contained in:
committed by
vinoth chandar
parent
e56c1612e4
commit
f1410bfdcd
@@ -24,6 +24,7 @@ import static org.junit.Assert.assertTrue;
|
||||
import com.uber.hoodie.common.HoodieCleanStat;
|
||||
import com.uber.hoodie.common.HoodieClientTestUtils;
|
||||
import com.uber.hoodie.common.HoodieTestDataGenerator;
|
||||
import com.uber.hoodie.common.TestRawTripPayload;
|
||||
import com.uber.hoodie.common.model.HoodiePartitionMetadata;
|
||||
import com.uber.hoodie.common.model.HoodieRecord;
|
||||
import com.uber.hoodie.common.model.HoodieTableType;
|
||||
@@ -144,6 +145,7 @@ public class TestHoodieClientBase implements Serializable {
|
||||
return HoodieWriteConfig.newBuilder().withPath(basePath).withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)
|
||||
.withParallelism(2, 2)
|
||||
.withBulkInsertParallelism(2).withFinalizeWriteParallelism(2)
|
||||
.withWriteStatusClass(TestRawTripPayload.MetadataMergeWriteStatus.class)
|
||||
.withConsistencyCheckEnabled(true)
|
||||
.withCompactionConfig(HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1024 * 1024).build())
|
||||
.withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1024 * 1024).build())
|
||||
|
||||
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
* Copyright (c) 2019 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import com.uber.hoodie.common.model.HoodieRecord;
|
||||
import java.io.IOException;
|
||||
import org.junit.Test;
|
||||
import org.mockito.Mockito;
|
||||
|
||||
public class TestWriteStatus {
|
||||
@Test
|
||||
public void testFailureFraction() throws IOException {
|
||||
WriteStatus status = new WriteStatus(true, 0.1);
|
||||
Throwable t = new Exception("some error in writing");
|
||||
for (int i = 0; i < 1000; i++) {
|
||||
status.markFailure(Mockito.mock(HoodieRecord.class), t, null);
|
||||
}
|
||||
assertTrue(status.getFailedRecords().size() > 0);
|
||||
assertTrue(status.getFailedRecords().size() < 150); //150 instead of 100, to prevent flaky test
|
||||
assertTrue(status.hasErrors());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSuccessRecordTracking() {
|
||||
WriteStatus status = new WriteStatus(false, 1.0);
|
||||
Throwable t = new Exception("some error in writing");
|
||||
for (int i = 0; i < 1000; i++) {
|
||||
status.markSuccess(Mockito.mock(HoodieRecord.class), null);
|
||||
status.markFailure(Mockito.mock(HoodieRecord.class), t, null);
|
||||
}
|
||||
assertEquals(1000, status.getFailedRecords().size());
|
||||
assertTrue(status.hasErrors());
|
||||
assertTrue(status.getWrittenRecords().isEmpty());
|
||||
assertEquals(2000, status.getTotalRecords());
|
||||
}
|
||||
}
|
||||
@@ -144,6 +144,10 @@ public class TestRawTripPayload implements HoodieRecordPayload<TestRawTripPayloa
|
||||
|
||||
private Map<String, String> mergedMetadataMap = new HashMap<>();
|
||||
|
||||
/**
 * Creates a write status by forwarding both arguments, unchanged, to the superclass constructor.
 * NOTE(review): the superclass is presumably WriteStatus; the class header is not visible in this
 * hunk, so confirm against the full file.
 *
 * @param trackSuccessRecords passed straight through to the superclass constructor
 * @param failureFraction passed straight through to the superclass constructor
 */
public MetadataMergeWriteStatus(Boolean trackSuccessRecords, Double failureFraction) {
|
||||
// No state of its own is initialised here; construction is delegated entirely to the superclass.
super(trackSuccessRecords, failureFraction);
|
||||
}
|
||||
|
||||
public static Map<String, String> mergeMetadataForWriteStatuses(List<WriteStatus> writeStatuses) {
|
||||
Map<String, String> allWriteStatusMergedMetadataMap = new HashMap<>();
|
||||
for (WriteStatus writeStatus : writeStatuses) {
|
||||
|
||||
@@ -337,7 +337,7 @@ public class TestHbaseIndex {
|
||||
}
|
||||
|
||||
private WriteStatus getSampleWriteStatus(final int numInserts, final int numUpdateWrites) {
|
||||
final WriteStatus writeStatus = new WriteStatus();
|
||||
final WriteStatus writeStatus = new WriteStatus(false, 0.1);
|
||||
HoodieWriteStat hoodieWriteStat = new HoodieWriteStat();
|
||||
hoodieWriteStat.setNumInserts(numInserts);
|
||||
hoodieWriteStat.setNumUpdateWrites(numUpdateWrites);
|
||||
|
||||
@@ -342,10 +342,10 @@ public class TestCopyOnWriteTable {
|
||||
assertEquals(2, returnedStatuses.size());
|
||||
assertEquals("2016/01/31", returnedStatuses.get(0).getPartitionPath());
|
||||
assertEquals(0, returnedStatuses.get(0).getFailedRecords().size());
|
||||
assertEquals(10, returnedStatuses.get(0).getWrittenRecords().size());
|
||||
assertEquals(10, returnedStatuses.get(0).getTotalRecords());
|
||||
assertEquals("2016/02/01", returnedStatuses.get(1).getPartitionPath());
|
||||
assertEquals(0, returnedStatuses.get(0).getFailedRecords().size());
|
||||
assertEquals(1, returnedStatuses.get(1).getWrittenRecords().size());
|
||||
assertEquals(1, returnedStatuses.get(1).getTotalRecords());
|
||||
|
||||
// Case 2:
|
||||
// 1 record for partition 1, 5 record for partition 2, 1 records for partition 3.
|
||||
@@ -358,13 +358,13 @@ public class TestCopyOnWriteTable {
|
||||
|
||||
assertEquals(3, returnedStatuses.size());
|
||||
assertEquals("2016/01/31", returnedStatuses.get(0).getPartitionPath());
|
||||
assertEquals(1, returnedStatuses.get(0).getWrittenRecords().size());
|
||||
assertEquals(1, returnedStatuses.get(0).getTotalRecords());
|
||||
|
||||
assertEquals("2016/02/01", returnedStatuses.get(1).getPartitionPath());
|
||||
assertEquals(5, returnedStatuses.get(1).getWrittenRecords().size());
|
||||
assertEquals(5, returnedStatuses.get(1).getTotalRecords());
|
||||
|
||||
assertEquals("2016/02/02", returnedStatuses.get(2).getPartitionPath());
|
||||
assertEquals(1, returnedStatuses.get(2).getWrittenRecords().size());
|
||||
assertEquals(1, returnedStatuses.get(2).getTotalRecords());
|
||||
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user