[HUDI-1104] Adding support for UserDefinedPartitioners and SortModes to BulkInsert with Rows (#3149)
This commit is contained in:
committed by
GitHub
parent
55ecbc662e
commit
ea9e5d0e8b
@@ -30,7 +30,9 @@ import org.apache.spark.sql.Row;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
@@ -61,13 +63,40 @@ public class HoodieBulkInsertInternalWriterTestBase extends HoodieClientTestHarn
|
||||
}
|
||||
|
||||
protected void assertWriteStatuses(List<HoodieInternalWriteStatus> writeStatuses, int batches, int size,
|
||||
Option<List<String>> fileAbsPaths, Option<List<String>> fileNames) {
|
||||
assertEquals(batches, writeStatuses.size());
|
||||
Option<List<String>> fileAbsPaths, Option<List<String>> fileNames) {
|
||||
assertWriteStatuses(writeStatuses, batches, size, false, fileAbsPaths, fileNames);
|
||||
}
|
||||
|
||||
protected void assertWriteStatuses(List<HoodieInternalWriteStatus> writeStatuses, int batches, int size, boolean areRecordsSorted,
|
||||
Option<List<String>> fileAbsPaths, Option<List<String>> fileNames) {
|
||||
if (areRecordsSorted) {
|
||||
assertEquals(batches, writeStatuses.size());
|
||||
} else {
|
||||
assertEquals(Math.min(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS.length, batches), writeStatuses.size());
|
||||
}
|
||||
|
||||
Map<String, Long> sizeMap = new HashMap<>();
|
||||
if (!areRecordsSorted) {
|
||||
// <size> no of records are written per batch. Every 4th batch goes into same writeStatus. So, populating the size expected
|
||||
// per write status
|
||||
for (int i = 0; i < batches; i++) {
|
||||
String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[i % 3];
|
||||
if (!sizeMap.containsKey(partitionPath)) {
|
||||
sizeMap.put(partitionPath, 0L);
|
||||
}
|
||||
sizeMap.put(partitionPath, sizeMap.get(partitionPath) + size);
|
||||
}
|
||||
}
|
||||
|
||||
int counter = 0;
|
||||
for (HoodieInternalWriteStatus writeStatus : writeStatuses) {
|
||||
// verify write status
|
||||
assertEquals(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3], writeStatus.getPartitionPath());
|
||||
assertEquals(writeStatus.getTotalRecords(), size);
|
||||
if (areRecordsSorted) {
|
||||
assertEquals(writeStatus.getTotalRecords(), size);
|
||||
} else {
|
||||
assertEquals(writeStatus.getTotalRecords(), sizeMap.get(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3]));
|
||||
}
|
||||
assertNull(writeStatus.getGlobalError());
|
||||
assertEquals(writeStatus.getFailedRowsSize(), 0);
|
||||
assertEquals(writeStatus.getTotalErrorRecords(), 0);
|
||||
@@ -82,8 +111,13 @@ public class HoodieBulkInsertInternalWriterTestBase extends HoodieClientTestHarn
|
||||
.substring(writeStatus.getStat().getPath().lastIndexOf('/') + 1));
|
||||
}
|
||||
HoodieWriteStat writeStat = writeStatus.getStat();
|
||||
assertEquals(size, writeStat.getNumInserts());
|
||||
assertEquals(size, writeStat.getNumWrites());
|
||||
if (areRecordsSorted) {
|
||||
assertEquals(size, writeStat.getNumInserts());
|
||||
assertEquals(size, writeStat.getNumWrites());
|
||||
} else {
|
||||
assertEquals(sizeMap.get(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3]), writeStat.getNumInserts());
|
||||
assertEquals(sizeMap.get(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3]), writeStat.getNumWrites());
|
||||
}
|
||||
assertEquals(fileId, writeStat.getFileId());
|
||||
assertEquals(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter++ % 3], writeStat.getPartitionPath());
|
||||
assertEquals(0, writeStat.getNumDeletes());
|
||||
|
||||
Reference in New Issue
Block a user