1
0

[HUDI-1104] Adding support for UserDefinedPartitioners and SortModes to BulkInsert with Rows (#3149)

This commit is contained in:
Sivabalan Narayanan
2021-07-07 11:15:25 -04:00
committed by GitHub
parent 55ecbc662e
commit ea9e5d0e8b
31 changed files with 618 additions and 82 deletions

View File

@@ -30,7 +30,9 @@ import org.apache.spark.sql.Row;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -61,13 +63,40 @@ public class HoodieBulkInsertInternalWriterTestBase extends HoodieClientTestHarn
}
protected void assertWriteStatuses(List<HoodieInternalWriteStatus> writeStatuses, int batches, int size,
Option<List<String>> fileAbsPaths, Option<List<String>> fileNames) {
assertEquals(batches, writeStatuses.size());
Option<List<String>> fileAbsPaths, Option<List<String>> fileNames) {
assertWriteStatuses(writeStatuses, batches, size, false, fileAbsPaths, fileNames);
}
protected void assertWriteStatuses(List<HoodieInternalWriteStatus> writeStatuses, int batches, int size, boolean areRecordsSorted,
Option<List<String>> fileAbsPaths, Option<List<String>> fileNames) {
if (areRecordsSorted) {
assertEquals(batches, writeStatuses.size());
} else {
assertEquals(Math.min(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS.length, batches), writeStatuses.size());
}
Map<String, Long> sizeMap = new HashMap<>();
if (!areRecordsSorted) {
// <size> records are written per batch. Batches are assigned to partition paths round-robin (i % 3), so every
// 3rd batch goes into the same writeStatus. So, populating the expected size per write status
for (int i = 0; i < batches; i++) {
String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[i % 3];
if (!sizeMap.containsKey(partitionPath)) {
sizeMap.put(partitionPath, 0L);
}
sizeMap.put(partitionPath, sizeMap.get(partitionPath) + size);
}
}
int counter = 0;
for (HoodieInternalWriteStatus writeStatus : writeStatuses) {
// verify write status
assertEquals(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3], writeStatus.getPartitionPath());
assertEquals(writeStatus.getTotalRecords(), size);
if (areRecordsSorted) {
assertEquals(writeStatus.getTotalRecords(), size);
} else {
assertEquals(writeStatus.getTotalRecords(), sizeMap.get(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3]));
}
assertNull(writeStatus.getGlobalError());
assertEquals(writeStatus.getFailedRowsSize(), 0);
assertEquals(writeStatus.getTotalErrorRecords(), 0);
@@ -82,8 +111,13 @@ public class HoodieBulkInsertInternalWriterTestBase extends HoodieClientTestHarn
.substring(writeStatus.getStat().getPath().lastIndexOf('/') + 1));
}
HoodieWriteStat writeStat = writeStatus.getStat();
assertEquals(size, writeStat.getNumInserts());
assertEquals(size, writeStat.getNumWrites());
if (areRecordsSorted) {
assertEquals(size, writeStat.getNumInserts());
assertEquals(size, writeStat.getNumWrites());
} else {
assertEquals(sizeMap.get(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3]), writeStat.getNumInserts());
assertEquals(sizeMap.get(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3]), writeStat.getNumWrites());
}
assertEquals(fileId, writeStat.getFileId());
assertEquals(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter++ % 3], writeStat.getPartitionPath());
assertEquals(0, writeStat.getNumDeletes());