1
0

[HUDI-1398] Align insert file size for reducing IO (#2256)

* [HUDI-1398] Align insert file size for reducing IO

Co-authored-by: zhang wen <wen.zhang@dmall.com>
This commit is contained in:
steven zhang
2020-12-29 11:52:35 +08:00
committed by GitHub
parent 0ecdec348e
commit 4c17528de0
2 changed files with 22 additions and 3 deletions

View File

@@ -218,7 +218,11 @@ public class UpsertPartitioner<T extends HoodieRecordPayload<T>> extends Partiti
+ ", totalInsertBuckets => " + insertBuckets + ", recordsPerBucket => " + insertRecordsPerBucket);
for (int b = 0; b < insertBuckets; b++) {
bucketNumbers.add(totalBuckets);
recordsPerBucket.add(totalUnassignedInserts / insertBuckets);
if (b < insertBuckets - 1) {
recordsPerBucket.add(insertRecordsPerBucket);
} else {
recordsPerBucket.add(totalUnassignedInserts - (insertBuckets - 1) * insertRecordsPerBucket);
}
BucketInfo bucketInfo = new BucketInfo();
bucketInfo.bucketType = BucketType.INSERT;
bucketInfo.partitionPath = partitionPath;

View File

@@ -185,6 +185,21 @@ public class TestUpsertPartitioner extends HoodieClientTestBase {
assertEquals(2, insertBuckets.size(), "Total of 2 insert buckets");
}
/**
 * Verifies how inserts are spread over insert buckets for a single partition path.
 *
 * <p>The partitioner is obtained from {@code getUpsertPartitioner(0, 250, 100, 1024, path, false)}.
 * The test expects three insert buckets whose weights are 0.4, 0.4 and 0.2: two buckets sized at
 * the configured copy-on-write insert split size, and one bucket holding whatever is left of the
 * 250 inserts.
 * NOTE(review): the expected weights presumably rely on an insert split size of 100 in the
 * test write config (100, 100, 50 records) - confirm against the test base configuration.
 */
@Test
public void testUpsertPartitionerWithRecordsPerBucket() throws Exception {
  final String partition = "2016/09/26";
  UpsertPartitioner upsertPartitioner = getUpsertPartitioner(0, 250, 100, 1024, partition, false);
  List<InsertBucketCumulativeWeightPair> buckets = upsertPartitioner.getInsertBuckets(partition);

  // The split size is only used to build the assertion messages and the leftover count.
  int splitSize = upsertPartitioner.config.getCopyOnWriteInsertSplitSize();
  int leftoverInserts = 250 - 2 * splitSize;

  // Three insert buckets are expected: two full ones followed by the remainder.
  assertEquals(3, buckets.size(), "Total of 3 insert buckets");

  double[] expectedWeights = {0.4, 0.4, 0.2};
  int[] expectedRecordCounts = {splitSize, splitSize, leftoverInserts};
  for (int idx = 0; idx < expectedWeights.length; idx++) {
    assertEquals(expectedWeights[idx], buckets.get(idx).getLeft().weight,
        "insert " + expectedRecordCounts[idx] + " records");
  }
}
@Test
public void testPartitionWeight() throws Exception {
final String testPartitionPath = "2016/09/26";
@@ -286,8 +301,8 @@ public class TestUpsertPartitioner extends HoodieClientTestBase {
"Bucket 3 is INSERT");
assertEquals(4, insertBuckets.size(), "Total of 4 insert buckets");
weights = new Double[] { 0.08, 0.31, 0.31, 0.31};
cumulativeWeights = new Double[] { 0.08, 0.39, 0.69, 1.0};
weights = new Double[] { 0.08, 0.42, 0.42, 0.08};
cumulativeWeights = new Double[] { 0.08, 0.5, 0.92, 1.0};
assertInsertBuckets(weights, cumulativeWeights, insertBuckets);
}