1
0

[HUDI-724] Parallelize getSmallFiles for partitions (#1421)

Co-authored-by: Feichi Feng <feicfeng@amazon.com>
This commit is contained in:
ffcchi
2020-03-30 01:14:38 -06:00
committed by GitHub
parent fa36082554
commit 1f5b0c77d6
6 changed files with 37 additions and 24 deletions

View File

@@ -415,7 +415,7 @@ public class TestCopyOnWriteTable extends HoodieClientTestHarness {
records.addAll(updateRecords);
WorkloadProfile profile = new WorkloadProfile(jsc.parallelize(records));
HoodieCopyOnWriteTable.UpsertPartitioner partitioner =
- (HoodieCopyOnWriteTable.UpsertPartitioner) table.getUpsertPartitioner(profile);
+ (HoodieCopyOnWriteTable.UpsertPartitioner) table.getUpsertPartitioner(profile, jsc);
assertEquals("Update record should have gone to the 1 update partition", 0, partitioner.getPartition(
new Tuple2<>(updateRecords.get(0).getKey(), Option.ofNullable(updateRecords.get(0).getCurrentLocation()))));
return partitioner;

View File

@@ -1272,7 +1272,7 @@ public class TestMergeOnReadTable extends HoodieClientTestHarness {
JavaRDD<HoodieRecord> deleteRDD = jsc.parallelize(fewRecordsForDelete, 1);
// initialize partitioner
- hoodieTable.getUpsertPartitioner(new WorkloadProfile(deleteRDD));
+ hoodieTable.getUpsertPartitioner(new WorkloadProfile(deleteRDD), jsc);
final List<List<WriteStatus>> deleteStatus = jsc.parallelize(Arrays.asList(1)).map(x -> {
return hoodieTable.handleUpdate(newDeleteTime, partitionPath, fileId, fewRecordsForDelete.iterator());
}).map(x -> (List<WriteStatus>) HoodieClientTestUtils.collectStatuses(x)).collect();