1
0

feat(SparkDataSource): add additional feature to drop later arriving dups

This commit is contained in:
jiale.tan
2018-10-04 17:56:51 -07:00
committed by vinoth chandar
parent 8485b9e263
commit 1628d044ac
5 changed files with 170 additions and 70 deletions

View File

@@ -196,6 +196,16 @@ public class HoodieTestDataGenerator {
return inserts;
}
/**
 * Produces one new record per entry of {@code origin}, each reusing the key of the
 * corresponding input record and carrying a value obtained from
 * {@code generateRandomValue(key, commitTime)}.
 *
 * <p>The input list is only read; results are returned in the same iteration order.
 *
 * @param commitTime commit time forwarded to {@code generateRandomValue} for every record
 * @param origin     records whose keys are reused for the generated records
 * @return a new list holding one freshly built record per input record
 * @throws IOException declared by this method; presumably propagated from
 *                     {@code generateRandomValue} (its signature is not visible here)
 */
public List<HoodieRecord> generateSameKeyInserts(String commitTime, List<HoodieRecord> origin) throws IOException {
  List<HoodieRecord> sameKeyRecords = new ArrayList<>();
  for (HoodieRecord existing : origin) {
    // Keep the identity (key) of the existing record but attach a newly generated value.
    HoodieKey sharedKey = existing.getKey();
    sameKeyRecords.add(new HoodieRecord(sharedKey, generateRandomValue(sharedKey, commitTime)));
  }
  return sameKeyRecords;
}
public List<HoodieRecord> generateDeletes(String commitTime, Integer n) throws IOException {
List<HoodieRecord> inserts = generateInserts(commitTime, n);
return generateDeletesFromExistingRecords(inserts);