1
0

feat(SparkDataSource): add additional feature to drop later arriving dups

This commit is contained in:
jiale.tan
2018-10-04 17:56:51 -07:00
committed by vinoth chandar
parent 8485b9e263
commit 1628d044ac
5 changed files with 170 additions and 70 deletions

View File

@@ -158,4 +158,15 @@ public class DataSourceUtils {
return incomingHoodieRecords;
}
}
@SuppressWarnings("unchecked")
public static JavaRDD<HoodieRecord> dropDuplicates(JavaSparkContext jssc,
JavaRDD<HoodieRecord> incomingHoodieRecords,
Map<String, String> parameters) throws Exception {
HoodieWriteConfig writeConfig = HoodieWriteConfig
.newBuilder()
.withPath(parameters.get("path"))
.withProps(parameters).build();
return dropDuplicates(jssc, incomingHoodieRecords, writeConfig);
}
}