feat(SparkDataSource): add feature to drop later-arriving duplicates
This commit is contained in:
committed by
vinoth chandar
parent
8485b9e263
commit
1628d044ac
@@ -158,4 +158,15 @@ public class DataSourceUtils {
|
||||
return incomingHoodieRecords;
|
||||
}
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public static JavaRDD<HoodieRecord> dropDuplicates(JavaSparkContext jssc,
|
||||
JavaRDD<HoodieRecord> incomingHoodieRecords,
|
||||
Map<String, String> parameters) throws Exception {
|
||||
HoodieWriteConfig writeConfig = HoodieWriteConfig
|
||||
.newBuilder()
|
||||
.withPath(parameters.get("path"))
|
||||
.withProps(parameters).build();
|
||||
return dropDuplicates(jssc, incomingHoodieRecords, writeConfig);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user