Revamped Deltastreamer (#93)
* Add analytics to site * Fix ugly favicon * New & Improved HoodieDeltaStreamer - Can incrementally consume from HDFS or Kafka, with exactly-once semantics! - Supports Json/Avro data, Source can also do custom things - Source is totally pluggable, via reflection - Key generation is pluggable, currently added SimpleKeyGenerator - Schema provider is pluggable, currently Filebased schemas - Configurable field to break ties during preCombine - Finally, can also plugin the HoodieRecordPayload, to get other merge types than overwriting - Handles efficient avro serialization in Spark Pending : - Rewriting of HiveIncrPullSource - Hive sync via hoodie-hive - Cleanup & tests * Minor fixes from master rebase * Implementation of HiveIncrPullSource - Copies commit by commit from source to target * Adding TimestampBasedKeyGenerator - Supports unix time & date strings
This commit is contained in:
@@ -64,6 +64,8 @@ import java.nio.charset.StandardCharsets;
|
||||
import java.text.ParseException;
|
||||
import java.util.Collections;
|
||||
import java.util.Date;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Collectors;
|
||||
@@ -317,6 +319,16 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
|
||||
* Commit changes performed at the given commitTime marker
|
||||
*/
|
||||
public boolean commit(String commitTime, JavaRDD<WriteStatus> writeStatuses) {
|
||||
return commit(commitTime, writeStatuses, Optional.empty());
|
||||
}
|
||||
|
||||
/**
|
||||
* Commit changes performed at the given commitTime marker
|
||||
*/
|
||||
public boolean commit(String commitTime,
|
||||
JavaRDD<WriteStatus> writeStatuses,
|
||||
Optional<HashMap<String, String>> extraMetadata) {
|
||||
|
||||
logger.info("Comitting " + commitTime);
|
||||
// Create a Hoodie table which encapsulated the commits and files visible
|
||||
HoodieTable<T> table = HoodieTable
|
||||
@@ -333,6 +345,10 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
|
||||
for (Tuple2<String, HoodieWriteStat> stat : stats) {
|
||||
metadata.addWriteStat(stat._1(), stat._2());
|
||||
}
|
||||
// add in extra metadata
|
||||
if (extraMetadata.isPresent()) {
|
||||
extraMetadata.get().forEach((k, v) -> metadata.addMetadata(k, v));
|
||||
}
|
||||
|
||||
try {
|
||||
String actionType = table.getCommitActionType();
|
||||
|
||||
Reference in New Issue
Block a user