Revamped Deltastreamer (#93)
* Add analytics to site * Fix ugly favicon * New & Improved HoodieDeltaStreamer - Can incrementally consume from HDFS or Kafka, with exactly-once semantics! - Supports Json/Avro data, Source can also do custom things - Source is totally pluggable, via reflection - Key generation is pluggable, currently added SimpleKeyGenerator - Schema provider is pluggable, currently Filebased schemas - Configurable field to break ties during preCombine - Finally, can also plugin the HoodieRecordPayload, to get other merge types than overwriting - Handles efficient avro serialization in Spark Pending : - Rewriting of HiveIncrPullSource - Hive sync via hoodie-hive - Cleanup & tests * Minor fixes from master rebase * Implementation of HiveIncrPullSource - Copies commit by commit from source to target * Adding TimestampBasedKeyGenerator - Supports unix time & date strings
This commit is contained in:
@@ -17,6 +17,7 @@
|
||||
package com.uber.hoodie.avro;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
|
||||
@@ -29,6 +29,7 @@ import java.io.IOException;
|
||||
/**
|
||||
 * This is a payload to wrap an existing Hoodie Avro Record.
|
||||
 * Useful to create a HoodieRecord over existing GenericRecords in a hoodie dataset (useful in compactions)
|
||||
*
|
||||
*/
|
||||
public class HoodieAvroPayload implements HoodieRecordPayload<HoodieAvroPayload> {
|
||||
private final Optional<GenericRecord> record;
|
||||
|
||||
@@ -16,6 +16,8 @@
|
||||
|
||||
package com.uber.hoodie.common.model;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.codehaus.jackson.annotate.JsonAutoDetect;
|
||||
@@ -33,21 +35,29 @@ import java.util.Map;
|
||||
/**
|
||||
* All the metadata that gets stored along with a commit.
|
||||
*/
|
||||
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||
public class HoodieCommitMetadata implements Serializable {
|
||||
private static volatile Logger log = LogManager.getLogger(HoodieCommitMetadata.class);
|
||||
private HashMap<String, List<HoodieWriteStat>> partitionToWriteStats;
|
||||
|
||||
private HashMap<String, String> extraMetadataMap;
|
||||
|
||||
/**
 * Creates an empty commit metadata: no per-partition write stats and no
 * extra metadata entries yet.
 */
public HoodieCommitMetadata() {
  this.partitionToWriteStats = new HashMap<>();
  this.extraMetadataMap = new HashMap<>();
}
|
||||
|
||||
public void addWriteStat(String partitionPath, HoodieWriteStat stat) {
|
||||
if (!partitionToWriteStats.containsKey(partitionPath)) {
|
||||
partitionToWriteStats.put(partitionPath, new ArrayList<HoodieWriteStat>());
|
||||
partitionToWriteStats.put(partitionPath, new ArrayList<>());
|
||||
}
|
||||
partitionToWriteStats.get(partitionPath).add(stat);
|
||||
}
|
||||
|
||||
public void addMetadata(String metaKey, String value) {
|
||||
extraMetadataMap.put(metaKey, value);
|
||||
}
|
||||
|
||||
public List<HoodieWriteStat> getWriteStats(String partitionPath) {
|
||||
return partitionToWriteStats.get(partitionPath);
|
||||
}
|
||||
@@ -56,6 +66,10 @@ public class HoodieCommitMetadata implements Serializable {
|
||||
return partitionToWriteStats;
|
||||
}
|
||||
|
||||
public String getMetadata(String metaKey) {
|
||||
return extraMetadataMap.get(metaKey);
|
||||
}
|
||||
|
||||
public HashMap<String, String> getFileIdAndFullPaths() {
|
||||
HashMap<String, String> filePaths = new HashMap<>();
|
||||
// list all partitions paths
|
||||
|
||||
Reference in New Issue
Block a user