1
0

Revamped Deltastreamer (#93)

* Add analytics to site

* Fix ugly favicon

* New & Improved HoodieDeltaStreamer

 - Can incrementally consume from HDFS or Kafka, with exactly-once semantics!
 - Supports Json/Avro data, Source can also do custom things
 - Source is totally pluggable, via reflection
 - Key generation is pluggable, currently added SimpleKeyGenerator
 - Schema provider is pluggable, currently file-based schemas
 - Configurable field to break ties during preCombine
 - Finally, can also plug in the HoodieRecordPayload, to get other merge types than overwriting
 - Handles efficient avro serialization in Spark

 Pending :
 - Rewriting of HiveIncrPullSource
 - Hive sync via hoodie-hive
 - Cleanup & tests

* Minor fixes from master rebase

* Implementation of HiveIncrPullSource
 - Copies commit by commit from source to target

* Adding TimestampBasedKeyGenerator
 - Supports unix time & date strings
This commit is contained in:
vinoth chandar
2017-03-13 12:41:29 -07:00
committed by prazanna
parent c3257b9680
commit 69d3950a32
33 changed files with 1925 additions and 263 deletions

View File

@@ -64,6 +64,8 @@ import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
@@ -317,6 +319,16 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
* Commit changes performed at the given commitTime marker
*/
/**
 * Commits the changes performed at the given commit time, with no extra
 * commit metadata attached.
 *
 * @param commitTime    marker identifying the commit being finalized
 * @param writeStatuses statuses of the writes performed under this commit
 * @return whether the commit succeeded (delegated to the metadata-aware overload)
 */
public boolean commit(String commitTime, JavaRDD<WriteStatus> writeStatuses) {
// No caller-supplied metadata for this variant; pass an explicit empty Optional.
Optional<HashMap<String, String>> noExtraMetadata = Optional.empty();
return commit(commitTime, writeStatuses, noExtraMetadata);
}
/**
* Commit changes performed at the given commitTime marker
*/
public boolean commit(String commitTime,
JavaRDD<WriteStatus> writeStatuses,
Optional<HashMap<String, String>> extraMetadata) {
logger.info("Comitting " + commitTime);
// Create a Hoodie table which encapsulated the commits and files visible
HoodieTable<T> table = HoodieTable
@@ -333,6 +345,10 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
for (Tuple2<String, HoodieWriteStat> stat : stats) {
metadata.addWriteStat(stat._1(), stat._2());
}
// add in extra metadata
if (extraMetadata.isPresent()) {
extraMetadata.get().forEach((k, v) -> metadata.addMetadata(k, v));
}
try {
String actionType = table.getCommitActionType();

View File

@@ -27,6 +27,7 @@ import javax.annotation.concurrent.Immutable;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;
/**
@@ -230,6 +231,15 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
}
}
/**
 * Loads writer configuration properties from the given stream.
 *
 * <p>The stream is always closed before this method returns, whether or not
 * loading succeeds, so callers must not reuse it afterwards.
 *
 * @param inputStream stream of properties in {@link java.util.Properties} format
 * @return this builder, for chaining
 * @throws IOException if reading from the stream fails
 */
public Builder fromInputStream(InputStream inputStream) throws IOException {
// try-with-resources replaces the manual try/finally close; semantics are
// identical (stream closed on both the success and failure paths).
try (InputStream stream = inputStream) {
this.props.load(stream);
return this;
}
}
public Builder withPath(String basePath) {
props.setProperty(BASE_PATH_PROP, basePath);