1
0

Reworking the deltastreamer tool

- Standardize version of jackson
 - DFSPropertiesConfiguration replaces usage of commons PropertiesConfiguration
 - Remove dependency on ConstructorUtils
 - Throw error if ordering value is not present during key generation
 - Switch to shade plugin for hoodie-utilities
 - Added support for consumption of Confluent avro kafka serdes
 - Support for Confluent schema registry
 - KafkaSource now deals with skews nicely, by doing round robin allocation of source limit across partitions
 - Added support for BULK_INSERT operations as well
 - Pass in the payload class config properly into HoodieWriteClient
 - Fix documentation based on new usage
 - Adding tests on deltastreamer, sources and all new util classes.
This commit is contained in:
Vinoth Chandar
2018-08-04 03:35:30 -07:00
committed by vinoth chandar
parent fb95dbdedb
commit d58ddbd999
49 changed files with 1919 additions and 754 deletions

View File

@@ -41,7 +41,6 @@ public class HDFSParquetImportCommand implements CommandMarker {
@CliOption(key = "upsert", mandatory = false, unspecifiedDefaultValue = "false",
help = "Uses upsert API instead of the default insert API of WriteClient") boolean useUpsert,
@CliOption(key = "srcPath", mandatory = true, help = "Base path for the input dataset") final String srcPath,
@CliOption(key = "srcType", mandatory = true, help = "Source type for the input dataset") final String srcType,
@CliOption(key = "targetPath", mandatory = true, help = "Base path for the target hoodie dataset") final String
targetPath,
@CliOption(key = "tableName", mandatory = true, help = "Table name") final String tableName,
@@ -57,7 +56,7 @@ public class HDFSParquetImportCommand implements CommandMarker {
@CliOption(key = "sparkMemory", mandatory = true, help = "Spark executor memory") final String sparkMemory,
@CliOption(key = "retry", mandatory = true, help = "Number of retries") final String retry) throws Exception {
validate(format, srcType);
(new HDFSParquetImporter.FormatValidator()).validate("format", format);
boolean initialized = HoodieCLI.initConf();
HoodieCLI.initFS(initialized);
@@ -81,9 +80,4 @@ public class HDFSParquetImportCommand implements CommandMarker {
}
return "Dataset imported to hoodie format";
}
/**
 * Validates the {@code format} and {@code srcType} CLI option values by
 * delegating to the corresponding {@code HDFSParquetImporter} validators.
 *
 * <p>NOTE(review): presumably each validator throws on an invalid value,
 * aborting the import command early — confirm against the
 * {@code HDFSParquetImporter} validator implementations.
 *
 * @param format  input file format value supplied via the "format" CLI option
 * @param srcType source type value supplied via the "srcType" CLI option
 */
private void validate(String format, String srcType) {
(new HDFSParquetImporter.FormatValidator()).validate("format", format);
(new HDFSParquetImporter.SourceTypeValidator()).validate("srcType", srcType);
}
}