[HUDI-684] Introduced abstraction for writing and reading different types of base file formats. (#1687)
Notable changes:
1. HoodieFileWriter and HoodieFileReader abstractions for writer/reader side of a base file format
2. HoodieDataBlock abstraction for creating format-specific data blocks for base file formats. (e.g. Parquet has HoodieAvroDataBlock)
3. All hardcoded references to Parquet / Parquet-based classes have been abstracted to call methods which accept a base file format
4. HiveSyncTool accepts the base file format as a CLI parameter
5. HoodieDeltaStreamer accepts the base file format as a CLI parameter
6. HoodieSparkSqlWriter accepts the base file format as a parameter
This commit is contained in:
@@ -270,7 +270,7 @@ public class DataSourceUtils {
|
||||
return dropDuplicates(jssc, incomingHoodieRecords, writeConfig);
|
||||
}
|
||||
|
||||
public static HiveSyncConfig buildHiveSyncConfig(TypedProperties props, String basePath) {
|
||||
public static HiveSyncConfig buildHiveSyncConfig(TypedProperties props, String basePath, String baseFileFormat) {
|
||||
checkRequiredProperties(props, Collections.singletonList(DataSourceWriteOptions.HIVE_TABLE_OPT_KEY()));
|
||||
HiveSyncConfig hiveSyncConfig = new HiveSyncConfig();
|
||||
hiveSyncConfig.basePath = basePath;
|
||||
@@ -280,6 +280,7 @@ public class DataSourceUtils {
|
||||
hiveSyncConfig.databaseName = props.getString(DataSourceWriteOptions.HIVE_DATABASE_OPT_KEY(),
|
||||
DataSourceWriteOptions.DEFAULT_HIVE_DATABASE_OPT_VAL());
|
||||
hiveSyncConfig.tableName = props.getString(DataSourceWriteOptions.HIVE_TABLE_OPT_KEY());
|
||||
hiveSyncConfig.baseFileFormat = baseFileFormat;
|
||||
hiveSyncConfig.hiveUser =
|
||||
props.getString(DataSourceWriteOptions.HIVE_USER_OPT_KEY(), DataSourceWriteOptions.DEFAULT_HIVE_USER_OPT_VAL());
|
||||
hiveSyncConfig.hivePass =
|
||||
|
||||
@@ -257,6 +257,7 @@ object DataSourceWriteOptions {
|
||||
val HIVE_SYNC_ENABLED_OPT_KEY = "hoodie.datasource.hive_sync.enable"
|
||||
val HIVE_DATABASE_OPT_KEY = "hoodie.datasource.hive_sync.database"
|
||||
val HIVE_TABLE_OPT_KEY = "hoodie.datasource.hive_sync.table"
|
||||
val HIVE_BASE_FILE_FORMAT_OPT_KEY = "hoodie.datasource.hive_sync.base_file_format"
|
||||
val HIVE_USER_OPT_KEY = "hoodie.datasource.hive_sync.username"
|
||||
val HIVE_PASS_OPT_KEY = "hoodie.datasource.hive_sync.password"
|
||||
val HIVE_URL_OPT_KEY = "hoodie.datasource.hive_sync.jdbcurl"
|
||||
@@ -270,6 +271,7 @@ object DataSourceWriteOptions {
|
||||
val DEFAULT_HIVE_SYNC_ENABLED_OPT_VAL = "false"
|
||||
val DEFAULT_HIVE_DATABASE_OPT_VAL = "default"
|
||||
val DEFAULT_HIVE_TABLE_OPT_VAL = "unknown"
|
||||
val DEFAULT_HIVE_BASE_FILE_FORMAT_OPT_VAL = "PARQUET"
|
||||
val DEFAULT_HIVE_USER_OPT_VAL = "hive"
|
||||
val DEFAULT_HIVE_PASS_OPT_VAL = "hive"
|
||||
val DEFAULT_HIVE_URL_OPT_VAL = "jdbc:hive2://localhost:10000"
|
||||
|
||||
@@ -212,6 +212,7 @@ private[hudi] object HoodieSparkSqlWriter {
|
||||
HIVE_SYNC_ENABLED_OPT_KEY -> DEFAULT_HIVE_SYNC_ENABLED_OPT_VAL,
|
||||
HIVE_DATABASE_OPT_KEY -> DEFAULT_HIVE_DATABASE_OPT_VAL,
|
||||
HIVE_TABLE_OPT_KEY -> DEFAULT_HIVE_TABLE_OPT_VAL,
|
||||
HIVE_BASE_FILE_FORMAT_OPT_KEY -> DEFAULT_HIVE_BASE_FILE_FORMAT_OPT_VAL,
|
||||
HIVE_USER_OPT_KEY -> DEFAULT_HIVE_USER_OPT_VAL,
|
||||
HIVE_PASS_OPT_KEY -> DEFAULT_HIVE_PASS_OPT_VAL,
|
||||
HIVE_URL_OPT_KEY -> DEFAULT_HIVE_URL_OPT_VAL,
|
||||
@@ -239,6 +240,7 @@ private[hudi] object HoodieSparkSqlWriter {
|
||||
private def buildSyncConfig(basePath: Path, parameters: Map[String, String]): HiveSyncConfig = {
|
||||
val hiveSyncConfig: HiveSyncConfig = new HiveSyncConfig()
|
||||
hiveSyncConfig.basePath = basePath.toString
|
||||
hiveSyncConfig.baseFileFormat = parameters(HIVE_BASE_FILE_FORMAT_OPT_KEY);
|
||||
hiveSyncConfig.usePreApacheInputFormat =
|
||||
parameters.get(HIVE_USE_PRE_APACHE_INPUT_FORMAT_OPT_KEY).exists(r => r.toBoolean)
|
||||
hiveSyncConfig.databaseName = parameters(HIVE_DATABASE_OPT_KEY)
|
||||
|
||||
Reference in New Issue
Block a user