[HUDI-684] Introduced abstraction for writing and reading different types of base file formats. (#1687)
Notable changes:
1. HoodieFileWriter and HoodieFileReader abstractions for writer/reader side of a base file format
2. HoodieDataBlock abstraction for creating data blocks specific to base file formats. (e.g. Parquet has HoodieAvroDataBlock)
3. All hardcoded references to Parquet / Parquet-based classes have been abstracted to call methods which accept a base file format
4. HiveSyncTool accepts the base file format as a CLI parameter
5. HoodieDeltaStreamer accepts the base file format as a CLI parameter
6. HoodieSparkSqlWriter accepts the base file format as a parameter
This commit is contained in:
@@ -18,6 +18,7 @@
|
||||
|
||||
package org.apache.hudi.integ;
|
||||
|
||||
import org.apache.hudi.common.model.HoodieFileFormat;
|
||||
import org.apache.hudi.common.util.CollectionUtils;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
|
||||
@@ -67,6 +68,8 @@ public class ITTestHoodieDemo extends ITTestBase {
|
||||
private static final String HIVE_INCREMENTAL_MOR_RO_COMMANDS = HOODIE_WS_ROOT + "/docker/demo/hive-incremental-mor-ro.commands";
|
||||
private static final String HIVE_INCREMENTAL_MOR_RT_COMMANDS = HOODIE_WS_ROOT + "/docker/demo/hive-incremental-mor-rt.commands";
|
||||
|
||||
private static HoodieFileFormat baseFileFormat;
|
||||
|
||||
private static String HIVE_SYNC_CMD_FMT =
|
||||
" --enable-hive-sync --hoodie-conf hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000 "
|
||||
+ " --hoodie-conf hoodie.datasource.hive_sync.username=hive "
|
||||
@@ -76,7 +79,9 @@ public class ITTestHoodieDemo extends ITTestBase {
|
||||
+ " --hoodie-conf hoodie.datasource.hive_sync.table=%s";
|
||||
|
||||
@Test
|
||||
public void testDemo() throws Exception {
|
||||
public void testParquetDemo() throws Exception {
|
||||
baseFileFormat = HoodieFileFormat.PARQUET;
|
||||
|
||||
setupDemo();
|
||||
|
||||
// batch 1
|
||||
@@ -122,6 +127,7 @@ public class ITTestHoodieDemo extends ITTestBase {
|
||||
List<String> cmds = CollectionUtils.createImmutableList(
|
||||
"spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer " + HUDI_UTILITIES_BUNDLE
|
||||
+ " --table-type COPY_ON_WRITE "
|
||||
+ " --base-file-format " + baseFileFormat.toString()
|
||||
+ " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
|
||||
+ " --target-base-path " + COW_BASE_PATH + " --target-table " + COW_TABLE_NAME
|
||||
+ " --props /var/demo/config/dfs-source.properties"
|
||||
@@ -130,12 +136,14 @@ public class ITTestHoodieDemo extends ITTestBase {
|
||||
+ " --database default"
|
||||
+ " --table " + COW_TABLE_NAME
|
||||
+ " --base-path " + COW_BASE_PATH
|
||||
+ " --base-file-format " + baseFileFormat.toString()
|
||||
+ " --user hive"
|
||||
+ " --pass hive"
|
||||
+ " --jdbc-url jdbc:hive2://hiveserver:10000"
|
||||
+ " --partitioned-by dt",
|
||||
("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer " + HUDI_UTILITIES_BUNDLE
|
||||
+ " --table-type MERGE_ON_READ "
|
||||
+ " --base-file-format " + baseFileFormat.toString()
|
||||
+ " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
|
||||
+ " --target-base-path " + MOR_BASE_PATH + " --target-table " + MOR_TABLE_NAME
|
||||
+ " --props /var/demo/config/dfs-source.properties"
|
||||
|
||||
Reference in New Issue
Block a user