|
|
|
|
@@ -25,6 +25,7 @@ import org.apache.hudi.exception.HoodieException;
|
|
|
|
|
import org.apache.hudi.utilities.deltastreamer.HoodieMultiTableDeltaStreamer;
|
|
|
|
|
import org.apache.hudi.utilities.deltastreamer.TableExecutionContext;
|
|
|
|
|
import org.apache.hudi.utilities.schema.FilebasedSchemaProvider;
|
|
|
|
|
import org.apache.hudi.utilities.schema.SchemaRegistryProvider;
|
|
|
|
|
import org.apache.hudi.utilities.sources.JsonKafkaSource;
|
|
|
|
|
import org.apache.hudi.utilities.sources.ParquetDFSSource;
|
|
|
|
|
import org.apache.hudi.utilities.sources.TestDataSource;
|
|
|
|
|
@@ -49,12 +50,13 @@ public class TestHoodieMultiTableDeltaStreamer extends HoodieDeltaStreamerTestBa
|
|
|
|
|
|
|
|
|
|
static class TestHelpers {
|
|
|
|
|
|
|
|
|
|
static HoodieMultiTableDeltaStreamer.Config getConfig(String fileName, String configFolder, String sourceClassName, boolean enableHiveSync, boolean enableMetaSync) {
|
|
|
|
|
return getConfig(fileName, configFolder, sourceClassName, enableHiveSync, enableMetaSync, true, "multi_table_dataset");
|
|
|
|
|
static HoodieMultiTableDeltaStreamer.Config getConfig(String fileName, String configFolder, String sourceClassName, boolean enableHiveSync, boolean enableMetaSync,
|
|
|
|
|
Class<?> clazz) {
|
|
|
|
|
return getConfig(fileName, configFolder, sourceClassName, enableHiveSync, enableMetaSync, true, "multi_table_dataset", clazz);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static HoodieMultiTableDeltaStreamer.Config getConfig(String fileName, String configFolder, String sourceClassName, boolean enableHiveSync, boolean enableMetaSync,
|
|
|
|
|
boolean setSchemaProvider, String basePathPrefix) {
|
|
|
|
|
boolean setSchemaProvider, String basePathPrefix, Class<?> clazz) {
|
|
|
|
|
HoodieMultiTableDeltaStreamer.Config config = new HoodieMultiTableDeltaStreamer.Config();
|
|
|
|
|
config.configFolder = configFolder;
|
|
|
|
|
config.targetTableName = "dummy_table";
|
|
|
|
|
@@ -64,7 +66,7 @@ public class TestHoodieMultiTableDeltaStreamer extends HoodieDeltaStreamerTestBa
|
|
|
|
|
config.sourceClassName = sourceClassName;
|
|
|
|
|
config.sourceOrderingField = "timestamp";
|
|
|
|
|
if (setSchemaProvider) {
|
|
|
|
|
config.schemaProviderClassName = FilebasedSchemaProvider.class.getName();
|
|
|
|
|
config.schemaProviderClassName = clazz != null ? clazz.getName() : FilebasedSchemaProvider.class.getName();
|
|
|
|
|
}
|
|
|
|
|
config.enableHiveSync = enableHiveSync;
|
|
|
|
|
config.enableMetaSync = enableMetaSync;
|
|
|
|
|
@@ -74,7 +76,7 @@ public class TestHoodieMultiTableDeltaStreamer extends HoodieDeltaStreamerTestBa
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testInvalidHiveSyncProps() throws IOException {
|
|
|
|
|
HoodieMultiTableDeltaStreamer.Config cfg = TestHelpers.getConfig(PROPS_INVALID_HIVE_SYNC_TEST_SOURCE1, dfsBasePath + "/config", TestDataSource.class.getName(), true, true);
|
|
|
|
|
HoodieMultiTableDeltaStreamer.Config cfg = TestHelpers.getConfig(PROPS_INVALID_HIVE_SYNC_TEST_SOURCE1, dfsBasePath + "/config", TestDataSource.class.getName(), true, true, null);
|
|
|
|
|
Exception e = assertThrows(HoodieException.class, () -> {
|
|
|
|
|
new HoodieMultiTableDeltaStreamer(cfg, jsc);
|
|
|
|
|
}, "Should fail when hive sync table not provided with enableHiveSync flag");
|
|
|
|
|
@@ -84,7 +86,7 @@ public class TestHoodieMultiTableDeltaStreamer extends HoodieDeltaStreamerTestBa
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testInvalidPropsFilePath() throws IOException {
|
|
|
|
|
HoodieMultiTableDeltaStreamer.Config cfg = TestHelpers.getConfig(PROPS_INVALID_FILE, dfsBasePath + "/config", TestDataSource.class.getName(), true, true);
|
|
|
|
|
HoodieMultiTableDeltaStreamer.Config cfg = TestHelpers.getConfig(PROPS_INVALID_FILE, dfsBasePath + "/config", TestDataSource.class.getName(), true, true, null);
|
|
|
|
|
Exception e = assertThrows(IllegalArgumentException.class, () -> {
|
|
|
|
|
new HoodieMultiTableDeltaStreamer(cfg, jsc);
|
|
|
|
|
}, "Should fail when invalid props file is provided");
|
|
|
|
|
@@ -94,7 +96,7 @@ public class TestHoodieMultiTableDeltaStreamer extends HoodieDeltaStreamerTestBa
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testInvalidTableConfigFilePath() throws IOException {
|
|
|
|
|
HoodieMultiTableDeltaStreamer.Config cfg = TestHelpers.getConfig(PROPS_INVALID_TABLE_CONFIG_FILE, dfsBasePath + "/config", TestDataSource.class.getName(), true, true);
|
|
|
|
|
HoodieMultiTableDeltaStreamer.Config cfg = TestHelpers.getConfig(PROPS_INVALID_TABLE_CONFIG_FILE, dfsBasePath + "/config", TestDataSource.class.getName(), true, true, null);
|
|
|
|
|
Exception e = assertThrows(IllegalArgumentException.class, () -> {
|
|
|
|
|
new HoodieMultiTableDeltaStreamer(cfg, jsc);
|
|
|
|
|
}, "Should fail when invalid table config props file path is provided");
|
|
|
|
|
@@ -104,7 +106,7 @@ public class TestHoodieMultiTableDeltaStreamer extends HoodieDeltaStreamerTestBa
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testCustomConfigProps() throws IOException {
|
|
|
|
|
HoodieMultiTableDeltaStreamer.Config cfg = TestHelpers.getConfig(PROPS_FILENAME_TEST_SOURCE1, dfsBasePath + "/config", TestDataSource.class.getName(), false, false);
|
|
|
|
|
HoodieMultiTableDeltaStreamer.Config cfg = TestHelpers.getConfig(PROPS_FILENAME_TEST_SOURCE1, dfsBasePath + "/config", TestDataSource.class.getName(), false, false, SchemaRegistryProvider.class);
|
|
|
|
|
HoodieMultiTableDeltaStreamer streamer = new HoodieMultiTableDeltaStreamer(cfg, jsc);
|
|
|
|
|
TableExecutionContext executionContext = streamer.getTableExecutionContexts().get(1);
|
|
|
|
|
assertEquals(2, streamer.getTableExecutionContexts().size());
|
|
|
|
|
@@ -114,13 +116,16 @@ public class TestHoodieMultiTableDeltaStreamer extends HoodieDeltaStreamerTestBa
|
|
|
|
|
assertEquals("_row_key", executionContext.getProperties().getString(DataSourceWriteOptions.RECORDKEY_FIELD().key()));
|
|
|
|
|
assertEquals(TestHoodieDeltaStreamer.TestGenerator.class.getName(), executionContext.getProperties().getString(DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME().key()));
|
|
|
|
|
assertEquals("uber_hive_dummy_table", executionContext.getProperties().getString(HoodieMultiTableDeltaStreamer.Constants.HIVE_SYNC_TABLE_PROP));
|
|
|
|
|
assertEquals("http://localhost:8081/subjects/random-value/versions/latest", executionContext.getProperties().getString(SchemaRegistryProvider.Config.SRC_SCHEMA_REGISTRY_URL_PROP));
|
|
|
|
|
assertEquals("http://localhost:8081/subjects/topic2-value/versions/latest",
|
|
|
|
|
streamer.getTableExecutionContexts().get(0).getProperties().getString(SchemaRegistryProvider.Config.SRC_SCHEMA_REGISTRY_URL_PROP));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
@Disabled
|
|
|
|
|
public void testInvalidIngestionProps() {
|
|
|
|
|
Exception e = assertThrows(Exception.class, () -> {
|
|
|
|
|
HoodieMultiTableDeltaStreamer.Config cfg = TestHelpers.getConfig(PROPS_FILENAME_TEST_SOURCE1, dfsBasePath + "/config", TestDataSource.class.getName(), true, true);
|
|
|
|
|
HoodieMultiTableDeltaStreamer.Config cfg = TestHelpers.getConfig(PROPS_FILENAME_TEST_SOURCE1, dfsBasePath + "/config", TestDataSource.class.getName(), true, true, null);
|
|
|
|
|
new HoodieMultiTableDeltaStreamer(cfg, jsc);
|
|
|
|
|
}, "Creation of execution object should fail without kafka topic");
|
|
|
|
|
log.debug("Creation of execution object failed with error: " + e.getMessage(), e);
|
|
|
|
|
@@ -139,7 +144,7 @@ public class TestHoodieMultiTableDeltaStreamer extends HoodieDeltaStreamerTestBa
|
|
|
|
|
testUtils.sendMessages(topicName1, Helpers.jsonifyRecords(dataGenerator.generateInsertsAsPerSchema("000", 5, HoodieTestDataGenerator.TRIP_SCHEMA)));
|
|
|
|
|
testUtils.sendMessages(topicName2, Helpers.jsonifyRecords(dataGenerator.generateInsertsAsPerSchema("000", 10, HoodieTestDataGenerator.SHORT_TRIP_SCHEMA)));
|
|
|
|
|
|
|
|
|
|
HoodieMultiTableDeltaStreamer.Config cfg = TestHelpers.getConfig(PROPS_FILENAME_TEST_SOURCE1, dfsBasePath + "/config", JsonKafkaSource.class.getName(), false, false);
|
|
|
|
|
HoodieMultiTableDeltaStreamer.Config cfg = TestHelpers.getConfig(PROPS_FILENAME_TEST_SOURCE1, dfsBasePath + "/config", JsonKafkaSource.class.getName(), false, false, null);
|
|
|
|
|
HoodieMultiTableDeltaStreamer streamer = new HoodieMultiTableDeltaStreamer(cfg, jsc);
|
|
|
|
|
List<TableExecutionContext> executionContexts = streamer.getTableExecutionContexts();
|
|
|
|
|
TypedProperties properties = executionContexts.get(1).getProperties();
|
|
|
|
|
@@ -189,7 +194,7 @@ public class TestHoodieMultiTableDeltaStreamer extends HoodieDeltaStreamerTestBa
|
|
|
|
|
String parquetPropsFile = populateCommonPropsAndWriteToFile();
|
|
|
|
|
|
|
|
|
|
HoodieMultiTableDeltaStreamer.Config cfg = TestHelpers.getConfig(parquetPropsFile, dfsBasePath + "/config", ParquetDFSSource.class.getName(), false, false,
|
|
|
|
|
false, "multi_table_parquet");
|
|
|
|
|
false, "multi_table_parquet", null);
|
|
|
|
|
HoodieMultiTableDeltaStreamer streamer = new HoodieMultiTableDeltaStreamer(cfg, jsc);
|
|
|
|
|
|
|
|
|
|
List<TableExecutionContext> executionContexts = streamer.getTableExecutionContexts();
|
|
|
|
|
@@ -219,7 +224,7 @@ public class TestHoodieMultiTableDeltaStreamer extends HoodieDeltaStreamerTestBa
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testTableLevelProperties() throws IOException {
|
|
|
|
|
HoodieMultiTableDeltaStreamer.Config cfg = TestHelpers.getConfig(PROPS_FILENAME_TEST_SOURCE1, dfsBasePath + "/config", TestDataSource.class.getName(), false, false);
|
|
|
|
|
HoodieMultiTableDeltaStreamer.Config cfg = TestHelpers.getConfig(PROPS_FILENAME_TEST_SOURCE1, dfsBasePath + "/config", TestDataSource.class.getName(), false, false, null);
|
|
|
|
|
HoodieMultiTableDeltaStreamer streamer = new HoodieMultiTableDeltaStreamer(cfg, jsc);
|
|
|
|
|
List<TableExecutionContext> tableExecutionContexts = streamer.getTableExecutionContexts();
|
|
|
|
|
tableExecutionContexts.forEach(tableExecutionContext -> {
|
|
|
|
|
|