1
0

[HUDI-3689] Fix glob path and hive sync in deltastreamer tests (#5117)

* Remove glob pattern basePath from the deltastreamer tests.

* [HUDI-3689] Fix file scheme config

for CI failure in TestHoodieRealTimeRecordReader

Co-authored-by: Raymond Xu <2701446+xushiyan@users.noreply.github.com>
This commit is contained in:
Sagar Sumit
2022-03-24 15:48:35 +05:30
committed by GitHub
parent a1c42fcc07
commit fe2c3989e3
5 changed files with 130 additions and 120 deletions

View File

@@ -18,29 +18,6 @@
package org.apache.hudi.hadoop.realtime;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.avro.model.HoodieCompactionPlan;
import org.apache.hudi.common.config.HoodieCommonConfig;
@@ -68,6 +45,30 @@ import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.hadoop.RealtimeFileStatus;
import org.apache.hudi.hadoop.config.HoodieRealtimeConfig;
import org.apache.hudi.hadoop.testutils.InputFormatTestUtil;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
@@ -78,6 +79,7 @@ import org.junit.jupiter.params.provider.MethodSource;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
@@ -106,9 +108,11 @@ public class TestHoodieRealtimeRecordReader {
@BeforeEach
public void setUp() {
hadoopConf = HoodieTestUtils.getDefaultHadoopConf();
hadoopConf.set("fs.defaultFS", "file:///");
hadoopConf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
baseJobConf = new JobConf(hadoopConf);
baseJobConf.set(HoodieRealtimeConfig.MAX_DFS_STREAM_BUFFER_SIZE_PROP, String.valueOf(1024 * 1024));
fs = FSUtils.getFs(basePath.toString(), baseJobConf);
fs = FSUtils.getFs(basePath.toUri().toString(), baseJobConf);
}
@TempDir
@@ -810,13 +814,14 @@ public class TestHoodieRealtimeRecordReader {
public void testLogOnlyReader() throws Exception {
// initial commit
Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getEvolvedSchema());
HoodieTestUtils.init(hadoopConf, basePath.toString(), HoodieTableType.MERGE_ON_READ);
URI baseUri = basePath.toUri();
HoodieTestUtils.init(hadoopConf, baseUri.toString(), HoodieTableType.MERGE_ON_READ);
String baseInstant = "100";
File partitionDir = InputFormatTestUtil.prepareNonPartitionedParquetTable(basePath, schema, 1, 100, baseInstant,
HoodieTableType.MERGE_ON_READ);
FileCreateUtils.createDeltaCommit(basePath.toString(), baseInstant);
// Add the paths
FileInputFormat.setInputPaths(baseJobConf, partitionDir.getPath());
FileInputFormat.setInputPaths(baseJobConf, partitionDir.toURI().toString());
FileSlice fileSlice = new FileSlice("default", baseInstant, "fileid1");
try {
@@ -836,7 +841,7 @@ public class TestHoodieRealtimeRecordReader {
fileSlice.addLogFile(new HoodieLogFile(writer.getLogFile().getPath(), size));
RealtimeFileStatus realtimeFileStatus = new RealtimeFileStatus(
new FileStatus(writer.getLogFile().getFileSize(), false, 1, 1, 0, writer.getLogFile().getPath()),
basePath.toString(),
baseUri.toString(),
fileSlice.getLogFiles().collect(Collectors.toList()),
false,
Option.empty());

View File

@@ -137,7 +137,7 @@ public class HoodieDeltaStreamerTestBase extends UtilitiesTestBase {
TypedProperties downstreamProps = new TypedProperties();
downstreamProps.setProperty("include", "base.properties");
downstreamProps.setProperty("hoodie.datasource.write.recordkey.field", "_row_key");
downstreamProps.setProperty("hoodie.datasource.write.partitionpath.field", "not_there");
downstreamProps.setProperty("hoodie.datasource.write.partitionpath.field", "partition_path");
// Source schema is the target schema of upstream table
downstreamProps.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", dfsBasePath + "/target.avsc");
@@ -149,7 +149,7 @@ public class HoodieDeltaStreamerTestBase extends UtilitiesTestBase {
invalidProps.setProperty("include", "sql-transformer.properties");
invalidProps.setProperty("hoodie.datasource.write.keygenerator.class", "invalid");
invalidProps.setProperty("hoodie.datasource.write.recordkey.field", "_row_key");
invalidProps.setProperty("hoodie.datasource.write.partitionpath.field", "not_there");
invalidProps.setProperty("hoodie.datasource.write.partitionpath.field", "partition_path");
invalidProps.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", dfsBasePath + "/source.avsc");
invalidProps.setProperty("hoodie.deltastreamer.schemaprovider.target.schema.file", dfsBasePath + "/target.avsc");
UtilitiesTestBase.Helpers.savePropsToDFS(invalidProps, dfs, dfsBasePath + "/" + PROPS_FILENAME_TEST_INVALID);

View File

@@ -43,6 +43,7 @@ import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
import org.apache.hudi.common.testutils.HoodieTestUtils;
import org.apache.hudi.common.util.CollectionUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.config.HoodieClusteringConfig;
@@ -103,6 +104,7 @@ import org.apache.spark.sql.types.StructField;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
@@ -580,32 +582,32 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
// Initial bulk insert
HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(tableBasePath, WriteOperationType.BULK_INSERT);
new HoodieDeltaStreamer(cfg, jsc).sync();
TestHelpers.assertRecordCount(1000, tableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertDistanceCount(1000, tableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertRecordCount(1000, tableBasePath, sqlContext);
TestHelpers.assertDistanceCount(1000, tableBasePath, sqlContext);
TestHelpers.assertCommitMetadata("00000", tableBasePath, dfs, 1);
// No new data => no commits.
cfg.sourceLimit = 0;
new HoodieDeltaStreamer(cfg, jsc).sync();
TestHelpers.assertRecordCount(1000, tableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertDistanceCount(1000, tableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertRecordCount(1000, tableBasePath, sqlContext);
TestHelpers.assertDistanceCount(1000, tableBasePath, sqlContext);
TestHelpers.assertCommitMetadata("00000", tableBasePath, dfs, 1);
// upsert() #1
cfg.sourceLimit = 2000;
cfg.operation = WriteOperationType.UPSERT;
new HoodieDeltaStreamer(cfg, jsc).sync();
TestHelpers.assertRecordCount(1950, tableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertDistanceCount(1950, tableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertRecordCount(1950, tableBasePath, sqlContext);
TestHelpers.assertDistanceCount(1950, tableBasePath, sqlContext);
TestHelpers.assertCommitMetadata("00001", tableBasePath, dfs, 2);
List<Row> counts = TestHelpers.countsPerCommit(tableBasePath + "/*/*.parquet", sqlContext);
List<Row> counts = TestHelpers.countsPerCommit(tableBasePath, sqlContext);
assertEquals(1950, counts.stream().mapToLong(entry -> entry.getLong(1)).sum());
// Perform bootstrap with tableBasePath as source
String bootstrapSourcePath = dfsBasePath + "/src_bootstrapped";
Dataset<Row> sourceDf = sqlContext.read()
.format("org.apache.hudi")
.load(tableBasePath + "/*/*.parquet");
.load(tableBasePath);
sourceDf.write().format("parquet").save(bootstrapSourcePath);
String newDatasetBasePath = dfsBasePath + "/test_dataset_bootstrapped";
@@ -615,11 +617,11 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
cfg.configs.add("hoodie.bootstrap.parallelism=5");
cfg.targetBasePath = newDatasetBasePath;
new HoodieDeltaStreamer(cfg, jsc).sync();
Dataset<Row> res = sqlContext.read().format("org.apache.hudi").load(newDatasetBasePath + "/*.parquet");
Dataset<Row> res = sqlContext.read().format("org.apache.hudi").load(newDatasetBasePath);
LOG.info("Schema :");
res.printSchema();
TestHelpers.assertRecordCount(1950, newDatasetBasePath + "/*.parquet", sqlContext);
TestHelpers.assertRecordCount(1950, newDatasetBasePath, sqlContext);
res.registerTempTable("bootstrapped");
assertEquals(1950, sqlContext.sql("select distinct _hoodie_record_key from bootstrapped").count());
@@ -646,7 +648,7 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
cfg.configs.add(SparkAvroPostProcessor.Config.SPARK_AVRO_POST_PROCESSOR_PROP_ENABLE + "=false");
}
new HoodieDeltaStreamer(cfg, jsc).sync();
TestHelpers.assertRecordCount(1000, tableBasePath + "/*/*", sqlContext);
TestHelpers.assertRecordCount(1000, tableBasePath, sqlContext);
TestHelpers.assertCommitMetadata("00000", tableBasePath, dfs, 1);
// Upsert data produced with Schema B, pass Schema B
@@ -660,12 +662,12 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
}
new HoodieDeltaStreamer(cfg, jsc).sync();
// out of 1000 new records, 500 are inserts, 450 are updates and 50 are deletes.
TestHelpers.assertRecordCount(1450, tableBasePath + "/*/*", sqlContext);
TestHelpers.assertRecordCount(1450, tableBasePath, sqlContext);
TestHelpers.assertCommitMetadata("00001", tableBasePath, dfs, 2);
List<Row> counts = TestHelpers.countsPerCommit(tableBasePath + "/*/*", sqlContext);
List<Row> counts = TestHelpers.countsPerCommit(tableBasePath, sqlContext);
assertEquals(1450, counts.stream().mapToLong(entry -> entry.getLong(1)).sum());
sqlContext.read().format("org.apache.hudi").load(tableBasePath + "/*/*").createOrReplaceTempView("tmp_trips");
sqlContext.read().format("org.apache.hudi").load(tableBasePath).createOrReplaceTempView("tmp_trips");
long recordCount =
sqlContext.sparkSession().sql("select * from tmp_trips where evoluted_optional_union_field is not NULL").count();
assertEquals(950, recordCount);
@@ -686,9 +688,9 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
cfg.configs.add(DataSourceWriteOptions.RECONCILE_SCHEMA().key() + "=true");
new HoodieDeltaStreamer(cfg, jsc).sync();
// again, 1000 new records, 500 are inserts, 450 are updates and 50 are deletes.
TestHelpers.assertRecordCount(1900, tableBasePath + "/*/*", sqlContext);
TestHelpers.assertRecordCount(1900, tableBasePath, sqlContext);
TestHelpers.assertCommitMetadata("00002", tableBasePath, dfs, 3);
counts = TestHelpers.countsPerCommit(tableBasePath + "/*/*", sqlContext);
counts = TestHelpers.countsPerCommit(tableBasePath, sqlContext);
assertEquals(1900, counts.stream().mapToLong(entry -> entry.getLong(1)).sum());
TableSchemaResolver tableSchemaResolver = new TableSchemaResolver(HoodieTableMetaClient.builder().setBasePath(tableBasePath).setConf(dfs.getConf()).build());
@@ -736,8 +738,8 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
} else {
TestHelpers.assertAtleastNCompactionCommits(5, tableBasePath, dfs);
}
TestHelpers.assertRecordCount(totalRecords, tableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertDistanceCount(totalRecords, tableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertRecordCount(totalRecords, tableBasePath, sqlContext);
TestHelpers.assertDistanceCount(totalRecords, tableBasePath, sqlContext);
return true;
});
}
@@ -1011,7 +1013,7 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
// There should be 4 commits, one of which should be a replace commit
TestHelpers.assertAtLeastNCommits(4, tableBasePath, dfs);
TestHelpers.assertAtLeastNReplaceCommits(1, tableBasePath, dfs);
TestHelpers.assertDistinctRecordCount(totalRecords, tableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertDistinctRecordCount(totalRecords, tableBasePath, sqlContext);
}
/**
@@ -1039,7 +1041,7 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
// There should be 4 commits, one of which should be a replace commit
TestHelpers.assertAtLeastNCommits(4, tableBasePath, dfs);
TestHelpers.assertAtLeastNReplaceCommits(1, tableBasePath, dfs);
TestHelpers.assertDistinctRecordCount(1900, tableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertDistinctRecordCount(1900, tableBasePath, sqlContext);
}
@Test
@@ -1062,7 +1064,7 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
// There should be 4 commits, one of which should be a replace commit
TestHelpers.assertAtLeastNCommits(4, tableBasePath, dfs);
TestHelpers.assertAtLeastNReplaceCommits(1, tableBasePath, dfs);
TestHelpers.assertDistinctRecordCount(totalRecords, tableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertDistinctRecordCount(totalRecords, tableBasePath, sqlContext);
}
@ParameterizedTest
@@ -1168,15 +1170,15 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
String tableBasePath = dfsBasePath + "/test_table2";
String downstreamTableBasePath = dfsBasePath + "/test_downstream_table2";
HiveSyncConfig hiveSyncConfig = getHiveSyncConfig(tableBasePath, "hive_trips");
// Initial bulk insert to ingest to first hudi table
HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(tableBasePath, WriteOperationType.BULK_INSERT,
Collections.singletonList(SqlQueryBasedTransformer.class.getName()), PROPS_FILENAME_TEST_SOURCE, true);
// NOTE: We should not have need to set below config, 'datestr' should have assumed date partitioning
cfg.configs.add("hoodie.datasource.hive_sync.partition_fields=year,month,day");
new HoodieDeltaStreamer(cfg, jsc, dfs, hiveServer.getHiveConf()).sync();
TestHelpers.assertRecordCount(1000, tableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertDistanceCount(1000, tableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertDistanceCountWithExactValue(1000, tableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertRecordCount(1000, tableBasePath, sqlContext);
TestHelpers.assertDistanceCount(1000, tableBasePath, sqlContext);
TestHelpers.assertDistanceCountWithExactValue(1000, tableBasePath, sqlContext);
String lastInstantForUpstreamTable = TestHelpers.assertCommitMetadata("00000", tableBasePath, dfs, 1);
// Now incrementally pull from the above hudi table and ingest to second table
@@ -1184,17 +1186,17 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
TestHelpers.makeConfigForHudiIncrSrc(tableBasePath, downstreamTableBasePath, WriteOperationType.BULK_INSERT,
true, null);
new HoodieDeltaStreamer(downstreamCfg, jsc, dfs, hiveServer.getHiveConf()).sync();
TestHelpers.assertRecordCount(1000, downstreamTableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertDistanceCount(1000, downstreamTableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertDistanceCountWithExactValue(1000, downstreamTableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertRecordCount(1000, downstreamTableBasePath, sqlContext);
TestHelpers.assertDistanceCount(1000, downstreamTableBasePath, sqlContext);
TestHelpers.assertDistanceCountWithExactValue(1000, downstreamTableBasePath, sqlContext);
TestHelpers.assertCommitMetadata(lastInstantForUpstreamTable, downstreamTableBasePath, dfs, 1);
// No new data => no commits for upstream table
cfg.sourceLimit = 0;
new HoodieDeltaStreamer(cfg, jsc, dfs, hiveServer.getHiveConf()).sync();
TestHelpers.assertRecordCount(1000, tableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertDistanceCount(1000, tableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertDistanceCountWithExactValue(1000, tableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertRecordCount(1000, tableBasePath, sqlContext);
TestHelpers.assertDistanceCount(1000, tableBasePath, sqlContext);
TestHelpers.assertDistanceCountWithExactValue(1000, tableBasePath, sqlContext);
TestHelpers.assertCommitMetadata("00000", tableBasePath, dfs, 1);
// with no change in upstream table, no change in downstream too when pulled.
@@ -1202,20 +1204,20 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
TestHelpers.makeConfigForHudiIncrSrc(tableBasePath, downstreamTableBasePath,
WriteOperationType.BULK_INSERT, true, DummySchemaProvider.class.getName());
new HoodieDeltaStreamer(downstreamCfg1, jsc).sync();
TestHelpers.assertRecordCount(1000, downstreamTableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertDistanceCount(1000, downstreamTableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertDistanceCountWithExactValue(1000, downstreamTableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertRecordCount(1000, downstreamTableBasePath, sqlContext);
TestHelpers.assertDistanceCount(1000, downstreamTableBasePath, sqlContext);
TestHelpers.assertDistanceCountWithExactValue(1000, downstreamTableBasePath, sqlContext);
TestHelpers.assertCommitMetadata(lastInstantForUpstreamTable, downstreamTableBasePath, dfs, 1);
// upsert() #1 on upstream hudi table
cfg.sourceLimit = 2000;
cfg.operation = WriteOperationType.UPSERT;
new HoodieDeltaStreamer(cfg, jsc, dfs, hiveServer.getHiveConf()).sync();
TestHelpers.assertRecordCount(1950, tableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertDistanceCount(1950, tableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertDistanceCountWithExactValue(1950, tableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertRecordCount(1950, tableBasePath, sqlContext);
TestHelpers.assertDistanceCount(1950, tableBasePath, sqlContext);
TestHelpers.assertDistanceCountWithExactValue(1950, tableBasePath, sqlContext);
lastInstantForUpstreamTable = TestHelpers.assertCommitMetadata("00001", tableBasePath, dfs, 2);
List<Row> counts = TestHelpers.countsPerCommit(tableBasePath + "/*/*.parquet", sqlContext);
List<Row> counts = TestHelpers.countsPerCommit(tableBasePath, sqlContext);
assertEquals(1950, counts.stream().mapToLong(entry -> entry.getLong(1)).sum());
// Incrementally pull changes in upstream hudi table and apply to downstream table
@@ -1224,18 +1226,20 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
false, null);
downstreamCfg.sourceLimit = 2000;
new HoodieDeltaStreamer(downstreamCfg, jsc).sync();
TestHelpers.assertRecordCount(2000, downstreamTableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertDistanceCount(2000, downstreamTableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertDistanceCountWithExactValue(2000, downstreamTableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertRecordCount(2000, downstreamTableBasePath, sqlContext);
TestHelpers.assertDistanceCount(2000, downstreamTableBasePath, sqlContext);
TestHelpers.assertDistanceCountWithExactValue(2000, downstreamTableBasePath, sqlContext);
String finalInstant =
TestHelpers.assertCommitMetadata(lastInstantForUpstreamTable, downstreamTableBasePath, dfs, 2);
counts = TestHelpers.countsPerCommit(downstreamTableBasePath + "/*/*.parquet", sqlContext);
counts = TestHelpers.countsPerCommit(downstreamTableBasePath, sqlContext);
assertEquals(2000, counts.stream().mapToLong(entry -> entry.getLong(1)).sum());
// Test Hive integration
HiveSyncConfig hiveSyncConfig = getHiveSyncConfig(tableBasePath, "hive_trips");
hiveSyncConfig.partitionFields = CollectionUtils.createImmutableList("year", "month", "day");
HoodieHiveClient hiveClient = new HoodieHiveClient(hiveSyncConfig, hiveServer.getHiveConf(), dfs);
assertTrue(hiveClient.doesTableExist(hiveSyncConfig.tableName), "Table " + hiveSyncConfig.tableName + " should exist");
assertEquals(1, hiveClient.scanTablePartitions(hiveSyncConfig.tableName).size(),
assertEquals(3, hiveClient.scanTablePartitions(hiveSyncConfig.tableName).size(),
"Table partitions should match the number of partitions we wrote");
assertEquals(lastInstantForUpstreamTable,
hiveClient.getLastCommitTimeSynced(hiveSyncConfig.tableName).get(),
@@ -1259,14 +1263,14 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
public void testPayloadClassUpdate() throws Exception {
String dataSetBasePath = dfsBasePath + "/test_dataset_mor";
HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(dataSetBasePath, WriteOperationType.BULK_INSERT,
Collections.singletonList(SqlQueryBasedTransformer.class.getName()), PROPS_FILENAME_TEST_SOURCE, true,
Collections.singletonList(SqlQueryBasedTransformer.class.getName()), PROPS_FILENAME_TEST_SOURCE, false,
true, false, null, "MERGE_ON_READ");
new HoodieDeltaStreamer(cfg, jsc, dfs, hiveServer.getHiveConf()).sync();
TestHelpers.assertRecordCount(1000, dataSetBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertRecordCount(1000, dataSetBasePath, sqlContext);
//now create one more deltaStreamer instance and update payload class
cfg = TestHelpers.makeConfig(dataSetBasePath, WriteOperationType.BULK_INSERT,
Collections.singletonList(SqlQueryBasedTransformer.class.getName()), PROPS_FILENAME_TEST_SOURCE, true,
Collections.singletonList(SqlQueryBasedTransformer.class.getName()), PROPS_FILENAME_TEST_SOURCE, false,
true, true, DummyAvroPayload.class.getName(), "MERGE_ON_READ");
new HoodieDeltaStreamer(cfg, jsc, dfs, hiveServer.getHiveConf());
@@ -1285,14 +1289,14 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
public void testPayloadClassUpdateWithCOWTable() throws Exception {
String dataSetBasePath = dfsBasePath + "/test_dataset_cow";
HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(dataSetBasePath, WriteOperationType.BULK_INSERT,
Collections.singletonList(SqlQueryBasedTransformer.class.getName()), PROPS_FILENAME_TEST_SOURCE, true,
Collections.singletonList(SqlQueryBasedTransformer.class.getName()), PROPS_FILENAME_TEST_SOURCE, false,
true, false, null, null);
new HoodieDeltaStreamer(cfg, jsc, dfs, hiveServer.getHiveConf()).sync();
TestHelpers.assertRecordCount(1000, dataSetBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertRecordCount(1000, dataSetBasePath, sqlContext);
//now create one more deltaStreamer instance and update payload class
cfg = TestHelpers.makeConfig(dataSetBasePath, WriteOperationType.BULK_INSERT,
Collections.singletonList(SqlQueryBasedTransformer.class.getName()), PROPS_FILENAME_TEST_SOURCE, true,
Collections.singletonList(SqlQueryBasedTransformer.class.getName()), PROPS_FILENAME_TEST_SOURCE, false,
true, true, DummyAvroPayload.class.getName(), null);
new HoodieDeltaStreamer(cfg, jsc, dfs, hiveServer.getHiveConf());
@@ -1314,7 +1318,7 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
// Initial bulk insert
HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(tableBasePath, WriteOperationType.BULK_INSERT);
new HoodieDeltaStreamer(cfg, jsc).sync();
TestHelpers.assertRecordCount(1000, tableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertRecordCount(1000, tableBasePath, sqlContext);
TestHelpers.assertCommitMetadata("00000", tableBasePath, dfs, 1);
// Generate the same 1000 records + 1000 new ones for upsert
@@ -1322,10 +1326,10 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
cfg.sourceLimit = 2000;
cfg.operation = WriteOperationType.INSERT;
new HoodieDeltaStreamer(cfg, jsc).sync();
TestHelpers.assertRecordCount(2000, tableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertRecordCount(2000, tableBasePath, sqlContext);
TestHelpers.assertCommitMetadata("00001", tableBasePath, dfs, 2);
// 1000 records for commit 00000 & 1000 for commit 00001
List<Row> counts = TestHelpers.countsPerCommit(tableBasePath + "/*/*.parquet", sqlContext);
List<Row> counts = TestHelpers.countsPerCommit(tableBasePath, sqlContext);
assertEquals(1000, counts.get(0).getLong(1));
assertEquals(1000, counts.get(1).getLong(1));
@@ -1394,7 +1398,7 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
private void prepareParquetDFSSource(boolean useSchemaProvider, boolean hasTransformer, String sourceSchemaFile, String targetSchemaFile,
String propsFileName, String parquetSourceRoot, boolean addCommonProps) throws IOException {
prepareParquetDFSSource(useSchemaProvider, hasTransformer, sourceSchemaFile, targetSchemaFile, propsFileName, parquetSourceRoot, addCommonProps,
"not_there");
"partition_path");
}
private void prepareParquetDFSSource(boolean useSchemaProvider, boolean hasTransformer, String sourceSchemaFile, String targetSchemaFile,
@@ -1434,7 +1438,7 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
transformerClassNames, PROPS_FILENAME_TEST_PARQUET, false,
useSchemaProvider, 100000, false, null, null, "timestamp", null), jsc);
deltaStreamer.sync();
TestHelpers.assertRecordCount(PARQUET_NUM_RECORDS, tableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertRecordCount(PARQUET_NUM_RECORDS, tableBasePath, sqlContext);
testNum++;
if (testEmptyBatch) {
@@ -1443,7 +1447,7 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
TestParquetDFSSourceEmptyBatch.returnEmptyBatch = true;
deltaStreamer.sync();
// since we mimic'ed empty batch, total records should be same as first sync().
TestHelpers.assertRecordCount(PARQUET_NUM_RECORDS, tableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertRecordCount(PARQUET_NUM_RECORDS, tableBasePath, sqlContext);
HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setBasePath(tableBasePath).setConf(jsc.hadoopConfiguration()).build();
// validate table schema fetches valid schema from last but one commit.
@@ -1460,7 +1464,7 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
orcProps.setProperty("include", "base.properties");
orcProps.setProperty("hoodie.embed.timeline.server", "false");
orcProps.setProperty("hoodie.datasource.write.recordkey.field", "_row_key");
orcProps.setProperty("hoodie.datasource.write.partitionpath.field", "not_there");
orcProps.setProperty("hoodie.datasource.write.partitionpath.field", "partition_path");
if (useSchemaProvider) {
orcProps.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", dfsBasePath + "/" + "source.avsc");
if (transformerClassNames != null) {
@@ -1476,7 +1480,7 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
transformerClassNames, PROPS_FILENAME_TEST_ORC, false,
useSchemaProvider, 100000, false, null, null, "timestamp", null), jsc);
deltaStreamer.sync();
TestHelpers.assertRecordCount(ORC_NUM_RECORDS, tableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertRecordCount(ORC_NUM_RECORDS, tableBasePath, sqlContext);
testNum++;
}
@@ -1487,7 +1491,7 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
props.setProperty("include", "base.properties");
props.setProperty("hoodie.embed.timeline.server", "false");
props.setProperty("hoodie.datasource.write.recordkey.field", "_row_key");
props.setProperty("hoodie.datasource.write.partitionpath.field", "not_there");
props.setProperty("hoodie.datasource.write.partitionpath.field", "partition_path");
props.setProperty("hoodie.deltastreamer.source.dfs.root", JSON_KAFKA_SOURCE_ROOT);
props.setProperty("hoodie.deltastreamer.source.kafka.topic", topicName);
props.setProperty("hoodie.deltastreamer.source.kafka.checkpoint.type", kafkaCheckpointType);
@@ -1640,7 +1644,7 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
TypedProperties csvProps = new TypedProperties();
csvProps.setProperty("include", "base.properties");
csvProps.setProperty("hoodie.datasource.write.recordkey.field", recordKeyField);
csvProps.setProperty("hoodie.datasource.write.partitionpath.field", "not_there");
csvProps.setProperty("hoodie.datasource.write.partitionpath.field", "partition_path");
if (useSchemaProvider) {
csvProps.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", dfsBasePath + "/source-flattened.avsc");
if (hasTransformer) {
@@ -1681,7 +1685,7 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
transformerClassNames, PROPS_FILENAME_TEST_CSV, false,
useSchemaProvider, 1000, false, null, null, sourceOrderingField, null), jsc);
deltaStreamer.sync();
TestHelpers.assertRecordCount(CSV_NUM_RECORDS, tableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertRecordCount(CSV_NUM_RECORDS, tableBasePath, sqlContext);
testNum++;
}
@@ -1775,7 +1779,7 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
sqlSourceProps.setProperty("include", "base.properties");
sqlSourceProps.setProperty("hoodie.embed.timeline.server", "false");
sqlSourceProps.setProperty("hoodie.datasource.write.recordkey.field", "_row_key");
sqlSourceProps.setProperty("hoodie.datasource.write.partitionpath.field", "not_there");
sqlSourceProps.setProperty("hoodie.datasource.write.partitionpath.field", "partition_path");
sqlSourceProps.setProperty("hoodie.deltastreamer.source.sql.sql.query","select * from test_sql_table");
UtilitiesTestBase.Helpers.savePropsToDFS(sqlSourceProps, dfs, dfsBasePath + "/" + PROPS_FILENAME_TEST_SQL_SOURCE);
@@ -1801,9 +1805,10 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
Collections.emptyList(), PROPS_FILENAME_TEST_SQL_SOURCE, false,
false, 1000, false, null, null, "timestamp", null, true), jsc);
deltaStreamer.sync();
TestHelpers.assertRecordCount(SQL_SOURCE_NUM_RECORDS, tableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertRecordCount(SQL_SOURCE_NUM_RECORDS, tableBasePath, sqlContext);
}
@Disabled
@Test
public void testJdbcSourceIncrementalFetchInContinuousMode() {
try (Connection connection = DriverManager.getConnection("jdbc:h2:mem:test_mem", "test", "jdbc")) {
@@ -1818,7 +1823,7 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
props.setProperty("hoodie.datasource.write.keygenerator.class", SimpleKeyGenerator.class.getName());
props.setProperty("hoodie.datasource.write.recordkey.field", "ID");
props.setProperty("hoodie.datasource.write.partitionpath.field", "not_there");
props.setProperty("hoodie.datasource.write.partitionpath.field", "partition_path");
UtilitiesTestBase.Helpers.savePropsToDFS(props, dfs, dfsBasePath + "/test-jdbc-source.properties");
@@ -1835,7 +1840,7 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
HoodieDeltaStreamer deltaStreamer = new HoodieDeltaStreamer(cfg, jsc);
deltaStreamerTestRunner(deltaStreamer, cfg, (r) -> {
TestHelpers.assertAtleastNCompactionCommits(numRecords / sourceLimit + ((numRecords % sourceLimit == 0) ? 0 : 1), tableBasePath, dfs);
TestHelpers.assertRecordCount(numRecords, tableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertRecordCount(numRecords, tableBasePath, sqlContext);
return true;
});
} catch (Exception e) {
@@ -1857,7 +1862,7 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
insertInTable(tableBasePath, 9, WriteOperationType.UPSERT);
//No change as this fails with Path not exist error
assertThrows(org.apache.spark.sql.AnalysisException.class, () -> new HoodieDeltaStreamer(downstreamCfg, jsc).sync());
TestHelpers.assertRecordCount(1000, downstreamTableBasePath + "/*/*", sqlContext);
TestHelpers.assertRecordCount(1000, downstreamTableBasePath, sqlContext);
if (downstreamCfg.configs == null) {
downstreamCfg.configs = new ArrayList<>();
@@ -1870,8 +1875,8 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
new HoodieDeltaStreamer(downstreamCfg, jsc).sync();
new HoodieDeltaStreamer(downstreamCfg, jsc).sync();
long baseTableRecords = sqlContext.read().format("org.apache.hudi").load(tableBasePath + "/*/*.parquet").count();
long downStreamTableRecords = sqlContext.read().format("org.apache.hudi").load(downstreamTableBasePath + "/*/*.parquet").count();
long baseTableRecords = sqlContext.read().format("org.apache.hudi").load(tableBasePath).count();
long downStreamTableRecords = sqlContext.read().format("org.apache.hudi").load(downstreamTableBasePath).count();
assertEquals(baseTableRecords, downStreamTableRecords);
}
@@ -1930,8 +1935,8 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
// Initial insert
HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(tableBasePath, WriteOperationType.BULK_INSERT);
new HoodieDeltaStreamer(cfg, jsc).sync();
TestHelpers.assertRecordCount(1000, tableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertDistanceCount(1000, tableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertRecordCount(1000, tableBasePath, sqlContext);
TestHelpers.assertDistanceCount(1000, tableBasePath, sqlContext);
TestHelpers.assertCommitMetadata("00000", tableBasePath, dfs, 1);
// setting the operationType
@@ -1939,14 +1944,14 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
// No new data => no commits.
cfg.sourceLimit = 0;
new HoodieDeltaStreamer(cfg, jsc).sync();
TestHelpers.assertRecordCount(1000, tableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertDistanceCount(1000, tableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertRecordCount(1000, tableBasePath, sqlContext);
TestHelpers.assertDistanceCount(1000, tableBasePath, sqlContext);
TestHelpers.assertCommitMetadata("00000", tableBasePath, dfs, 1);
cfg.sourceLimit = 1000;
new HoodieDeltaStreamer(cfg, jsc).sync();
TestHelpers.assertRecordCount(1950, tableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertDistanceCount(1950, tableBasePath + "/*/*.parquet", sqlContext);
TestHelpers.assertRecordCount(1950, tableBasePath, sqlContext);
TestHelpers.assertDistanceCount(1950, tableBasePath, sqlContext);
TestHelpers.assertCommitMetadata("00001", tableBasePath, dfs, 2);
}

View File

@@ -106,8 +106,8 @@ public class TestHoodieDeltaStreamerWithMultiWriter extends SparkClientFunctiona
} else {
TestHoodieDeltaStreamer.TestHelpers.assertAtleastNCompactionCommits(3, tableBasePath, fs());
}
TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(totalRecords, tableBasePath + "/*/*.parquet", sqlContext());
TestHoodieDeltaStreamer.TestHelpers.assertDistanceCount(totalRecords, tableBasePath + "/*/*.parquet", sqlContext());
TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(totalRecords, tableBasePath, sqlContext());
TestHoodieDeltaStreamer.TestHelpers.assertDistanceCount(totalRecords, tableBasePath, sqlContext());
return true;
});
@@ -168,8 +168,8 @@ public class TestHoodieDeltaStreamerWithMultiWriter extends SparkClientFunctiona
} else {
TestHoodieDeltaStreamer.TestHelpers.assertAtleastNCompactionCommits(3, tableBasePath, fs());
}
TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(totalRecords, tableBasePath + "/*/*.parquet", sqlContext());
TestHoodieDeltaStreamer.TestHelpers.assertDistanceCount(totalRecords, tableBasePath + "/*/*.parquet", sqlContext());
TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(totalRecords, tableBasePath, sqlContext());
TestHoodieDeltaStreamer.TestHelpers.assertDistanceCount(totalRecords, tableBasePath, sqlContext());
return true;
});
@@ -236,8 +236,8 @@ public class TestHoodieDeltaStreamerWithMultiWriter extends SparkClientFunctiona
} else {
TestHoodieDeltaStreamer.TestHelpers.assertAtleastNCompactionCommits(3, tableBasePath, fs());
}
TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(totalRecords, tableBasePath + "/*/*.parquet", sqlContext());
TestHoodieDeltaStreamer.TestHelpers.assertDistanceCount(totalRecords, tableBasePath + "/*/*.parquet", sqlContext());
TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(totalRecords, tableBasePath, sqlContext());
TestHoodieDeltaStreamer.TestHelpers.assertDistanceCount(totalRecords, tableBasePath, sqlContext());
return true;
});
@@ -305,7 +305,7 @@ public class TestHoodieDeltaStreamerWithMultiWriter extends SparkClientFunctiona
props.setProperty("include", "sql-transformer.properties");
props.setProperty("hoodie.datasource.write.keygenerator.class", TestHoodieDeltaStreamer.TestGenerator.class.getName());
props.setProperty("hoodie.datasource.write.recordkey.field", "_row_key");
props.setProperty("hoodie.datasource.write.partitionpath.field", "not_there");
props.setProperty("hoodie.datasource.write.partitionpath.field", "partition_path");
props.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", basePath + "/source.avsc");
props.setProperty("hoodie.deltastreamer.schemaprovider.target.schema.file", basePath + "/target.avsc");
@@ -362,8 +362,8 @@ public class TestHoodieDeltaStreamerWithMultiWriter extends SparkClientFunctiona
} else {
TestHoodieDeltaStreamer.TestHelpers.assertAtleastNCompactionCommitsAfterCommit(3, lastSuccessfulCommit, tableBasePath, fs());
}
TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(totalRecords, tableBasePath + "/*/*.parquet", sqlContext());
TestHoodieDeltaStreamer.TestHelpers.assertDistanceCount(totalRecords, tableBasePath + "/*/*.parquet", sqlContext());
TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(totalRecords, tableBasePath, sqlContext());
TestHoodieDeltaStreamer.TestHelpers.assertDistanceCount(totalRecords, tableBasePath, sqlContext());
return true;
};

View File

@@ -46,7 +46,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
public class TestHoodieMultiTableDeltaStreamer extends HoodieDeltaStreamerTestBase {
private static volatile Logger log = LogManager.getLogger(TestHoodieMultiTableDeltaStreamer.class);
private static final Logger LOG = LogManager.getLogger(TestHoodieMultiTableDeltaStreamer.class);
static class TestHelpers {
@@ -80,7 +80,7 @@ public class TestHoodieMultiTableDeltaStreamer extends HoodieDeltaStreamerTestBa
Exception e = assertThrows(HoodieException.class, () -> {
new HoodieMultiTableDeltaStreamer(cfg, jsc);
}, "Should fail when hive sync table not provided with enableHiveSync flag");
log.debug("Expected error when creating table execution objects", e);
LOG.debug("Expected error when creating table execution objects", e);
assertTrue(e.getMessage().contains("Meta sync table field not provided!"));
}
@@ -90,7 +90,7 @@ public class TestHoodieMultiTableDeltaStreamer extends HoodieDeltaStreamerTestBa
Exception e = assertThrows(IllegalArgumentException.class, () -> {
new HoodieMultiTableDeltaStreamer(cfg, jsc);
}, "Should fail when invalid props file is provided");
log.debug("Expected error when creating table execution objects", e);
LOG.debug("Expected error when creating table execution objects", e);
assertTrue(e.getMessage().contains("Please provide valid common config file path!"));
}
@@ -100,7 +100,7 @@ public class TestHoodieMultiTableDeltaStreamer extends HoodieDeltaStreamerTestBa
Exception e = assertThrows(IllegalArgumentException.class, () -> {
new HoodieMultiTableDeltaStreamer(cfg, jsc);
}, "Should fail when invalid table config props file path is provided");
log.debug("Expected error when creating table execution objects", e);
LOG.debug("Expected error when creating table execution objects", e);
assertTrue(e.getMessage().contains("Please provide valid table config file path!"));
}
@@ -128,7 +128,7 @@ public class TestHoodieMultiTableDeltaStreamer extends HoodieDeltaStreamerTestBa
HoodieMultiTableDeltaStreamer.Config cfg = TestHelpers.getConfig(PROPS_FILENAME_TEST_SOURCE1, dfsBasePath + "/config", TestDataSource.class.getName(), true, true, null);
new HoodieMultiTableDeltaStreamer(cfg, jsc);
}, "Creation of execution object should fail without kafka topic");
log.debug("Creation of execution object failed with error: " + e.getMessage(), e);
LOG.debug("Creation of execution object failed with error: " + e.getMessage(), e);
assertTrue(e.getMessage().contains("Please provide valid table config arguments!"));
}
@@ -251,7 +251,7 @@ public class TestHoodieMultiTableDeltaStreamer extends HoodieDeltaStreamerTestBa
TypedProperties props = new TypedProperties();
props.setProperty("include", "base.properties");
props.setProperty("hoodie.datasource.write.recordkey.field", "_row_key");
props.setProperty("hoodie.datasource.write.partitionpath.field", "not_there");
props.setProperty("hoodie.datasource.write.partitionpath.field", "partition_path");
props.setProperty("hoodie.deltastreamer.source.dfs.root", parquetSourceRoot);
return props;
}
@@ -271,7 +271,7 @@ public class TestHoodieMultiTableDeltaStreamer extends HoodieDeltaStreamerTestBa
private void syncAndVerify(HoodieMultiTableDeltaStreamer streamer, String targetBasePath1, String targetBasePath2, long table1ExpectedRecords, long table2ExpectedRecords) {
streamer.sync();
TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(table1ExpectedRecords, targetBasePath1 + "/*/*.parquet", sqlContext);
TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(table2ExpectedRecords, targetBasePath2 + "/*/*.parquet", sqlContext);
TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(table1ExpectedRecords, targetBasePath1, sqlContext);
TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(table2ExpectedRecords, targetBasePath2, sqlContext);
}
}