1
0

1. Fix merge on read DAG to make docker demo pass (#2092)

1. Fix merge on read DAG to make docker demo pass (#2092)
2. Fix repeat_count, rollback node
This commit is contained in:
n3nash
2020-10-28 19:34:26 -07:00
committed by GitHub
parent e206ddd431
commit e109a61803
12 changed files with 181 additions and 54 deletions

View File

@@ -28,12 +28,14 @@ import org.apache.hudi.DataSourceWriteOptions;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.index.HoodieIndex;
import org.apache.hudi.integ.testsuite.HoodieTestSuiteJob;
import org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.HoodieTestSuiteConfig;
import org.apache.hudi.integ.testsuite.dag.ComplexDagGenerator;
import org.apache.hudi.integ.testsuite.dag.HiveSyncDagGenerator;
import org.apache.hudi.integ.testsuite.dag.HiveSyncDagGeneratorMOR;
import org.apache.hudi.integ.testsuite.dag.WorkflowDagGenerator;
import org.apache.hudi.integ.testsuite.helpers.DFSTestSuitePathSelector;
import org.apache.hudi.integ.testsuite.reader.DeltaInputType;
import org.apache.hudi.integ.testsuite.writer.DeltaOutputMode;
import org.apache.hudi.keygen.TimestampBasedKeyGenerator;
@@ -105,6 +107,15 @@ public class TestHoodieTestSuiteJob extends UtilitiesTestBase {
props.setProperty("hoodie.compact.inline.max.delta.commits", "3");
props.setProperty("hoodie.parquet.max.file.size", "1024000");
props.setProperty("hoodie.compact.inline.max.delta.commits", "0");
props.setProperty("hoodie.index.type", HoodieIndex.IndexType.GLOBAL_SIMPLE.name());
props.setProperty("hoodie.global.simple.index.parallelism", "2");
// Reduce shuffle parallelism; Spark hangs when numPartitions >> numRecords to process
props.setProperty("hoodie.insert.shuffle.parallelism", "10");
props.setProperty("hoodie.upsert.shuffle.parallelism", "10");
props.setProperty("hoodie.bulkinsert.shuffle.parallelism", "10");
props.setProperty("hoodie.compact.inline.max.delta.commits", "0");
// Make path selection test-suite specific
props.setProperty("hoodie.deltastreamer.source.input.selector", DFSTestSuitePathSelector.class.getName());
// Hive Configs
props.setProperty(DataSourceWriteOptions.HIVE_URL_OPT_KEY(), "jdbc:hive2://127.0.0.1:9999/");
props.setProperty(DataSourceWriteOptions.HIVE_DATABASE_OPT_KEY(), "testdb1");

View File

@@ -16,17 +16,45 @@
first_insert:
config:
record_size: 70000
num_insert_partitions: 1
repeat_count: 1
num_partitions_insert: 1
repeat_count: 2
num_records_insert: 100
type: InsertNode
deps: none
second_insert:
config:
record_size: 70000
num_partitions_insert: 1
repeat_count: 1
num_records_insert: 100
type: InsertNode
deps: first_insert
first_rollback:
config:
deps: second_insert
type: RollbackNode
third_insert:
config:
record_size: 70000
num_partitions_insert: 1
repeat_count: 1
num_records_insert: 100
type: InsertNode
deps: first_rollback
first_upsert:
config:
record_size: 70000
num_partitions_upsert: 1
repeat_count: 1
num_records_upsert: 100
type: UpsertNode
deps: third_insert
first_hive_sync:
config:
queue_name: "adhoc"
engine: "mr"
type: HiveSyncNode
deps: first_insert
deps: first_upsert
first_hive_query:
config:
hive_props:
@@ -34,6 +62,8 @@ first_hive_query:
prop3: "set hive.strict.checks.large.query=false"
prop4: "set hive.stats.autogather=false"
hive_queries:
query1: "select count(*) from testdb1.table1"
result1: 300
query2: "select count(*) from testdb1.table1 group by `_row_key` having count(*) > 1"
result2: 0
type: HiveQueryNode

View File

@@ -16,17 +16,45 @@
first_insert:
config:
record_size: 70000
num_insert_partitions: 1
repeat_count: 1
num_partitions_insert: 1
repeat_count: 2
num_records_insert: 100
type: InsertNode
deps: none
second_insert:
config:
record_size: 70000
num_partitions_insert: 1
repeat_count: 1
num_records_insert: 100
type: InsertNode
deps: first_insert
first_rollback:
config:
deps: second_insert
type: RollbackNode
third_insert:
config:
record_size: 70000
num_partitions_insert: 1
repeat_count: 1
num_records_insert: 100
type: InsertNode
deps: first_rollback
first_upsert:
config:
record_size: 70000
num_partitions_upsert: 1
repeat_count: 1
num_records_upsert: 100
type: UpsertNode
deps: third_insert
first_hive_sync:
config:
queue_name: "adhoc"
engine: "mr"
type: HiveSyncNode
deps: first_insert
deps: first_upsert
first_hive_query:
config:
hive_props:
@@ -34,6 +62,8 @@ first_hive_query:
prop3: "set hive.strict.checks.large.query=false"
prop4: "set hive.stats.autogather=false"
hive_queries:
query1: "select count(*) from testdb1.table1"
result1: 300
query2: "select count(*) from testdb1.table1 group by `_row_key` having count(*) > 1"
result2: 0
type: HiveQueryNode