1
0

Test Suite should work with Docker + Unit Tests

This commit is contained in:
Abhishek Modi
2020-09-01 20:12:22 -07:00
committed by n3nash
parent 2fee087f0f
commit 53d1e55110
21 changed files with 422 additions and 185 deletions

View File

@@ -60,15 +60,14 @@ first_hive_sync:
first_hive_query:
config:
hive_props:
prop1: "set hive.execution.engine=spark"
prop2: "set spark.yarn.queue="
prop3: "set hive.strict.checks.large.query=false"
prop4: "set hive.stats.autogather=false"
hive_queries:
query1: "select count(*) from testdb1.table1 group by `_row_key` having count(*) > 1"
query1: "select count(*) from testdb.table1 group by `_row_key` having count(*) > 1"
result1: 0
query2: "select count(*) from testdb1.table1"
result2: 22100000
query2: "select count(*) from testdb.table1"
result2: 11600
type: HiveQueryNode
deps: first_hive_sync
second_upsert:
@@ -84,14 +83,13 @@ second_upsert:
second_hive_query:
config:
hive_props:
prop1: "set hive.execution.engine=mr"
prop2: "set mapred.job.queue.name="
prop3: "set hive.strict.checks.large.query=false"
prop4: "set hive.stats.autogather=false"
hive_queries:
query1: "select count(*) from testdb1.table1 group by `_row_key` having count(*) > 1"
query1: "select count(*) from testdb.table1 group by `_row_key` having count(*) > 1"
result1: 0
query2: "select count(*) from testdb1.table1"
result2: 22100
query2: "select count(*) from testdb.table1"
result2: 11900
type: HiveQueryNode
deps: second_upsert
deps: second_upsert

View File

@@ -13,15 +13,25 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# write configs
hoodie.datasource.write.recordkey.field=_row_key
hoodie.deltastreamer.source.dfs.root=/user/hive/warehouse/hudi-bench/input
hoodie.datasource.write.keygenerator.class=org.apache.hudi.ComplexKeyGenerator
hoodie.datasource.write.keygenerator.class=org.apache.hudi.keygen.TimestampBasedKeyGenerator
hoodie.datasource.write.partitionpath.field=timestamp
hoodie.deltastreamer.schemaprovider.source.schema.file=/var/hoodie/ws/docker/demo/config/bench/source.avsc
# deltastreamer configs
hoodie.deltastreamer.keygen.timebased.output.dateformat=yyyy/MM/dd
hoodie.deltastreamer.keygen.timebased.timestamp.type=UNIX_TIMESTAMP
hoodie.deltastreamer.source.dfs.root=/user/hive/warehouse/hudi-bench/input
hoodie.deltastreamer.schemaprovider.source.schema.file=/var/hoodie/ws/docker/demo/config/test-suite/source.avsc
hoodie.deltastreamer.schemaprovider.target.schema.file=/var/hoodie/ws/docker/demo/config/bench/source.avsc
# hive sync configs
hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000/
hoodie.datasource.hive_sync.database=testdb
hoodie.datasource.hive_sync.table=test_table
hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.NonPartitionedExtractor
hoodie.datasource.hive_sync.table=table1
hoodie.datasource.hive_sync.use_jdbc=false
hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor
hoodie.datasource.hive_sync.assume_date_partitioning=true
hoodie.datasource.write.keytranslator.class=org.apache.hudi.DayBasedPartitionPathKeyTranslator
hoodie.deltastreamer.schemaprovider.target.schema.file=/var/hoodie/ws/docker/demo/config/bench/source.avsc
hoodie.datasource.hive_sync.use_pre_apache_input_format=true