1
0

[HUDI-2319] dbt example models to demonstrate hudi dbt integration (#5220)

* dbt example models to demonstrate hudi dbt integration

* Fixed readme text
This commit is contained in:
Vinoth Govindarajan
2022-04-05 08:58:13 -07:00
committed by GitHub
parent 3195f51562
commit 92ca426ab7
9 changed files with 476 additions and 0 deletions

View File

@@ -0,0 +1,37 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/
/*
  Non-partitioned Hudi model, materialized incrementally with the
  insert_overwrite strategy.

  Source rows come from the hudi_insert_table model. Each row keeps its id,
  receives a random number rendered as a string for name, and is stamped with
  the current timestamp in ts. The ts column is also passed to Hudi through
  the 'precombineKey' table option.
*/
{{
    config(
        materialized='incremental',
        incremental_strategy='insert_overwrite',
        file_format='hudi',
        unique_key='id',
        options={
            'type': 'cow',
            'precombineKey': 'ts'
        }
    )
}}

select
    id,
    cast(rand() as string) as name,
    current_timestamp() as ts
from {{ ref('hudi_insert_table') }}

View File

@@ -0,0 +1,42 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/
/*
  Non-partitioned Hudi model, materialized incrementally, that inserts
  generated rows keyed by id.

  The CTE produces two candidate rows: one whose id is a random value in
  [0, 1000) formatted with zero decimal places, and one whose id is null.
  The final select drops the null row, so only the generated id is written.
*/
{{
    config(
        file_format='hudi',
        materialized='incremental',
        unique_key='id'
    )
}}

with candidate_ids as (

    select format_number(rand() * 1000, 0) as id

    union all

    -- deliberately null; removed by the filter below
    select null as id

)

select id
from candidate_ids
where id is not null

View File

@@ -0,0 +1,40 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/
/*
  Partitioned copy-on-write Hudi model, materialized incrementally with the
  merge strategy (upsert keyed on id).

  Rows are read from the hudi_upsert_table model; ts is refreshed to the
  current timestamp and datestr, the partition column, is set to the current
  date.

  The pre-hook runs "set spark.sql.datetime.java8API.enabled=false;" before
  the model is built.
  NOTE(review): presumably a workaround for how the date-valued partition
  column is handled -- confirm and record the reason here.
*/
{{
    config(
        materialized='incremental',
        incremental_strategy='merge',
        file_format='hudi',
        unique_key='id',
        partition_by='datestr',
        options={
            'type': 'cow',
            'primaryKey': 'id',
            'precombineKey': 'ts'
        },
        pre_hook=["set spark.sql.datetime.java8API.enabled=false;"]
    )
}}

select
    id,
    name,
    current_timestamp() as ts,
    current_date as datestr
from {{ ref('hudi_upsert_table') }}

View File

@@ -0,0 +1,40 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/
/*
  Partitioned merge-on-read Hudi model, materialized incrementally with the
  merge strategy (upsert keyed on id).

  Rows are read from the hudi_upsert_table model; ts is refreshed to the
  current timestamp and datestr, the partition column, is set to the current
  date.

  The pre-hook runs "set spark.sql.datetime.java8API.enabled=false;" before
  the model is built.
  NOTE(review): presumably a workaround for how the date-valued partition
  column is handled -- confirm and record the reason here.
*/
{{
    config(
        materialized='incremental',
        incremental_strategy='merge',
        file_format='hudi',
        unique_key='id',
        partition_by='datestr',
        options={
            'type': 'mor',
            'primaryKey': 'id',
            'precombineKey': 'ts'
        },
        pre_hook=["set spark.sql.datetime.java8API.enabled=false;"]
    )
}}

select
    id,
    name,
    current_timestamp() as ts,
    current_date as datestr
from {{ ref('hudi_upsert_table') }}

View File

@@ -0,0 +1,38 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/
/*
  Non-partitioned copy-on-write Hudi model, materialized incrementally with
  the merge strategy (upsert keyed on id).

  Rows are read from the hudi_insert_overwrite_table model; id and name are
  carried over unchanged and ts is refreshed to the current timestamp.
*/
{{
    config(
        materialized='incremental',
        incremental_strategy='merge',
        file_format='hudi',
        unique_key='id',
        options={
            'type': 'cow',
            'primaryKey': 'id',
            'precombineKey': 'ts'
        }
    )
}}

select
    id,
    name,
    current_timestamp() as ts
from {{ ref('hudi_insert_overwrite_table') }}

View File

@@ -0,0 +1,104 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# dbt properties file for the Hudi example models: one entry per model with
# a description and column-level tests. Every model tests id for uniqueness
# and non-null; the remaining listed columns are tested for non-null only.
version: 2

models:
  - name: hudi_insert_table
    description: "Hudi insert non-partitioned table with incremental materialization"
    columns:
      - name: id
        description: "The primary key for this table"
        tests:
          - unique
          - not_null

  - name: hudi_insert_overwrite_table
    description: "Hudi insert overwrite non-partitioned table with incremental materialization"
    columns:
      - name: id
        description: "The primary key for this table"
        tests:
          - unique
          - not_null
      - name: name
        description: "Employee name"
        tests:
          - not_null
      - name: ts
        description: "Created timestamp"
        tests:
          - not_null

  - name: hudi_upsert_table
    description: "Hudi upsert non-partitioned table with incremental materialization"
    columns:
      - name: id
        description: "The primary key for this table"
        tests:
          - unique
          - not_null
      - name: name
        description: "Employee name"
        tests:
          - not_null
      - name: ts
        description: "Created timestamp"
        tests:
          - not_null

  # NOTE(review): name corrected from "hudi_upsert_paritioned_cow_table".
  # Assumes the model file is hudi_upsert_partitioned_cow_table.sql; an entry
  # whose name matches no model is not attached and its tests do not run.
  # Confirm against the model file name.
  - name: hudi_upsert_partitioned_cow_table
    description: "Hudi upsert partitioned copy-on-write table with incremental materialization using merge strategy"
    columns:
      - name: id
        description: "The primary key for this table"
        tests:
          - unique
          - not_null
      - name: name
        description: "Employee name"
        tests:
          - not_null
      - name: ts
        description: "Created timestamp"
        tests:
          - not_null
      - name: datestr
        description: "Partition date string column"
        tests:
          - not_null

  # NOTE(review): name corrected from "hudi_upsert_paritioned_mor_table".
  # Assumes the model file is hudi_upsert_partitioned_mor_table.sql; confirm
  # against the model file name.
  - name: hudi_upsert_partitioned_mor_table
    description: "Hudi upsert partitioned merge-on-read table with incremental materialization using merge strategy"
    columns:
      - name: id
        description: "The primary key for this table"
        tests:
          - unique
          - not_null
      - name: name
        description: "Employee name"
        tests:
          - not_null
      - name: ts
        description: "Created timestamp"
        tests:
          - not_null
      - name: datestr
        description: "Partition date string column"
        tests:
          - not_null