Skip to content

Commit

Permalink
Merge pull request #6 from svinstech/improve_pivot_performance
Browse files Browse the repository at this point in the history
  • Loading branch information
jennaallen authored Mar 27, 2023
2 parents a64ad3b + 5ff0705 commit a425b73
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 48 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ target/
dbt_packages/
logs/
.idea
.vscode
14 changes: 7 additions & 7 deletions integration_tests/models/dim_as_primitive.sql
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
-- Pre-change form of the integration-test model: every column is produced by
-- calling as_primitive with the stg_as_primitive relation and a column name.
-- Only the string_number column opts in to try_casting.
select
{{ dbt_variant_utils.as_primitive(ref('stg_as_primitive'), 'same_type') }} as cast_same_type,
{{ dbt_variant_utils.as_primitive(ref('stg_as_primitive'), 'with_null') }} as cast_with_null,
{{ dbt_variant_utils.as_primitive(ref('stg_as_primitive'), 'different_types') }} as cast_different_types,
{{ dbt_variant_utils.as_primitive(ref('stg_as_primitive'), 'decimal_number') }} as cast_decimal_number,
{{ dbt_variant_utils.as_primitive(ref('stg_as_primitive'), 'string_number', try_casting=True) }} as cast_string_number
from {{ ref('stg_as_primitive') }}
-- Integration-test model for the as_primitive macro.
-- Each column reads one key from the parsed-JSON "extract" column of
-- stg_as_primitive and passes the macro an explicit type, the get() expression,
-- and a scale. Only the string_number column opts in to try_casting.
select
    {{ dbt_variant_utils.as_primitive('integer', "get(extract, 'same_type')", '2') }} as cast_same_type,
    {{ dbt_variant_utils.as_primitive('varchar', "get(extract, 'with_null')", '2') }} as cast_with_null,
    {{ dbt_variant_utils.as_primitive('boolean', "get(extract, 'different_types')", '2') }} as cast_different_types,
    {{ dbt_variant_utils.as_primitive('decimal', "get(extract, 'decimal_number')", '9') }} as cast_decimal_number,
    {{ dbt_variant_utils.as_primitive('decimal', "get(extract, 'string_number')", '2', try_casting=True) }} as cast_string_number
from {{ ref('stg_as_primitive') }}
42 changes: 27 additions & 15 deletions integration_tests/models/stg_as_primitive.sql
Original file line number Diff line number Diff line change
@@ -1,20 +1,32 @@
select
1::variant as same_type,
'foo'::variant as with_null,
true::variant as different_types,
0.567453454::variant as decimal_number,
'1.25'::variant as string_number
parse_json(
'{
"same_type": 1,
"with_null": "foo",
"different_types": true,
"decimal_number": 0.567453454,
"string_number": "1.25"
}'
) as extract
union all
select
2::variant as same_type,
null::variant as with_null,
4::variant as different_types,
1.05::variant as decimal_number,
1::variant as string_number
parse_json(
'{
"same_type": 2,
"with_null": null,
"different_types": 4,
"decimal_number": 1.05,
"string_number": 1
}'
) as extract
union all
select
3::variant as same_type,
'bar'::variant as with_null,
false::variant as different_types,
0.4532::variant as decimal_number,
1.5::variant as string_number
parse_json(
'{
"same_type": 3,
"with_null": "bar",
"different_types": false,
"decimal_number": 0.4532,
"string_number": 1.5
}'
) as extract
26 changes: 2 additions & 24 deletions macros/as_primitive.sql
Original file line number Diff line number Diff line change
@@ -1,26 +1,4 @@
{%- macro as_primitive(t, c, try_casting=False) -%}
{% set query = "
select
ifnull(try_parse_json(" ~ c ~ "), " ~ c ~ ") as parsed,
typeof(parsed) as type,
ifnull(len(split(parsed, '.')[1]), 2) as scale
from " ~ t ~ "
-- two scenarios:
-- key c exists in the JSON, but the value is null, type = NULL_VALUE
-- key c doesn't exist in the JSON, type = null
where ifnull(type, 'NULL_VALUE') != 'NULL_VALUE'
order by type, length(parsed) desc
limit 1" %}
{%- set type_query = run_query(query) -%}

{%- if execute -%}
{%- set type = type_query.columns[1][0] -%}
{%- set scale = type_query.columns[2][0] -%}
{%- else -%}
{%- set type = none -%}
{%- set scale = none -%}
{%- endif -%}

{%- macro as_primitive(type, c, scale, try_casting=False) -%}
{%- if type and type|lower not in ['null_value'] -%}
{%- if try_casting %}
{%- if type|lower == 'decimal' %}
Expand All @@ -38,6 +16,6 @@
{%- endif -%}
{%- endif -%}
{%- else %}
null
as_varchar({{ c }})
{%- endif -%}
{% endmacro %}
18 changes: 16 additions & 2 deletions macros/object_pivot.sql
Original file line number Diff line number Diff line change
@@ -1,13 +1,25 @@
{%- macro object_pivot(t, c, primitive=true, include_columns=[], exclude_keys=['null'], force_varchar=[], try_casting=False) -%}
{%- set query = "select distinct key, regexp_replace(key, '[ .]', '_') as alias_key from " ~ t ~ ", lateral flatten(" ~ c ~ ")" -%}
{%- set query = "
select
key,
regexp_replace(key, '[ .]', '_') as alias_key,
ifnull(try_parse_json(value), value) as parsed,
typeof(parsed) as type,
ifnull(len(split(parsed, '.')[1]), 2) as scale
from " ~ t ~ ", lateral flatten(" ~ c ~ ")
qualify row_number() over(partition by key order by iff(type = 'NULL_VALUE', null, type), length(parsed) desc) = 1" -%}
{%- set keys_query = run_query(query) -%}

{%- if execute -%}
{%- set keys = keys_query.columns[0].values() -%}
{%- set alias_keys = keys_query.columns[1].values() -%}
{%- set data_types = keys_query.columns[3].values() -%}
{%- set scales = keys_query.columns[4].values() -%}
{%- else -%}
{%- set keys = [] -%}
{%- set alias_keys = [] -%}
{%- set data_types = [] -%}
{%- set scales = [] -%}
{%- endif -%}

select
Expand All @@ -17,12 +29,14 @@
{%- if keys|length > 0 -%},{%- endif -%}
{%- for k in keys -%}
{%- set alias_key = alias_keys[loop.index0] -%}
{%- set data_type = data_types[loop.index0] -%}
{%- set scale = scales[loop.index0] -%}
{%- if k not in exclude_keys -%}
{%- if primitive -%}
{%- if k in force_varchar %}
as_varchar(get({{ c }}, '{{ k }}')) as {{ alias_key }}
{%- else -%}
{{ dbt_variant_utils.as_primitive(t, "get(" ~ c ~ ", '" ~ k ~ "')", try_casting) }} as {{ alias_key }}
{{ dbt_variant_utils.as_primitive(data_type, "get(" ~ c ~ ", '" ~ k ~ "')", scale, try_casting) }} as {{ alias_key }}
{%- endif -%}
{%- else -%}
get({{ c }}, '{{ k }}') as {{ alias_key }}
Expand Down

0 comments on commit a425b73

Please sign in to comment.