Skip to content

Commit

Permalink
Add multi-version support to get_field for bigquery
Browse files Browse the repository at this point in the history
  • Loading branch information
rlh1994 committed Sep 14, 2023
1 parent dad8ccd commit c5aaf23
Show file tree
Hide file tree
Showing 9 changed files with 84 additions and 16 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -467,7 +467,7 @@ my_context_table as (

### get_field ([source](macros/utils/cross_db/get_field.sql))

This macro exists to make it easier to extract a field from our `unstruct_` and `contexts_` type columns for users in Snowflake, Databricks, and BigQuery (although you may prefer to use [`combine_column_versions`](#combine_column_versions-source) for BigQuery, as this manages multiple context versions and allows for extraction of multiple fields at the same time). The macro can handle type casting and selecting from arrays.
This macro exists to make it easier to extract a field from our `unstruct_` and `contexts_` type columns for users in Snowflake, Databricks, and BigQuery. Using a wildcard version number (e.g. `column_name = 'contexts_nl_basjes_yauaa_context_1_*'`) is only supported on BigQuery; on the other warehouses a wildcard raises a compiler error. The macro can handle type casting and selecting from arrays.

**Returns:**

Expand Down
2 changes: 1 addition & 1 deletion docs/markdown/snowplow_utils_macro_docs.md
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@ left join nl_basjes_yauaa_context_1 b on
{% docs macro_get_field %}
{% raw %}

This macro exists to make it easier to extract a field from our `unstruct_` and `contexts_` type columns for users in Snowflake, Databricks, and BigQuery (although you may prefer to use `combine_column_versions` for BigQuery, as this manages multiple context versions and allows for extraction of multiple fields at the same time). The macro can handle type casting and selecting from arrays.
This macro exists to make it easier to extract a field from our `unstruct_` and `contexts_` type columns for users in Snowflake, Databricks, and BigQuery. Using a wildcard version number (e.g. `column_name = 'contexts_nl_basjes_yauaa_context_1_*'`) is only supported on BigQuery; on the other warehouses a wildcard raises a compiler error. The macro can handle type casting and selecting from arrays.

#### Returns

Expand Down
4 changes: 4 additions & 0 deletions integration_tests/dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ models:
+enabled: "{{ target.type in ['snowflake', 'bigquery', 'databricks', 'spark'] | as_bool() }}"
expected_get_field:
+enabled: "{{ target.type in ['snowflake', 'bigquery', 'databricks', 'spark'] | as_bool() }}"
test_get_field_bq:
+enabled: "{{ target.type in ['bigquery'] | as_bool() }}"
expected_get_field_bq:
+enabled: "{{ target.type in ['bigquery'] | as_bool() }}"

base:
+bind: false
Expand Down
4 changes: 4 additions & 0 deletions integration_tests/models/utils/cross_db/cross_db.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,7 @@ models:
tests:
- dbt_utils.equality:
compare_model: ref('expected_get_field')
- name: test_get_field_bq
tests:
- dbt_utils.equality:
compare_model: ref('expected_get_field_bq')
14 changes: 14 additions & 0 deletions integration_tests/models/utils/cross_db/expected_get_field_bq.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{#
Copyright (c) 2021-present Snowplow Analytics Ltd. All rights reserved.
This program is licensed to you under the Snowplow Community License Version 1.0,
and you may not use this file except in compliance with the Snowplow Community License Version 1.0.
You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0
#}

{# Expected result for the test_get_field_bq model: a single row with combined_column = 'John'. #}
{{ config(tags=['get_field']) }}

select
    'John' as combined_column
25 changes: 25 additions & 0 deletions integration_tests/models/utils/cross_db/test_get_field_bq.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{#
Copyright (c) 2021-present Snowplow Analytics Ltd. All rights reserved.
This program is licensed to you under the Snowplow Community License Version 1.0,
and you may not use this file except in compliance with the Snowplow Community License Version 1.0.
You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0
#}

{{
config(
tags = ['get_field'],
)
}}

{#
Integration test model for the wildcard-version path of snowplow_utils.get_field.
Calls get_field with column_name 'staff_v_*', field 'first_name', table alias 'a',
a string type, array_index 0 and the data_combine_column_versions relation, and
exposes the result as `combined_column` so it can be compared against
expected_get_field_bq.
#}
{# target.type holds the lowercase adapter name, so compare against 'bigquery'. #}
{% if target.type == 'bigquery' %}

select
    {{ snowplow_utils.get_field('staff_v_*', 'first_name', 'a', dbt.type_string(), 0, relation = ref('data_combine_column_versions')) }} as combined_column
from
    {{ ref('data_combine_column_versions') }} a
{% else %}
{# Non-BigQuery fallback: keep the same column name as the BigQuery branch. #}
select 'John' as combined_column
{% endif %}
2 changes: 1 addition & 1 deletion macros/base/base_create_snowplow_events_this_run.sql
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ You may obtain a copy of the Snowplow Community License Version 1.0 at https://d
COALESCE(
{% for identifier in session_identifiers %}
{%- if identifier['schema']|lower != 'atomic' -%}
{{ snowplow_utils.get_field(identifier['schema'], identifier['field'], 'e', dbt.type_string(), 0) }}
{{ snowplow_utils.get_field(identifier['schema'], identifier['field'], 'e', dbt.type_string(), 0, snowplow_events) }}
{%- else -%}
e.{{identifier['field']}}
{%- endif -%}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ You may obtain a copy of the Snowplow Community License Version 1.0 at https://d
COALESCE(
{% for identifier in session_identifiers %}
{%- if identifier['schema']|lower != 'atomic' -%}
{{ snowplow_utils.get_field(identifier['schema'], identifier['field'], 'e', dbt.type_string(), 0) }}
{{ snowplow_utils.get_field(identifier['schema'], identifier['field'], 'e', dbt.type_string(), 0, snowplow_events) }}
{%- else -%}
e.{{identifier['field']}}
{%- endif -%}
Expand All @@ -45,7 +45,7 @@ You may obtain a copy of the Snowplow Community License Version 1.0 at https://d
COALESCE(
{% for identifier in user_identifiers %}
{%- if identifier['schema']|lower != 'atomic' -%}
{{ snowplow_utils.get_field(identifier['schema'], identifier['field'], 'e', dbt.type_string(), 0) }}
{{ snowplow_utils.get_field(identifier['schema'], identifier['field'], 'e', dbt.type_string(), 0, snowplow_events) }}
{%- else -%}
e.{{identifier['field']}}
{%- endif -%}
Expand Down
43 changes: 32 additions & 11 deletions macros/utils/cross_db/get_field.sql
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,48 @@ and you may not use this file except in compliance with the Snowplow Community L
You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0
#}

{# Public entry point for get_field: forwards every argument unchanged to the adapter-specific `<adapter>__get_field` macro resolved via adapter.dispatch in the snowplow_utils namespace, and returns its result. #}
{% macro get_field(column_name, field_name, table_alias = none, type = none, array_index = none) %}
{{ return(adapter.dispatch('get_field', 'snowplow_utils')(column_name, field_name, table_alias, type, array_index)) }}
{% macro get_field(column_name, field_name, table_alias = none, type = none, array_index = none, relation = none) %}
{{ return(adapter.dispatch('get_field', 'snowplow_utils')(column_name, field_name, table_alias, type, array_index, relation)) }}
{% endmacro %}

{#
BigQuery implementation of get_field.

Wildcard branch (column_name contains '*'): strips the '_*' suffix to form a column
prefix and delegates to snowplow_utils.get_optional_fields, passing `relation` and
`table_alias`; the dtype falls back to 'string' when `type` is not given. Note that
`array_index` is not forwarded in this branch.

Otherwise renders: [cast(][table_alias.]column_name[[SAFE_OFFSET(array_index)]].field_name[ as type)]
#}
{% macro bigquery__get_field(column_name, field_name, table_alias = none, type = none, array_index = none) %}
{% macro bigquery__get_field(column_name, field_name, table_alias = none, type = none, array_index = none, relation = none) %}

{% if '*' in column_name %}
{{ snowplow_utils.get_optional_fields(
enabled=true,
fields=[{'field': field_name, 'dtype': type or 'string' }],
col_prefix=column_name|replace('_*', ''),
relation=relation,
relation_alias=table_alias,
include_field_alias=false
) }}

{% else %}
{%- if type -%}cast({%- endif -%}{%- if table_alias -%}{{table_alias}}.{%- endif -%}{{column_name}}{%- if array_index is not none -%}[SAFE_OFFSET({{array_index}})]{%- endif -%}.{{field_name}}{%- if type %} as {{type}}){%- endif -%}
{% endif %}
{% endmacro %}

{#
Spark implementation of get_field.

Raises a compiler error when column_name contains '*' (wildcard versions are not
supported here); `relation` is accepted for signature parity but not used.

Otherwise renders: [table_alias.]column_name[[array_index]].field_name[::type]
#}
{% macro spark__get_field(column_name, field_name, table_alias = none, type = none, array_index = none) %}
{%- if table_alias -%}{{table_alias}}.{%- endif -%}{{column_name}}{%- if array_index is not none -%}[{{array_index}}]{%- endif -%}.{{field_name}}{%- if type -%}::{{type}}{%- endif -%}
{% macro spark__get_field(column_name, field_name, table_alias = none, type = none, array_index = none, relation = none) %}
{% if '*' in column_name %}
{% do exceptions.raise_compiler_error('Wildcard schema versions are only supported for Bigquery, they are not supported for ' ~ target.type) %}
{% else %}
{%- if table_alias -%}{{table_alias}}.{%- endif -%}{{column_name}}{%- if array_index is not none -%}[{{array_index}}]{%- endif -%}.{{field_name}}{%- if type -%}::{{type}}{%- endif -%}
{% endif %}
{% endmacro %}

{#
Snowflake implementation of get_field.

Raises a compiler error when column_name contains '*' (wildcard versions are not
supported here); `relation` is accepted for signature parity but not used.
Emits a warning at execute time when no `type` is provided.

Otherwise renders: [table_alias.]column_name[[array_index]]:field_name[::type]
#}
{% macro snowflake__get_field(column_name, field_name, table_alias = none, type = none, array_index = none) %}
{%- if type is none and execute -%}
{% do exceptions.warn("Warning: macro snowplow_utils.get_field is being used without a type provided, Snowflake will return a variant column in this case which is unlikely to be what you want.") %}
{%- endif -%}
{%- if table_alias -%}{{table_alias}}.{%- endif -%}{{column_name}}{%- if array_index is not none -%}[{{array_index}}]{%- endif -%}:{{field_name}}{%- if type -%}::{{type}}{%- endif -%}
{% macro snowflake__get_field(column_name, field_name, table_alias = none, type = none, array_index = none, relation = none) %}
{% if '*' in column_name %}
{% do exceptions.raise_compiler_error('Wildcard schema versions are only supported for Bigquery, they are not supported for ' ~ target.type) %}
{% else %}
{%- if type is none and execute -%}
{% do exceptions.warn("Warning: macro snowplow_utils.get_field is being used without a type provided, Snowflake will return a variant column in this case which is unlikely to be what you want.") %}
{%- endif -%}
{%- if table_alias -%}{{table_alias}}.{%- endif -%}{{column_name}}{%- if array_index is not none -%}[{{array_index}}]{%- endif -%}:{{field_name}}{%- if type -%}::{{type}}{%- endif -%}
{% endif %}
{% endmacro %}


{% macro default__get_field(column_name, field_name, table_alias = none, type = none, array_index = none) %}
{% macro default__get_field(column_name, field_name, table_alias = none, type = none, array_index = none, relation = none) %}

{% if execute %}
{% do exceptions.raise_compiler_error('Macro get_field only supports Bigquery, Snowflake, Spark, and Databricks, it is not supported for ' ~ target.type) %}
Expand Down

0 comments on commit c5aaf23

Please sign in to comment.