diff --git a/README.md b/README.md index e4d80f3b..70364596 100644 --- a/README.md +++ b/README.md @@ -467,7 +467,7 @@ my_context_table as ( ### get_field ([source](macros/utils/cross_db/get_field.sql)) -This macro exists to make it easier to extract a field from our `unstruct_` and `contexts_` type columns for users in Snowflake, Databricks, and BigQuery (although you may prefer to use [`combine_column_versions`](#combine_column_versions-source) for BigQuery, as this manages multiple context versions and allows for extraction of multiple fields at the same time). The macro can handle type casting and selecting from arrays. +This macro exists to make it easier to extract a field from our `unstruct_` and `contexts_` type columns for users in Snowflake, Databricks, and BigQuery (using a wildcard version number is only possible for BigQuery e.g. `column_name = 'contexts_nl_basjes_yauaa_context_1_*'`). The macro can handle type casting and selecting from arrays. **Returns:** diff --git a/docs/markdown/snowplow_utils_macro_docs.md b/docs/markdown/snowplow_utils_macro_docs.md index 381b0ece..93a92615 100644 --- a/docs/markdown/snowplow_utils_macro_docs.md +++ b/docs/markdown/snowplow_utils_macro_docs.md @@ -317,7 +317,7 @@ left join nl_basjes_yauaa_context_1 b on {% docs macro_get_field %} {% raw %} -This macro exists to make it easier to extract a field from our `unstruct_` and `contexts_` type columns for users in Snowflake, Databricks, and BigQuery (although you may prefer to use `combine_column_versions` for BigQuery, as this manages multiple context versions and allows for extraction of multiple fields at the same time). The macro can handle type casting and selecting from arrays. +This macro exists to make it easier to extract a field from our `unstruct_` and `contexts_` type columns for users in Snowflake, Databricks, and BigQuery (using a wildcard version number is only possible for BigQuery e.g. `column_name = 'contexts_nl_basjes_yauaa_context_1_*'`). 
The macro can handle type casting and selecting from arrays. #### Returns diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml index 9d0123e7..668aa6aa 100644 --- a/integration_tests/dbt_project.yml +++ b/integration_tests/dbt_project.yml @@ -79,6 +79,10 @@ models: +enabled: "{{ target.type in ['snowflake', 'bigquery', 'databricks', 'spark'] | as_bool() }}" expected_get_field: +enabled: "{{ target.type in ['snowflake', 'bigquery', 'databricks', 'spark'] | as_bool() }}" + test_get_field_bq: + +enabled: "{{ target.type in ['bigquery'] | as_bool() }}" + expected_get_field_bq: + +enabled: "{{ target.type in ['bigquery'] | as_bool() }}" base: +bind: false diff --git a/integration_tests/models/utils/cross_db/cross_db.yml b/integration_tests/models/utils/cross_db/cross_db.yml index 6dacca1d..fbec14b3 100644 --- a/integration_tests/models/utils/cross_db/cross_db.yml +++ b/integration_tests/models/utils/cross_db/cross_db.yml @@ -13,3 +13,7 @@ models: tests: - dbt_utils.equality: compare_model: ref('expected_get_field') + - name: test_get_field_bq + tests: + - dbt_utils.equality: + compare_model: ref('expected_get_field_bq') diff --git a/integration_tests/models/utils/cross_db/expected_get_field_bq.sql b/integration_tests/models/utils/cross_db/expected_get_field_bq.sql new file mode 100644 index 00000000..4956e62e --- /dev/null +++ b/integration_tests/models/utils/cross_db/expected_get_field_bq.sql @@ -0,0 +1,14 @@ +{# +Copyright (c) 2021-present Snowplow Analytics Ltd. All rights reserved. +This program is licensed to you under the Snowplow Community License Version 1.0, +and you may not use this file except in compliance with the Snowplow Community License Version 1.0. 
+You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 +#} + +{{ + config( + tags = ['get_field'], + ) +}} + +select 'John' as combined_column diff --git a/integration_tests/models/utils/cross_db/test_get_field_bq.sql b/integration_tests/models/utils/cross_db/test_get_field_bq.sql new file mode 100644 index 00000000..b69bcf8f --- /dev/null +++ b/integration_tests/models/utils/cross_db/test_get_field_bq.sql @@ -0,0 +1,24 @@ +{# +Copyright (c) 2021-present Snowplow Analytics Ltd. All rights reserved. +This program is licensed to you under the Snowplow Community License Version 1.0, +and you may not use this file except in compliance with the Snowplow Community License Version 1.0. +You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 +#} + +{{ + config( + tags = ['get_field'], + ) +}} + +{% set test_1_actual = snowplow_utils.combine_column_versions(relation=ref('data_combine_column_versions'), + column_prefix='staff_v', + array_index=1) %} +{% if target.type == 'bigquery' %} +{# Selects first_name through get_field with the wildcard column 'staff_v_*'. The adapter name is compared in lowercase, matching the target.type checks in dbt_project.yml. NOTE(review): test_1_actual above is not referenced in this model - confirm whether it is still needed. #} +select + {{snowplow_utils.get_field('staff_v_*', 'first_name', 'a', dbt.type_string(), 0, relation = ref('data_combine_column_versions'))}} as combined_column, +from + {{ref('data_combine_column_versions')}} a + +{% endif %} diff --git a/macros/base/base_create_snowplow_events_this_run.sql b/macros/base/base_create_snowplow_events_this_run.sql index aee9571f..42a60a3b 100644 --- a/macros/base/base_create_snowplow_events_this_run.sql +++ b/macros/base/base_create_snowplow_events_this_run.sql @@ -26,7 +26,7 @@ You may obtain a copy of the Snowplow Community License Version 1.0 at https://d COALESCE( {% for identifier in session_identifiers %} {%- if identifier['schema']|lower != 'atomic' -%} - {{ snowplow_utils.get_field(identifier['schema'], identifier['field'], 'e', dbt.type_string(), 0) }} + {{ snowplow_utils.get_field(identifier['schema'], identifier['field'], 'e', 
dbt.type_string(), 0, snowplow_events) }} {%- else -%} e.{{identifier['field']}} {%- endif -%} diff --git a/macros/base/base_create_snowplow_sessions_lifecycle_manifest.sql b/macros/base/base_create_snowplow_sessions_lifecycle_manifest.sql index be52fc43..700018fa 100644 --- a/macros/base/base_create_snowplow_sessions_lifecycle_manifest.sql +++ b/macros/base/base_create_snowplow_sessions_lifecycle_manifest.sql @@ -27,7 +27,7 @@ You may obtain a copy of the Snowplow Community License Version 1.0 at https://d COALESCE( {% for identifier in session_identifiers %} {%- if identifier['schema']|lower != 'atomic' -%} - {{ snowplow_utils.get_field(identifier['schema'], identifier['field'], 'e', dbt.type_string(), 0) }} + {{ snowplow_utils.get_field(identifier['schema'], identifier['field'], 'e', dbt.type_string(), 0, snowplow_events) }} {%- else -%} e.{{identifier['field']}} {%- endif -%} @@ -45,7 +45,7 @@ You may obtain a copy of the Snowplow Community License Version 1.0 at https://d COALESCE( {% for identifier in user_identifiers %} {%- if identifier['schema']|lower != 'atomic' -%} - {{ snowplow_utils.get_field(identifier['schema'], identifier['field'], 'e', dbt.type_string(), 0) }} + {{ snowplow_utils.get_field(identifier['schema'], identifier['field'], 'e', dbt.type_string(), 0, snowplow_events) }} {%- else -%} e.{{identifier['field']}} {%- endif -%} diff --git a/macros/utils/cross_db/get_field.sql b/macros/utils/cross_db/get_field.sql index 1090181f..20766418 100644 --- a/macros/utils/cross_db/get_field.sql +++ b/macros/utils/cross_db/get_field.sql @@ -5,27 +5,48 @@ and you may not use this file except in compliance with the Snowplow Community L You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0 #} -{% macro get_field(column_name, field_name, table_alias = none, type = none, array_index = none) %} - {{ return(adapter.dispatch('get_field', 'snowplow_utils')(column_name, field_name, table_alias, type, 
array_index)) }} +{% macro get_field(column_name, field_name, table_alias = none, type = none, array_index = none, relation = none) %}{# Entry point: passes every argument, including relation, to the adapter-specific get_field implementation selected by adapter.dispatch. #} + {{ return(adapter.dispatch('get_field', 'snowplow_utils')(column_name, field_name, table_alias, type, array_index, relation)) }} {% endmacro %} -{% macro bigquery__get_field(column_name, field_name, table_alias = none, type = none, array_index = none) %} +{% macro bigquery__get_field(column_name, field_name, table_alias = none, type = none, array_index = none, relation = none) %} +{# BigQuery implementation: a column_name containing '*' is resolved through snowplow_utils.get_optional_fields, using column_name with '_*' removed as col_prefix and 'string' as dtype when no type is given; any other column_name renders the direct field access in the else branch. NOTE(review): array_index is not forwarded in the wildcard branch - confirm this is intended. #} +{% if '*' in column_name %} + {{ snowplow_utils.get_optional_fields( + enabled=true, + fields=[{'field': field_name, 'dtype': type or 'string' }], + col_prefix=column_name|replace('_*', ''), + relation=relation, + relation_alias=table_alias, + include_field_alias=false + ) }} + +{% else %} {%- if type -%}cast({%- endif -%}{%- if table_alias -%}{{table_alias}}.{%- endif -%}{{column_name}}{%- if array_index is not none -%}[SAFE_OFFSET({{array_index}})]{%- endif -%}.{{field_name}}{%- if type %} as {{type}}){%- endif -%} +{% endif %} {% endmacro %} -{% macro spark__get_field(column_name, field_name, table_alias = none, type = none, array_index = none) %} -{%- if table_alias -%}{{table_alias}}.{%- endif -%}{{column_name}}{%- if array_index is not none -%}[{{array_index}}]{%- endif -%}.{{field_name}}{%- if type -%}::{{type}}{%- endif -%} +{% macro spark__get_field(column_name, field_name, table_alias = none, type = none, array_index = none, relation = none) %}{# Spark implementation: raises a compiler error when column_name contains '*'; otherwise renders the column and field access with optional table alias, array index and ::type cast. relation is accepted but not used here. #} +{% if '*' in column_name %} + {% do exceptions.raise_compiler_error('Wildcard schema versions are only supported for Bigquery, they are not supported for ' ~ target.type) %} +{% else %} + {%- if table_alias -%}{{table_alias}}.{%- endif -%}{{column_name}}{%- if array_index is not none -%}[{{array_index}}]{%- endif -%}.{{field_name}}{%- if type -%}::{{type}}{%- endif -%} +{% endif %} {% endmacro %} -{% macro snowflake__get_field(column_name, field_name, table_alias = none, type = none, array_index = 
none) %} -{%- if type is none and execute -%} -{% do exceptions.warn("Warning: macro snowplow_utils.get_field is being used without a type provided, Snowflake will return a variant column in this case which is unlikely to be what you want.") %} -{%- endif -%} -{%- if table_alias -%}{{table_alias}}.{%- endif -%}{{column_name}}{%- if array_index is not none -%}[{{array_index}}]{%- endif -%}:{{field_name}}{%- if type -%}::{{type}}{%- endif -%} +{% macro snowflake__get_field(column_name, field_name, table_alias = none, type = none, array_index = none, relation = none) %}{# Snowflake implementation: raises a compiler error when column_name contains '*'; otherwise warns during execution if no type is given, then renders the column:field access with optional table alias, array index and ::type cast. relation is accepted but not used here. #} +{% if '*' in column_name %} + {% do exceptions.raise_compiler_error('Wildcard schema versions are only supported for Bigquery, they are not supported for ' ~ target.type) %} +{% else %} + {%- if type is none and execute -%} + {% do exceptions.warn("Warning: macro snowplow_utils.get_field is being used without a type provided, Snowflake will return a variant column in this case which is unlikely to be what you want.") %} + {%- endif -%} + {%- if table_alias -%}{{table_alias}}.{%- endif -%}{{column_name}}{%- if array_index is not none -%}[{{array_index}}]{%- endif -%}:{{field_name}}{%- if type -%}::{{type}}{%- endif -%} +{% endif %} {% endmacro %} -{% macro default__get_field(column_name, field_name, table_alias = none, type = none, array_index = none) %} +{% macro default__get_field(column_name, field_name, table_alias = none, type = none, array_index = none, relation = none) %} {% if execute %} {% do exceptions.raise_compiler_error('Macro get_field only supports Bigquery, Snowflake, Spark, and Databricks, it is not supported for ' ~ target.type) %}