From 4c3a0e4e3cc8eab04abd9f186d2eda08ee00bfbf Mon Sep 17 00:00:00 2001 From: ctpita Date: Wed, 10 Apr 2024 11:37:22 -0400 Subject: [PATCH 1/2] wip: reduce CTE repetition in dataset.sql --- macros/dataset.sql | 110 +++++++----------- .../append_only/aggregate_after.sql | 7 +- .../append_only/aggregate_before.sql | 7 +- .../append_only/aggregate_in_between.sql | 11 +- .../relationships/append_only/first_after.sql | 7 +- .../append_only/first_before.sql | 9 +- .../append_only/first_in_between.sql | 11 +- .../relationships/append_only/last_after.sql | 7 +- .../relationships/append_only/last_before.sql | 7 +- .../append_only/last_in_between.sql | 11 +- 10 files changed, 68 insertions(+), 119 deletions(-) diff --git a/macros/dataset.sql b/macros/dataset.sql index 1393b63..f50e304 100644 --- a/macros/dataset.sql +++ b/macros/dataset.sql @@ -30,7 +30,6 @@ params: {% set columns = dbt_activity_schema.columns() %} {% set primary = dbt_activity_schema.primary %} -{% set appended = dbt_activity_schema.appended %} {% set alias_cte = dbt_activity_schema.alias_cte %} {% set alias_column = dbt_activity_schema.alias_column %} {% set alias_appended_activity = dbt_activity_schema.alias_appended_activity %} @@ -39,91 +38,68 @@ params: with -filter_activity_stream_using_primary_activity as ( +dataset as ( select {% for col in primary_activity.included_columns + primary_activity.required_columns %} - {{ dbt_activity_schema.parse_column(primary(), col) }} as {{ col }}{%- if not loop.last -%},{%- endif %} + {{ dbt_activity_schema.parse_column(primary(), col) }} as {{ col }}, {% endfor %} - - from {{ activity_stream }} as {{ primary() }} - - where {{ primary() }}.{{ columns.activity }} = {{ dbt.string_literal(primary_activity.name) }} - and {{ primary_activity.relationship.where_clause }} -), - -{% for activity in appended_activities %}{% set i = loop.index %} - -{{ alias_cte(activity, i) }} as ( - select - - -- Primary Activity Columns - {% for col in primary_activity.included_columns + 
primary_activity.required_columns %} - {{ primary() }}.{{- col }}, - {% endfor %} - - {% for col in activity.included_columns %} - {%- set parsed_col = dbt_activity_schema.parse_column(appended(), col) -%} - {% call activity.relationship.aggregation_func() %} - {{ parsed_col }} - {% endcall %} as {{ dbt_activity_schema.alias_appended_activity(activity, col) }} - {% if not loop.last %},{% endif %} + {% for activity in appended_activities %}{% set i = loop.index %}{% set last_outer_loop = loop.last %} + {% for col in activity.included_columns %} + {% call activity.relationship.aggregation_func() %} + {{ dbt_activity_schema.parse_column(alias_cte(activity, i), col) }} + {% endcall %} as {{ dbt_activity_schema.alias_appended_activity(activity, col) }} + {% if not (loop.last and last_outer_loop) %},{% endif %} + {% endfor %} {% endfor %} - - from filter_activity_stream_using_primary_activity as {{ primary() }} - - left join {{ activity_stream }} as {{ appended() }} + from ( + select * + from {{ activity_stream }} + where + {{ columns.activity }} = {{ dbt.string_literal(primary_activity.name) }} + and {{ primary_activity.relationship.where_clause }} + ) as {{ primary() }} + {% for activity in appended_activities %}{% set i = loop.index %} + {% set appended = alias_cte(activity, i) %} + left join ( + select * + from {{ activity_stream }} + where {{ columns.activity }} = {{ dbt.string_literal(activity.name) }} + ) as {{ appended }} on ( -- Join on Customer UUID Column - {{ appended() }}.{{ columns.customer }} = {{ primary() }}.{{ columns.customer }} - - -- Join the Correct Activity - and {{ appended() }}.{{- columns.activity }} = {{ dbt.string_literal(activity.name) }} + {{ appended }}.{{ columns.customer }} = {{ primary() }}.{{ columns.customer }} -- Relationship Specific Join Conditions and ( + {% if activity.relationship.name == "nth_ever" %} {# nth_ever_join_clause relies on instantiated nth_occurance arg, in addition to the i passed to the join #} - {% if 
activity.relationship.name == "nth_ever" %} - {{ activity.relationship.join_clause(activity.relationship.nth_occurance) }} - {% else %} + {{ activity.relationship.join_clause(activity.relationship.nth_occurance, appended) }} + {% elif activity.relationship.name in ("first_ever", "last_ever") %} + {# relies on appended subquery/CTE name #} + {{ activity.relationship.join_clause(appended) }} + {% elif activity.relationship.name in ("all_ever", "aggregate_all_ever") %} + {# doesn't rely on anything #} {{ activity.relationship.join_clause() }} + {% else %} + {# These need primary and appended subquery/CTE names, the zero is unused #} + {{ activity.relationship.join_clause(0, primary(), appended) }} {% endif %} ) - -- Additional Join Condition - and ( {{ activity.additional_join_condition }} ) + {# Additional Join Condition relies on primary and appended subquery/CTE names #} + {% if activity.additional_join_condition is string %} + and ( {{ activity.additional_join_condition(primary=primary(), appended=appended) }} ) + {% else %} + and ( {{ activity.additional_join_condition(primary(), appended) }} ) + {% endif %} ) - + {% endfor %} group by - {% for col in primary_activity.included_columns + primary_activity.required_columns %} - {{ primary() }}.{{ col }}{%- if not loop.last -%},{%- endif %} - {% endfor %} -), - -{% endfor %} - -rejoin_aggregated_activities as ( - select - {% for col in primary_activity.included_columns %} - {{ primary() }}.{{ col }}, - {% endfor %} - - {% for activity in appended_activities %}{% set i = loop.index %}{% set last_outer_loop = loop.last %} - {% for col in activity.included_columns %} - {{ alias_cte(activity, i) }}.{{ alias_appended_activity(activity, col) }}{% if not (last_outer_loop and loop.last) %},{% endif %} - {% endfor %} + {{ primary() }}.{{ col }}{%- if not loop.last -%},{%- endif %} {% endfor %} - - from filter_activity_stream_using_primary_activity as {{ primary() }} - - {% for activity in appended_activities %}{% set i = loop.index %} 
- - left join {{ alias_cte(activity, i) }} - on {{ alias_cte(activity, i) }}.{{ columns.activity_id }} = {{ primary() }}.{{ columns.activity_id }} - - {% endfor %} ) -select * from rejoin_aggregated_activities +select * from dataset {% endmacro %} diff --git a/macros/relationships/append_only/aggregate_after.sql b/macros/relationships/append_only/aggregate_after.sql index 84588ba..059e18a 100644 --- a/macros/relationships/append_only/aggregate_after.sql +++ b/macros/relationships/append_only/aggregate_after.sql @@ -1,11 +1,8 @@ -{% macro aggregate_after_join_clause(i) %} - -{% set primary = dbt_activity_schema.primary %} +{% macro aggregate_after_join_clause(i, primary, appended) %} {% set columns = dbt_activity_schema.columns() %} -{% set appended = dbt_activity_schema.appended %} ( - {{ appended() }}.{{- columns.ts }} > {{ primary() }}.{{- columns.ts }} + {{ appended }}.{{- columns.ts }} > {{ primary }}.{{- columns.ts }} ) {% endmacro %} diff --git a/macros/relationships/append_only/aggregate_before.sql b/macros/relationships/append_only/aggregate_before.sql index cda8e96..39ef270 100644 --- a/macros/relationships/append_only/aggregate_before.sql +++ b/macros/relationships/append_only/aggregate_before.sql @@ -1,11 +1,8 @@ -{% macro aggregate_before_join_clause(i) %} - -{% set primary = dbt_activity_schema.primary %} +{% macro aggregate_before_join_clause(i, primary, appended) %} {% set columns = dbt_activity_schema.columns() %} -{% set appended = dbt_activity_schema.appended %} ( - {{ appended() }}.{{- columns.ts }} < {{ primary() }}.{{- columns.ts }} + {{ appended }}.{{- columns.ts }} < {{ primary }}.{{- columns.ts }} ) {% endmacro %} diff --git a/macros/relationships/append_only/aggregate_in_between.sql b/macros/relationships/append_only/aggregate_in_between.sql index 1fee142..dde943c 100644 --- a/macros/relationships/append_only/aggregate_in_between.sql +++ b/macros/relationships/append_only/aggregate_in_between.sql @@ -1,14 +1,11 @@ -{% macro 
aggregate_in_between_join_clause(i) %} - -{% set primary = dbt_activity_schema.primary %} +{% macro aggregate_in_between_join_clause(i, primary, appended) %} {% set columns = dbt_activity_schema.columns() %} -{% set appended = dbt_activity_schema.appended %} ( - {{ appended() }}.{{- columns.ts }} > {{ primary() }}.{{- columns.ts }} + {{ appended }}.{{- columns.ts }} > {{ primary }}.{{- columns.ts }} and ( - {{ appended() }}.{{- columns.ts }} <= {{ primary() }}.{{- columns.activity_repeated_at }} - or {{ primary() }}.{{- columns.activity_repeated_at }} is null + {{ appended }}.{{- columns.ts }} <= {{ primary }}.{{- columns.activity_repeated_at }} + or {{ primary }}.{{- columns.activity_repeated_at }} is null ) ) {% endmacro %} diff --git a/macros/relationships/append_only/first_after.sql b/macros/relationships/append_only/first_after.sql index 2399db6..85c5148 100644 --- a/macros/relationships/append_only/first_after.sql +++ b/macros/relationships/append_only/first_after.sql @@ -1,11 +1,8 @@ -{% macro first_after_join_clause(i) %} - -{% set primary = dbt_activity_schema.primary %} +{% macro first_after_join_clause(i, primary, appended) %} {% set columns = dbt_activity_schema.columns() %} -{% set appended = dbt_activity_schema.appended %} ( - {{ appended() }}.{{- columns.ts }} > {{ primary() }}.{{- columns.ts }} + {{ appended }}.{{- columns.ts }} > {{ primary }}.{{- columns.ts }} ) {% endmacro %} diff --git a/macros/relationships/append_only/first_before.sql b/macros/relationships/append_only/first_before.sql index 49d880e..0b6048e 100644 --- a/macros/relationships/append_only/first_before.sql +++ b/macros/relationships/append_only/first_before.sql @@ -1,12 +1,9 @@ -{% macro first_before_join_clause(i) %} - -{% set primary = dbt_activity_schema.primary %} +{% macro first_before_join_clause(i, primary, appended) %} {% set columns = dbt_activity_schema.columns() %} -{% set appended = dbt_activity_schema.appended %} ( - {{ appended() }}.{{ columns.activity_occurrence }} 
= 1 - and {{ appended() }}.{{- columns.ts }} <= coalesce({{ primary() }}.{{- columns.activity_repeated_at }}, '2100-01-01'::timestamp) + {{ appended }}.{{ columns.activity_occurrence }} = 1 + and {{ appended }}.{{- columns.ts }} <= coalesce({{ primary }}.{{- columns.activity_repeated_at }}, '2100-01-01'::timestamp) ) {% endmacro %} diff --git a/macros/relationships/append_only/first_in_between.sql b/macros/relationships/append_only/first_in_between.sql index 20d801d..90094c4 100644 --- a/macros/relationships/append_only/first_in_between.sql +++ b/macros/relationships/append_only/first_in_between.sql @@ -1,14 +1,11 @@ -{% macro first_in_between_join_clause(i) %} - -{% set primary = dbt_activity_schema.primary %} +{% macro first_in_between_join_clause(i, primary, appended) %} {% set columns = dbt_activity_schema.columns() %} -{% set appended = dbt_activity_schema.appended %} ( - {{ appended() }}.{{- columns.ts }} > {{ primary() }}.{{- columns.ts }} + {{ appended }}.{{- columns.ts }} > {{ primary }}.{{- columns.ts }} and ( - {{ appended() }}.{{- columns.ts }} <= {{ primary() }}.{{- columns.activity_repeated_at }} - or {{ primary() }}.{{- columns.activity_repeated_at }} is null + {{ appended }}.{{- columns.ts }} <= {{ primary }}.{{- columns.activity_repeated_at }} + or {{ primary }}.{{- columns.activity_repeated_at }} is null ) ) {% endmacro %} diff --git a/macros/relationships/append_only/last_after.sql b/macros/relationships/append_only/last_after.sql index df43454..ea01099 100644 --- a/macros/relationships/append_only/last_after.sql +++ b/macros/relationships/append_only/last_after.sql @@ -1,11 +1,8 @@ -{% macro last_after_join_clause(i) %} - -{% set primary = dbt_activity_schema.primary %} +{% macro last_after_join_clause(i, primary, appended) %} {% set columns = dbt_activity_schema.columns() %} -{% set appended = dbt_activity_schema.appended %} ( - {{ appended() }}.{{- columns.ts }} > {{ primary() }}.{{- columns.ts }} + {{ appended }}.{{- columns.ts }} > {{ 
primary }}.{{- columns.ts }} ) {% endmacro %} diff --git a/macros/relationships/append_only/last_before.sql b/macros/relationships/append_only/last_before.sql index 485ee21..06d91b9 100644 --- a/macros/relationships/append_only/last_before.sql +++ b/macros/relationships/append_only/last_before.sql @@ -1,11 +1,8 @@ -{% macro last_before_join_clause(i) %} - -{% set primary = dbt_activity_schema.primary %} +{% macro last_before_join_clause(i, primary, appended) %} {% set columns = dbt_activity_schema.columns() %} -{% set appended = dbt_activity_schema.appended %} ( - {{ appended() }}.{{- columns.ts }} <= coalesce({{ primary() }}.{{- columns.ts }}, '1900-01-01'::timestamp) + {{ appended }}.{{- columns.ts }} <= coalesce({{ primary }}.{{- columns.ts }}, '1900-01-01'::timestamp) ) {% endmacro %} diff --git a/macros/relationships/append_only/last_in_between.sql b/macros/relationships/append_only/last_in_between.sql index 8f988d5..be6aa14 100644 --- a/macros/relationships/append_only/last_in_between.sql +++ b/macros/relationships/append_only/last_in_between.sql @@ -1,14 +1,11 @@ -{% macro last_in_between_join_clause(i) %} - -{% set primary = dbt_activity_schema.primary %} +{% macro last_in_between_join_clause(i, primary, appended) %} {% set columns = dbt_activity_schema.columns() %} -{% set appended = dbt_activity_schema.appended %} ( - {{ appended() }}.{{- columns.ts }} > {{ primary() }}.{{- columns.ts }} + {{ appended }}.{{- columns.ts }} > {{ primary }}.{{- columns.ts }} and ( - {{ appended() }}.{{- columns.ts }} <= {{ primary() }}.{{- columns.activity_repeated_at }} - or {{ primary() }}.{{- columns.activity_repeated_at }} is null + {{ appended }}.{{- columns.ts }} <= {{ primary }}.{{- columns.activity_repeated_at }} + or {{ primary }}.{{- columns.activity_repeated_at }} is null ) ) {% endmacro %} From e76b3b2bd34cb7ed015f4cd8638cf71b90062ad6 Mon Sep 17 00:00:00 2001 From: ctpita Date: Wed, 10 Apr 2024 12:01:10 -0400 Subject: [PATCH 2/2] wip: do not call string --- 
macros/dataset.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macros/dataset.sql b/macros/dataset.sql index f50e304..1dd5f80 100644 --- a/macros/dataset.sql +++ b/macros/dataset.sql @@ -88,7 +88,7 @@ dataset as ( ) {# Additional Join Condition relies on primary and appended subquery/CTE names #} {% if activity.additional_join_condition is string %} - and ( {{ activity.additional_join_condition(primary=primary(), appended=appended) }} ) + and ( {{ activity.additional_join_condition }} ) {% else %} and ( {{ activity.additional_join_condition(primary(), appended) }} ) {% endif %}