Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft: [Patch] Reduce CTE repetition #41

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 42 additions & 66 deletions macros/dataset.sql
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ params:

{% set columns = dbt_activity_schema.columns() %}
{% set primary = dbt_activity_schema.primary %}
{% set appended = dbt_activity_schema.appended %}
{% set alias_cte = dbt_activity_schema.alias_cte %}
{% set alias_column = dbt_activity_schema.alias_column %}
{% set alias_appended_activity = dbt_activity_schema.alias_appended_activity %}
Expand All @@ -39,91 +38,68 @@ params:

with

filter_activity_stream_using_primary_activity as (
dataset as (
select
{% for col in primary_activity.included_columns + primary_activity.required_columns %}
{{ dbt_activity_schema.parse_column(primary(), col) }} as {{ col }}{%- if not loop.last -%},{%- endif %}
{{ dbt_activity_schema.parse_column(primary(), col) }} as {{ col }},
{% endfor %}

from {{ activity_stream }} as {{ primary() }}

where {{ primary() }}.{{ columns.activity }} = {{ dbt.string_literal(primary_activity.name) }}
and {{ primary_activity.relationship.where_clause }}
),

{% for activity in appended_activities %}{% set i = loop.index %}

{{ alias_cte(activity, i) }} as (
select

-- Primary Activity Columns
{% for col in primary_activity.included_columns + primary_activity.required_columns %}
{{ primary() }}.{{- col }},
{% endfor %}

{% for col in activity.included_columns %}
{%- set parsed_col = dbt_activity_schema.parse_column(appended(), col) -%}
{% call activity.relationship.aggregation_func() %}
{{ parsed_col }}
{% endcall %} as {{ dbt_activity_schema.alias_appended_activity(activity, col) }}
{% if not loop.last %},{% endif %}
{% for activity in appended_activities %}{% set i = loop.index %}{% set last_outer_loop = loop.last %}
{% for col in activity.included_columns %}
{% call activity.relationship.aggregation_func() %}
{{ dbt_activity_schema.parse_column(alias_cte(activity, i), col) }}
{% endcall %} as {{ dbt_activity_schema.alias_appended_activity(activity, col) }}
{% if not (loop.last and last_outer_loop) %},{% endif %}
{% endfor %}
{% endfor %}

from filter_activity_stream_using_primary_activity as {{ primary() }}

left join {{ activity_stream }} as {{ appended() }}
from (
select *
from {{ activity_stream }}
where
{{ columns.activity }} = {{ dbt.string_literal(primary_activity.name) }}
and {{ primary_activity.relationship.where_clause }}
) as {{ primary() }}
{% for activity in appended_activities %}{% set i = loop.index %}
{% set appended = alias_cte(activity, i) %}
left join (
select *
from {{ activity_stream }}
where {{ columns.activity }} = {{ dbt.string_literal(activity.name) }}
) as {{ appended }}
on (
-- Join on Customer UUID Column
{{ appended() }}.{{ columns.customer }} = {{ primary() }}.{{ columns.customer }}

-- Join the Correct Activity
and {{ appended() }}.{{- columns.activity }} = {{ dbt.string_literal(activity.name) }}
{{ appended }}.{{ columns.customer }} = {{ primary() }}.{{ columns.customer }}

-- Relationship Specific Join Conditions
and (
{% if activity.relationship.name == "nth_ever" %}
{# nth_ever_join_clause relies on instantiated nth_occurance arg, in
addition to the i passed to the join #}
{% if activity.relationship.name == "nth_ever" %}
{{ activity.relationship.join_clause(activity.relationship.nth_occurance) }}
{% else %}
{{ activity.relationship.join_clause(relationship.nth_occurance, appended) }}
{% elif activity.relationship.name in ("first_ever", "last_ever") %}
{# relies on appended subquery/CTE name #}
{{ activity.relationship.join_clause(appended) }}
{% elif activity.relationship.name in ("all_ever", "aggregate_all_ever") %}
{# doesn't rely on anything #}
{{ activity.relationship.join_clause() }}
{% else %}
{# These relationships need both the primary and appended subquery/CTE names; the leading 0 only fills the unused first (i) argument #}
{{ activity.relationship.join_clause(0, primary(), appended) }}
{% endif %}
)
-- Additional Join Condition
{# Additional Join Condition relies on primary and appended subquery/CTE names #}
{% if activity.additional_join_condition is string %}
and ( {{ activity.additional_join_condition }} )
{% else %}
and ( {{ activity.additional_join_condition(primary(), appended) }} )
{% endif %}
)

{% endfor %}
group by
{% for col in primary_activity.included_columns + primary_activity.required_columns %}
{{ primary() }}.{{ col }}{%- if not loop.last -%},{%- endif %}
{% endfor %}
),

{% endfor %}

rejoin_aggregated_activities as (
select

{% for col in primary_activity.included_columns %}
{{ primary() }}.{{ col }},
{% endfor %}

{% for activity in appended_activities %}{% set i = loop.index %}{% set last_outer_loop = loop.last %}
{% for col in activity.included_columns %}
{{ alias_cte(activity, i) }}.{{ alias_appended_activity(activity, col) }}{% if not (last_outer_loop and loop.last) %},{% endif %}
{% endfor %}
{{ primary() }}.{{ col }}{%- if not loop.last -%},{%- endif %}
{% endfor %}

from filter_activity_stream_using_primary_activity as {{ primary() }}

{% for activity in appended_activities %}{% set i = loop.index %}

left join {{ alias_cte(activity, i) }}
on {{ alias_cte(activity, i) }}.{{ columns.activity_id }} = {{ primary() }}.{{ columns.activity_id }}

{% endfor %}
)

select * from rejoin_aggregated_activities
select * from dataset

{% endmacro %}
7 changes: 2 additions & 5 deletions macros/relationships/append_only/aggregate_after.sql
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
-- aggregate_after_join_clause: emits a parenthesised join predicate that keeps
-- appended-activity rows whose timestamp column (columns.ts) is strictly later
-- than the primary activity row's timestamp.
--   i        : accepted but not referenced in the body.
--   primary  : alias of the primary activity relation, interpolated before the column name.
--   appended : alias of the appended activity relation, interpolated before the column name.
-- NOTE(review): both a one-argument and a three-argument macro header (and both
-- the `appended()` call form and the plain `appended` form of the predicate)
-- appear in this span; presumably a rendered diff -- confirm which is current.
{% macro aggregate_after_join_clause(i) %}

{% set primary = dbt_activity_schema.primary %}
{% macro aggregate_after_join_clause(i, primary, appended) %}
{% set columns = dbt_activity_schema.columns() %}
{% set appended = dbt_activity_schema.appended %}

(
{{ appended() }}.{{- columns.ts }} > {{ primary() }}.{{- columns.ts }}
{{ appended }}.{{- columns.ts }} > {{ primary }}.{{- columns.ts }}
)
{% endmacro %}

Expand Down
7 changes: 2 additions & 5 deletions macros/relationships/append_only/aggregate_before.sql
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
-- aggregate_before_join_clause: emits a parenthesised join predicate that keeps
-- appended-activity rows whose timestamp column (columns.ts) is strictly earlier
-- than the primary activity row's timestamp.
--   i        : accepted but not referenced in the body.
--   primary  : alias of the primary activity relation, interpolated before the column name.
--   appended : alias of the appended activity relation, interpolated before the column name.
-- NOTE(review): both a one-argument and a three-argument macro header appear in
-- this span; presumably a rendered diff -- confirm which signature is current.
{% macro aggregate_before_join_clause(i) %}

{% set primary = dbt_activity_schema.primary %}
{% macro aggregate_before_join_clause(i, primary, appended) %}
{% set columns = dbt_activity_schema.columns() %}
{% set appended = dbt_activity_schema.appended %}

(
{{ appended() }}.{{- columns.ts }} < {{ primary() }}.{{- columns.ts }}
{{ appended }}.{{- columns.ts }} < {{ primary }}.{{- columns.ts }}
)
{% endmacro %}

Expand Down
11 changes: 4 additions & 7 deletions macros/relationships/append_only/aggregate_in_between.sql
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
-- aggregate_in_between_join_clause: emits a parenthesised join predicate that
-- keeps appended-activity rows whose timestamp (columns.ts) is strictly after
-- the primary row's timestamp and at or before the primary row's
-- columns.activity_repeated_at value. When activity_repeated_at is null the
-- upper bound is dropped, so every later appended row matches.
--   i        : accepted but not referenced in the body.
--   primary  : alias of the primary activity relation, interpolated before the column name.
--   appended : alias of the appended activity relation, interpolated before the column name.
-- NOTE(review): both a one-argument and a three-argument macro header appear in
-- this span; presumably a rendered diff -- confirm which signature is current.
{% macro aggregate_in_between_join_clause(i) %}

{% set primary = dbt_activity_schema.primary %}
{% macro aggregate_in_between_join_clause(i, primary, appended) %}
{% set columns = dbt_activity_schema.columns() %}
{% set appended = dbt_activity_schema.appended %}

(
{{ appended() }}.{{- columns.ts }} > {{ primary() }}.{{- columns.ts }}
{{ appended }}.{{- columns.ts }} > {{ primary }}.{{- columns.ts }}
and (
{{ appended() }}.{{- columns.ts }} <= {{ primary() }}.{{- columns.activity_repeated_at }}
or {{ primary() }}.{{- columns.activity_repeated_at }} is null
{{ appended }}.{{- columns.ts }} <= {{ primary }}.{{- columns.activity_repeated_at }}
or {{ primary }}.{{- columns.activity_repeated_at }} is null
)
)
{% endmacro %}
Expand Down
7 changes: 2 additions & 5 deletions macros/relationships/append_only/first_after.sql
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
-- first_after_join_clause: emits a parenthesised join predicate that keeps
-- appended-activity rows whose timestamp column (columns.ts) is strictly later
-- than the primary activity row's timestamp. The predicate itself does not pick
-- the "first" row; presumably the relationship's aggregation does -- TODO confirm.
--   i        : accepted but not referenced in the body.
--   primary  : alias of the primary activity relation, interpolated before the column name.
--   appended : alias of the appended activity relation, interpolated before the column name.
-- NOTE(review): both a one-argument and a three-argument macro header appear in
-- this span; presumably a rendered diff -- confirm which signature is current.
{% macro first_after_join_clause(i) %}

{% set primary = dbt_activity_schema.primary %}
{% macro first_after_join_clause(i, primary, appended) %}
{% set columns = dbt_activity_schema.columns() %}
{% set appended = dbt_activity_schema.appended %}

(
{{ appended() }}.{{- columns.ts }} > {{ primary() }}.{{- columns.ts }}
{{ appended }}.{{- columns.ts }} > {{ primary }}.{{- columns.ts }}
)
{% endmacro %}

Expand Down
9 changes: 3 additions & 6 deletions macros/relationships/append_only/first_before.sql
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
-- first_before_join_clause: emits a parenthesised join predicate that keeps
-- appended-activity rows whose columns.activity_occurrence equals 1 and whose
-- timestamp (columns.ts) is at or before the primary row's
-- columns.activity_repeated_at; a null activity_repeated_at is replaced by the
-- literal '2100-01-01'::timestamp, which acts as a far-future upper bound.
--   i        : accepted but not referenced in the body.
--   primary  : alias of the primary activity relation, interpolated before the column name.
--   appended : alias of the appended activity relation, interpolated before the column name.
-- NOTE(review): the bound is activity_repeated_at, not the primary row's own
-- timestamp -- confirm this is the intended meaning of "before".
-- NOTE(review): the `::timestamp` cast is not ANSI syntax -- confirm every
-- target warehouse accepts it.
-- NOTE(review): both a one-argument and a three-argument macro header appear in
-- this span; presumably a rendered diff -- confirm which signature is current.
{% macro first_before_join_clause(i) %}

{% set primary = dbt_activity_schema.primary %}
{% macro first_before_join_clause(i, primary, appended) %}
{% set columns = dbt_activity_schema.columns() %}
{% set appended = dbt_activity_schema.appended %}

(
{{ appended() }}.{{ columns.activity_occurrence }} = 1
and {{ appended() }}.{{- columns.ts }} <= coalesce({{ primary() }}.{{- columns.activity_repeated_at }}, '2100-01-01'::timestamp)
{{ appended }}.{{ columns.activity_occurrence }} = 1
and {{ appended }}.{{- columns.ts }} <= coalesce({{ primary }}.{{- columns.activity_repeated_at }}, '2100-01-01'::timestamp)
)
{% endmacro %}

Expand Down
11 changes: 4 additions & 7 deletions macros/relationships/append_only/first_in_between.sql
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
-- first_in_between_join_clause: emits a parenthesised join predicate that keeps
-- appended-activity rows whose timestamp (columns.ts) is strictly after the
-- primary row's timestamp and at or before the primary row's
-- columns.activity_repeated_at. When activity_repeated_at is null the upper
-- bound is dropped, so every later appended row matches.
--   i        : accepted but not referenced in the body.
--   primary  : alias of the primary activity relation, interpolated before the column name.
--   appended : alias of the appended activity relation, interpolated before the column name.
-- NOTE(review): both a one-argument and a three-argument macro header appear in
-- this span; presumably a rendered diff -- confirm which signature is current.
{% macro first_in_between_join_clause(i) %}

{% set primary = dbt_activity_schema.primary %}
{% macro first_in_between_join_clause(i, primary, appended) %}
{% set columns = dbt_activity_schema.columns() %}
{% set appended = dbt_activity_schema.appended %}

(
{{ appended() }}.{{- columns.ts }} > {{ primary() }}.{{- columns.ts }}
{{ appended }}.{{- columns.ts }} > {{ primary }}.{{- columns.ts }}
and (
{{ appended() }}.{{- columns.ts }} <= {{ primary() }}.{{- columns.activity_repeated_at }}
or {{ primary() }}.{{- columns.activity_repeated_at }} is null
{{ appended }}.{{- columns.ts }} <= {{ primary }}.{{- columns.activity_repeated_at }}
or {{ primary }}.{{- columns.activity_repeated_at }} is null
)
)
{% endmacro %}
Expand Down
7 changes: 2 additions & 5 deletions macros/relationships/append_only/last_after.sql
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
-- last_after_join_clause: emits a parenthesised join predicate that keeps
-- appended-activity rows whose timestamp column (columns.ts) is strictly later
-- than the primary activity row's timestamp. The predicate itself does not pick
-- the "last" row; presumably the relationship's aggregation does -- TODO confirm.
--   i        : accepted but not referenced in the body.
--   primary  : alias of the primary activity relation, interpolated before the column name.
--   appended : alias of the appended activity relation, interpolated before the column name.
-- NOTE(review): both a one-argument and a three-argument macro header appear in
-- this span; presumably a rendered diff -- confirm which signature is current.
{% macro last_after_join_clause(i) %}

{% set primary = dbt_activity_schema.primary %}
{% macro last_after_join_clause(i, primary, appended) %}
{% set columns = dbt_activity_schema.columns() %}
{% set appended = dbt_activity_schema.appended %}

(
{{ appended() }}.{{- columns.ts }} > {{ primary() }}.{{- columns.ts }}
{{ appended }}.{{- columns.ts }} > {{ primary }}.{{- columns.ts }}
)
{% endmacro %}

Expand Down
7 changes: 2 additions & 5 deletions macros/relationships/append_only/last_before.sql
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
-- last_before_join_clause: emits a parenthesised join predicate that keeps
-- appended-activity rows whose timestamp (columns.ts) is at or before the
-- primary row's timestamp; a null primary timestamp is replaced by the literal
-- '1900-01-01'::timestamp, so only appended rows at or before that date match.
--   i        : accepted but not referenced in the body.
--   primary  : alias of the primary activity relation, interpolated before the column name.
--   appended : alias of the appended activity relation, interpolated before the column name.
-- NOTE(review): the `::timestamp` cast is not ANSI syntax -- confirm every
-- target warehouse accepts it.
-- NOTE(review): both a one-argument and a three-argument macro header appear in
-- this span; presumably a rendered diff -- confirm which signature is current.
{% macro last_before_join_clause(i) %}

{% set primary = dbt_activity_schema.primary %}
{% macro last_before_join_clause(i, primary, appended) %}
{% set columns = dbt_activity_schema.columns() %}
{% set appended = dbt_activity_schema.appended %}

(
{{ appended() }}.{{- columns.ts }} <= coalesce({{ primary() }}.{{- columns.ts }}, '1900-01-01'::timestamp)
{{ appended }}.{{- columns.ts }} <= coalesce({{ primary }}.{{- columns.ts }}, '1900-01-01'::timestamp)
)
{% endmacro %}

Expand Down
11 changes: 4 additions & 7 deletions macros/relationships/append_only/last_in_between.sql
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
-- last_in_between_join_clause: emits a parenthesised join predicate that keeps
-- appended-activity rows whose timestamp (columns.ts) is strictly after the
-- primary row's timestamp and at or before the primary row's
-- columns.activity_repeated_at. When activity_repeated_at is null the upper
-- bound is dropped, so every later appended row matches.
--   i        : accepted but not referenced in the body.
--   primary  : alias of the primary activity relation, interpolated before the column name.
--   appended : alias of the appended activity relation, interpolated before the column name.
-- NOTE(review): both a one-argument and a three-argument macro header appear in
-- this span; presumably a rendered diff -- confirm which signature is current.
{% macro last_in_between_join_clause(i) %}

{% set primary = dbt_activity_schema.primary %}
{% macro last_in_between_join_clause(i, primary, appended) %}
{% set columns = dbt_activity_schema.columns() %}
{% set appended = dbt_activity_schema.appended %}

(
{{ appended() }}.{{- columns.ts }} > {{ primary() }}.{{- columns.ts }}
{{ appended }}.{{- columns.ts }} > {{ primary }}.{{- columns.ts }}
and (
{{ appended() }}.{{- columns.ts }} <= {{ primary() }}.{{- columns.activity_repeated_at }}
or {{ primary() }}.{{- columns.activity_repeated_at }} is null
{{ appended }}.{{- columns.ts }} <= {{ primary }}.{{- columns.activity_repeated_at }}
or {{ primary }}.{{- columns.activity_repeated_at }} is null
)
)
{% endmacro %}
Expand Down