Skip to content

Commit

Permalink
fix(web-analytics): Fix channel type (#19378)
Browse files Browse the repository at this point in the history
Fix channel type
  • Loading branch information
robbie-c authored Dec 17, 2023
1 parent 8ea18ca commit 7ba143b
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 124 deletions.
211 changes: 89 additions & 122 deletions posthog/hogql_queries/web_analytics/stats_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ class WebStatsTableQueryRunner(WebAnalyticsQueryRunner):
query_type = WebStatsTableQuery

def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery:
# special case for channel, as some hogql features to use the general code are still being worked on
if self.query.breakdownBy == WebStatsBreakdown.InitialChannelType:
return self.to_channel_query()

with self.timings.measure("bounce_rate_query"):
bounce_rate_query = parse_select(
BOUNCE_RATE_CTE,
Expand Down Expand Up @@ -94,67 +98,7 @@ def counts_breakdown(self):
case WebStatsBreakdown.Page:
return ast.Field(chain=["properties", "$pathname"])
case WebStatsBreakdown.InitialChannelType:
# use this for now, switch to person.$virt_initial_channel_type when it's working. If fixing or adding
# anything to this, keep in sync with channel_type.py
return parse_expr(
"""
multiIf(
match(person.properties.$initial_utm_campaign, 'cross-network'),
'Cross Network',
(
match(person.properties.$initial_utm_medium, '^(.*cp.*|ppc|retargeting|paid.*)$') OR
person.properties.$initial_gclid IS NOT NULL OR
person.properties.$initial_gad_source IS NOT NULL
),
coalesce(
hogql_lookupPaidSourceType(person.properties.$initial_utm_source),
hogql_lookupPaidDomainType(person.properties.$initial_referring_domain),
if(
match(person.properties.$initial_utm_campaign, '^(.*(([^a-df-z]|^)shop|shopping).*)$'),
'Paid Shopping',
NULL
),
hogql_lookupPaidMediumType(person.properties.$initial_utm_medium),
multiIf (
person.properties.$initial_gad_source = '1',
'Paid Search',
match(person.properties.$initial_utm_campaign, '^(.*video.*)$'),
'Paid Video',
'Paid Other'
)
),
(
person.properties.$initial_referring_domain = '$direct'
AND (person.properties.$initial_utm_medium IS NULL OR person.properties.$initial_utm_medium = '')
AND (person.properties.$initial_utm_source IS NULL OR person.properties.$initial_utm_source IN ('', '(direct)', 'direct'))
),
'Direct',
coalesce(
hogql_lookupOrganicSourceType(person.properties.$initial_utm_source),
hogql_lookupOrganicDomainType(person.properties.$initial_referring_domain),
if(
match(person.properties.$initial_utm_campaign, '^(.*(([^a-df-z]|^)shop|shopping).*)$'),
'Organic Shopping',
NULL
),
hogql_lookupOrganicMediumType(person.properties.$initial_utm_medium),
multiIf(
match(person.properties.$initial_utm_campaign, '^(.*video.*)$'),
'Organic Video',
match(person.properties.$initial_utm_medium, 'push$'),
'Push',
NULL
)
)
)"""
)
raise NotImplementedError("Breakdown InitialChannelType not implemented")
case WebStatsBreakdown.InitialPage:
return ast.Field(chain=["person", "properties", "$initial_pathname"])
case WebStatsBreakdown.InitialReferringDomain:
Expand Down Expand Up @@ -192,67 +136,7 @@ def bounce_breakdown(self):
# use initial pathname for bounce rate
return ast.Call(name="any", args=[ast.Field(chain=["person", "properties", "$initial_pathname"])])
case WebStatsBreakdown.InitialChannelType:
# use this for now, switch to person.$virt_initial_channel_type when it's working. If fixing or adding
# anything to this, keep in sync with channel_type.py
return parse_expr(
"""
multiIf(
match(any(person.properties.$initial_utm_campaign), 'cross-network'),
'Cross Network',
(
match(any(person.properties.$initial_utm_medium), '^(.*cp.*|ppc|retargeting|paid.*)$') OR
any(person.properties.$initial_gclid) IS NOT NULL OR
any(person.properties.$initial_gad_source) IS NOT NULL
),
coalesce(
hogql_lookupPaidSourceType(any(person.properties.$initial_utm_source)),
hogql_lookupPaidDomainType(any(person.properties.$initial_referring_domain)),
if(
match(any(person.properties.$initial_utm_campaign), '^(.*(([^a-df-z]|^)shop|shopping).*)$'),
'Paid Shopping',
NULL
),
hogql_lookupPaidMediumType(any(person.properties.$initial_utm_medium)),
multiIf (
any(person.properties.$initial_gad_source) = '1',
'Paid Search',
match(any(person.properties.$initial_utm_campaign), '^(.*video.*)$'),
'Paid Video',
'Paid Other'
)
),
(
any(person.properties.$initial_referring_domain) = '$direct'
AND (any(person.properties.$initial_utm_medium) IS NULL OR any(person.properties.$initial_utm_medium) = '')
AND (any(person.properties.$initial_utm_source) IS NULL OR any(person.properties.$initial_utm_source) IN ('', '(direct)', 'direct'))
),
'Direct',
coalesce(
hogql_lookupOrganicSourceType(any(person.properties.$initial_utm_source)),
hogql_lookupOrganicDomainType(any(person.properties.$initial_referring_domain)),
if(
match(any(person.properties.$initial_utm_campaign), '^(.*(([^a-df-z]|^)shop|shopping).*)$'),
'Organic Shopping',
NULL
),
hogql_lookupOrganicMediumType(any(person.properties.$initial_utm_medium)),
multiIf(
match(any(person.properties.$initial_utm_campaign), '^(.*video.*)$'),
'Organic Video',
match(any(person.properties.$initial_utm_medium), 'push$'),
'Push',
NULL
)
)
)"""
)
raise NotImplementedError("Breakdown InitialChannelType not implemented")
case _:
return ast.Call(name="any", args=[self.counts_breakdown()])

Expand All @@ -276,3 +160,86 @@ def where_breakdown(self):
return parse_expr("TRUE") # actually show null values
case _:
return parse_expr('"context.columns.breakdown_value" IS NOT NULL')

def to_channel_query(self):
with self.timings.measure("channel_query"):
top_sources_query = parse_select(
"""
SELECT
multiIf(
match(person.properties.$initial_utm_campaign, 'cross-network'),
'Cross Network',
(
match(person.properties.$initial_utm_medium, '^(.*cp.*|ppc|retargeting|paid.*)$') OR
person.properties.$initial_gclid IS NOT NULL OR
person.properties.$initial_gad_source IS NOT NULL
),
coalesce(
hogql_lookupPaidSourceType(person.properties.$initial_utm_source),
hogql_lookupPaidDomainType(person.properties.$initial_referring_domain),
if(
match(person.properties.$initial_utm_campaign, '^(.*(([^a-df-z]|^)shop|shopping).*)$'),
'Paid Shopping',
NULL
),
hogql_lookupPaidMediumType(person.properties.$initial_utm_medium),
multiIf (
person.properties.$initial_gad_source = '1',
'Paid Search',
match(person.properties.$initial_utm_campaign, '^(.*video.*)$'),
'Paid Video',
'Paid Other'
)
),
(
person.properties.$initial_referring_domain = '$direct'
AND (person.properties.$initial_utm_medium IS NULL OR person.properties.$initial_utm_medium = '')
AND (person.properties.$initial_utm_source IS NULL OR person.properties.$initial_utm_source IN ('', '(direct)', 'direct'))
),
'Direct',
coalesce(
hogql_lookupOrganicSourceType(person.properties.$initial_utm_source),
hogql_lookupOrganicDomainType(person.properties.$initial_referring_domain),
if(
match(person.properties.$initial_utm_campaign, '^(.*(([^a-df-z]|^)shop|shopping).*)$'),
'Organic Shopping',
NULL
),
hogql_lookupOrganicMediumType(person.properties.$initial_utm_medium),
multiIf(
match(person.properties.$initial_utm_campaign, '^(.*video.*)$'),
'Organic Video',
match(person.properties.$initial_utm_medium, 'push$'),
'Push',
NULL
)
)
) AS "context.columns.breakdown_value",
count() as "context.columns.views",
uniq(events.person_id) as "context.columns.visitors"
FROM
events
WHERE
(event = '$pageview')
AND ({counts_where})
GROUP BY "context.columns.breakdown_value"
ORDER BY
"context.columns.views" DESC,
"context.columns.breakdown_value" DESC
LIMIT 10
""",
timings=self.timings,
backend="cpp",
placeholders={
"counts_where": self.events_where(),
},
)

return top_sources_query
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,6 @@ def test_breakdown_channel_type_doesnt_throw(self):
).results

self.assertEqual(
[(None, 2, 1, 0)],
results,
1,
len(results),
)

0 comments on commit 7ba143b

Please sign in to comment.