Skip to content

Commit

Permalink
feat(web-analytics): Handle null values better in session properties (#…
Browse files Browse the repository at this point in the history
…23289)

* Handle null values better in session properties

* Update session replay query test

* Remove unusef

* Update query snapshots

---------

Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
2 people authored and fuziontech committed Jul 2, 2024
1 parent b47619c commit 70deafe
Show file tree
Hide file tree
Showing 8 changed files with 188 additions and 34 deletions.
8 changes: 6 additions & 2 deletions frontend/src/scenes/web-analytics/WebAnalyticsTile.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ export const WebStatsTableTile = ({
const { key, type } = webStatsBreakdownToPropertyName(breakdownBy) || {}

const onClick = useCallback(
(breakdownValue: string) => {
(breakdownValue: string | null) => {
if (!key || !type) {
return
}
Expand Down Expand Up @@ -400,7 +400,7 @@ export const WebStatsTableTile = ({
)
}

const getBreakdownValue = (record: unknown, breakdownBy: WebStatsBreakdown): string | undefined => {
const getBreakdownValue = (record: unknown, breakdownBy: WebStatsBreakdown): string | null | undefined => {
if (typeof record !== 'object' || !record || !('result' in record)) {
return undefined
}
Expand Down Expand Up @@ -429,6 +429,10 @@ const getBreakdownValue = (record: unknown, breakdownBy: WebStatsBreakdown): str
break
}

if (breakdownValue === null) {
return null // null is a valid value, as opposed to undefined which signals that there isn't a valid value
}

if (typeof breakdownValue !== 'string') {
return undefined
}
Expand Down
27 changes: 25 additions & 2 deletions frontend/src/scenes/web-analytics/webAnalyticsLogic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ export const webAnalyticsLogic = kea<webAnalyticsLogicType>([
togglePropertyFilter: (
type: PropertyFilterType.Event | PropertyFilterType.Person | PropertyFilterType.Session,
key: string,
value: string | number,
value: string | number | null,
tabChange?: {
graphsTab?: string
sourceTab?: string
Expand Down Expand Up @@ -245,14 +245,37 @@ export const webAnalyticsLogic = kea<webAnalyticsLogicType>([
{
setWebAnalyticsFilters: (_, { webAnalyticsFilters }) => webAnalyticsFilters,
togglePropertyFilter: (oldPropertyFilters, { key, value, type }): WebAnalyticsPropertyFilters => {
if (value === null) {
// if there's already an isNotSet filter, remove it
const isNotSetFilterExists = oldPropertyFilters.some(
(f) => f.type === type || f.key === key || f.operator === PropertyOperator.IsNotSet
)
if (isNotSetFilterExists) {
return oldPropertyFilters.filter(
(f) => f.type !== type || f.key !== key || f.operator !== PropertyOperator.IsNotSet
)
}
return [
...oldPropertyFilters,
{
type,
key,
operator: PropertyOperator.IsNotSet,
},
]
}
const similarFilterExists = oldPropertyFilters.some(
(f) => f.type === type && f.key === key && f.operator === PropertyOperator.Exact
)
if (similarFilterExists) {
// if there's already a matching property, turn it off or merge them
return oldPropertyFilters
.map((f) => {
if (f.key !== key || f.type !== type || f.operator !== PropertyOperator.Exact) {
if (
f.key !== key ||
f.type !== type ||
![PropertyOperator.Exact, PropertyOperator.IsNotSet].includes(f.operator)
) {
return f
}
const oldValue = (Array.isArray(f.value) ? f.value : [f.value]).filter(isNotNil)
Expand Down
24 changes: 14 additions & 10 deletions posthog/hogql/database/schema/sessions_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,16 +157,16 @@ def arg_max_merge_field(field_name: str) -> ast.Call:
)
],
),
"$entry_current_url": arg_min_merge_field("entry_url"),
"$exit_current_url": arg_max_merge_field("exit_url"),
"$entry_utm_source": arg_min_merge_field("initial_utm_source"),
"$entry_utm_campaign": arg_min_merge_field("initial_utm_campaign"),
"$entry_utm_medium": arg_min_merge_field("initial_utm_medium"),
"$entry_utm_term": arg_min_merge_field("initial_utm_term"),
"$entry_utm_content": arg_min_merge_field("initial_utm_content"),
"$entry_referring_domain": arg_min_merge_field("initial_referring_domain"),
"$entry_gclid": arg_min_merge_field("initial_gclid"),
"$entry_gad_source": arg_min_merge_field("initial_gad_source"),
"$entry_current_url": null_if_empty(arg_min_merge_field("entry_url")),
"$exit_current_url": null_if_empty(arg_max_merge_field("exit_url")),
"$entry_utm_source": null_if_empty(arg_min_merge_field("initial_utm_source")),
"$entry_utm_campaign": null_if_empty(arg_min_merge_field("initial_utm_campaign")),
"$entry_utm_medium": null_if_empty(arg_min_merge_field("initial_utm_medium")),
"$entry_utm_term": null_if_empty(arg_min_merge_field("initial_utm_term")),
"$entry_utm_content": null_if_empty(arg_min_merge_field("initial_utm_content")),
"$entry_referring_domain": null_if_empty(arg_min_merge_field("initial_referring_domain")),
"$entry_gclid": null_if_empty(arg_min_merge_field("initial_gclid")),
"$entry_gad_source": null_if_empty(arg_min_merge_field("initial_gad_source")),
"$event_count_map": ast.Call(
name="sumMap",
args=[ast.Field(chain=[table_name, "event_count_map"])],
Expand Down Expand Up @@ -420,3 +420,7 @@ def get_lazy_session_table_values_v1(key: str, search_term: Optional[str], team:
return [["1"], ["0"]]

return []


def null_if_empty(expr: ast.Expr) -> ast.Call:
return ast.Call(name="nullIf", args=[expr, ast.Constant(value="")])
23 changes: 12 additions & 11 deletions posthog/hogql/database/schema/sessions_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
LazyJoinToAdd,
)
from posthog.hogql.database.schema.channel_type import create_channel_type_expr, POSSIBLE_CHANNEL_TYPES
from posthog.hogql.database.schema.sessions_v1 import null_if_empty
from posthog.hogql.database.schema.util.where_clause_extractor import SessionMinTimestampWhereClauseExtractorV2
from posthog.hogql.errors import ResolutionError
from posthog.models.property_definition import PropertyType
Expand Down Expand Up @@ -185,20 +186,20 @@ def arg_max_merge_field(field_name: str) -> ast.Call:
)
],
),
"$entry_current_url": arg_min_merge_field("entry_url"),
"$end_current_url": arg_max_merge_field("end_url"),
"$entry_utm_source": arg_min_merge_field("initial_utm_source"),
"$entry_utm_campaign": arg_min_merge_field("initial_utm_campaign"),
"$entry_utm_medium": arg_min_merge_field("initial_utm_medium"),
"$entry_utm_term": arg_min_merge_field("initial_utm_term"),
"$entry_utm_content": arg_min_merge_field("initial_utm_content"),
"$entry_referring_domain": arg_min_merge_field("initial_referring_domain"),
"$entry_gclid": arg_min_merge_field("initial_gclid"),
"$entry_gad_source": arg_min_merge_field("initial_gad_source"),
"$entry_current_url": null_if_empty(arg_min_merge_field("entry_url")),
"$end_current_url": null_if_empty(arg_max_merge_field("end_url")),
"$entry_utm_source": null_if_empty(arg_min_merge_field("initial_utm_source")),
"$entry_utm_campaign": null_if_empty(arg_min_merge_field("initial_utm_campaign")),
"$entry_utm_medium": null_if_empty(arg_min_merge_field("initial_utm_medium")),
"$entry_utm_term": null_if_empty(arg_min_merge_field("initial_utm_term")),
"$entry_utm_content": null_if_empty(arg_min_merge_field("initial_utm_content")),
"$entry_referring_domain": null_if_empty(arg_min_merge_field("initial_referring_domain")),
"$entry_gclid": null_if_empty(arg_min_merge_field("initial_gclid")),
"$entry_gad_source": null_if_empty(arg_min_merge_field("initial_gad_source")),
"$pageview_count": ast.Call(name="sum", args=[ast.Field(chain=[table_name, "pageview_count"])]),
"$screen_count": ast.Call(name="sum", args=[ast.Field(chain=[table_name, "screen_count"])]),
"$autocapture_count": ast.Call(name="sum", args=[ast.Field(chain=[table_name, "autocapture_count"])]),
"$last_external_click_url": arg_max_merge_field("last_external_click_url"),
"$last_external_click_url": null_if_empty(arg_max_merge_field("last_external_click_url")),
}
# Alias
aggregate_fields["id"] = aggregate_fields["session_id"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -478,21 +478,21 @@ def test_session_replay_query(self):
)
expected = f"""SELECT
s.session_id AS session_id,
min(toTimeZone(s.min_first_timestamp, %(hogql_val_5)s)) AS start_time
min(toTimeZone(s.min_first_timestamp, %(hogql_val_6)s)) AS start_time
FROM
session_replay_events AS s
LEFT JOIN (SELECT
path(nullIf(argMinMerge(sessions.entry_url), %(hogql_val_0)s)) AS `$entry_pathname`,
path(nullIf(nullIf(argMinMerge(sessions.entry_url), %(hogql_val_0)s), %(hogql_val_1)s)) AS `$entry_pathname`,
sessions.session_id AS session_id
FROM
sessions
WHERE
and(equals(sessions.team_id, {self.team.id}), ifNull(greaterOrEquals(plus(toTimeZone(sessions.min_timestamp, %(hogql_val_1)s), toIntervalDay(3)), %(hogql_val_2)s), 0), ifNull(lessOrEquals(minus(toTimeZone(sessions.min_timestamp, %(hogql_val_3)s), toIntervalDay(3)), now64(6, %(hogql_val_4)s)), 0))
and(equals(sessions.team_id, {self.team.id}), ifNull(greaterOrEquals(plus(toTimeZone(sessions.min_timestamp, %(hogql_val_2)s), toIntervalDay(3)), %(hogql_val_3)s), 0), ifNull(lessOrEquals(minus(toTimeZone(sessions.min_timestamp, %(hogql_val_4)s), toIntervalDay(3)), now64(6, %(hogql_val_5)s)), 0))
GROUP BY
sessions.session_id,
sessions.session_id) AS s__session ON equals(s.session_id, s__session.session_id)
WHERE
and(equals(s.team_id, {self.team.id}), ifNull(equals(s__session.`$entry_pathname`, %(hogql_val_6)s), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, %(hogql_val_7)s), %(hogql_val_8)s), 0), ifNull(less(toTimeZone(s.min_first_timestamp, %(hogql_val_9)s), now64(6, %(hogql_val_10)s)), 0))
and(equals(s.team_id, {self.team.id}), ifNull(equals(s__session.`$entry_pathname`, %(hogql_val_7)s), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, %(hogql_val_8)s), %(hogql_val_9)s), 0), ifNull(less(toTimeZone(s.min_first_timestamp, %(hogql_val_10)s), now64(6, %(hogql_val_11)s)), 0))
GROUP BY
s.session_id
LIMIT 50000"""
Expand Down
8 changes: 6 additions & 2 deletions posthog/hogql/property.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,11 @@ def property_to_expr(
else:
chain = ["properties"]

properties_field = ast.Field(chain=chain)
if property.type == "session":
properties_field = None
else:
properties_field = ast.Field(chain=chain)

field = ast.Field(chain=[*chain, property.key])

if isinstance(value, list):
Expand Down Expand Up @@ -219,7 +223,7 @@ def property_to_expr(
]
+ (
[]
if properties_field == field
if not properties_field or properties_field == field
else [
ast.Not(
expr=ast.Call(
Expand Down
19 changes: 17 additions & 2 deletions posthog/hogql_queries/web_analytics/stats_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
PersonPropertyFilter,
)

BREAKDOWN_NULL_DISPLAY = "(none)"


class WebStatsTableQueryRunner(WebAnalyticsQueryRunner):
query: WebStatsTableQuery
Expand Down Expand Up @@ -468,8 +470,17 @@ def _counts_breakdown_value(self):
case WebStatsBreakdown.INITIAL_CHANNEL_TYPE:
return ast.Field(chain=["session", "$channel_type"])
case WebStatsBreakdown.INITIAL_UTM_SOURCE_MEDIUM_CAMPAIGN:
return parse_expr(
"concatWithSeparator(' / ', coalesce(nullIf(session.$entry_utm_source, ''), nullIf(session.$entry_referring_domain, ''), '(null)'), coalesce(nullIf(session.$entry_utm_medium, ''), '(null)'), coalesce(nullIf(session.$entry_utm_campaign, ''), '(null)'))"
return ast.Call(
name="concatWithSeparator",
args=[
ast.Constant(value=" / "),
coalesce_with_null_display(
ast.Field(chain=["session", "$entry_utm_source"]),
ast.Field(chain=["session", "$entry_referring_domain"]),
),
coalesce_with_null_display(ast.Field(chain=["session", "$entry_utm_medium"])),
coalesce_with_null_display(ast.Field(chain=["session", "$entry_utm_campaign"])),
],
)
case WebStatsBreakdown.BROWSER:
return ast.Field(chain=["properties", "$browser"])
Expand Down Expand Up @@ -530,3 +541,7 @@ def _apply_path_cleaning(self, path_expr: ast.Expr) -> ast.Expr:
)

return path_expr


def coalesce_with_null_display(*exprs: ast.Expr) -> ast.Expr:
return ast.Call(name="coalesce", args=[*exprs, ast.Constant(value=BREAKDOWN_NULL_DISPLAY)])
105 changes: 104 additions & 1 deletion posthog/hogql_queries/web_analytics/test/test_web_stats_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -768,6 +768,109 @@ def test_source_medium_campaign(self, session_table_version: SessionTableVersion
).results

self.assertEqual(
[["google / (null) / (null)", 1, 1], ["news.ycombinator.com / referral / (null)", 1, 1]],
[["google / (none) / (none)", 1, 1], ["news.ycombinator.com / referral / (none)", 1, 1]],
results,
)

@parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]])
def test_null_in_utm_tags(self, session_table_version: SessionTableVersion):
d1 = "d1"
s1 = str(uuid7("2024-06-26"))

_create_person(
team_id=self.team.pk,
distinct_ids=[d1],
properties={
"name": d1,
},
)
_create_event(
team=self.team,
event="$pageview",
distinct_id=d1,
timestamp="2024-06-26",
properties={"$session_id": s1, "utm_source": "google"},
)

d2 = "d2"
s2 = str(uuid7("2024-06-26"))
_create_person(
team_id=self.team.pk,
distinct_ids=[d2],
properties={
"name": d2,
},
)
_create_event(
team=self.team,
event="$pageview",
distinct_id=d2,
timestamp="2024-06-26",
properties={
"$session_id": s2,
},
)

results = self._run_web_stats_table_query(
"all",
"2024-06-27",
breakdown_by=WebStatsBreakdown.INITIAL_UTM_SOURCE,
session_table_version=session_table_version,
).results

self.assertEqual(
[["google", 1.0, 1.0], [None, 1.0, 1.0]],
results,
)

@parameterized.expand([[SessionTableVersion.V1], [SessionTableVersion.V2]])
def test_is_not_set_filter(self, session_table_version: SessionTableVersion):
d1 = "d1"
s1 = str(uuid7("2024-06-26"))

_create_person(
team_id=self.team.pk,
distinct_ids=[d1],
properties={
"name": d1,
},
)
_create_event(
team=self.team,
event="$pageview",
distinct_id=d1,
timestamp="2024-06-26",
properties={"$session_id": s1, "utm_source": "google"},
)

d2 = "d2"
s2 = str(uuid7("2024-06-26"))
_create_person(
team_id=self.team.pk,
distinct_ids=[d2],
properties={
"name": d2,
},
)
_create_event(
team=self.team,
event="$pageview",
distinct_id=d2,
timestamp="2024-06-26",
properties={
"$session_id": s2,
},
)

results = self._run_web_stats_table_query(
"all",
"2024-06-27",
breakdown_by=WebStatsBreakdown.INITIAL_UTM_SOURCE,
properties=[EventPropertyFilter(key="utm_source", operator=PropertyOperator.IS_NOT_SET)],
session_table_version=session_table_version,
).results

self.assertEqual(
[[None, 1.0, 1.0]],
results,
)

0 comments on commit 70deafe

Please sign in to comment.