Skip to content

Commit

Permalink
feat(web-analytics): Turn timezones into UTC offsets instead of timez…
Browse files Browse the repository at this point in the history
…one names (#26409)
  • Loading branch information
rafaeelaudibert authored Nov 26, 2024
1 parent 90a2854 commit c0652cf
Show file tree
Hide file tree
Showing 4 changed files with 149 additions and 26 deletions.
22 changes: 22 additions & 0 deletions frontend/src/scenes/web-analytics/tiles/WebAnalyticsTile.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,23 @@ import { DataTableNode, InsightVizNode, NodeKind, QuerySchema, WebStatsBreakdown
import { QueryContext, QueryContextColumnComponent, QueryContextColumnTitleComponent } from '~/queries/types'
import { ChartDisplayType, GraphPointPayload, InsightLogicProps, ProductKey, PropertyFilterType } from '~/types'

const toUtcOffsetFormat = (value: number): string => {
if (value === 0) {
return 'UTC'
}

const integerPart = Math.floor(value)
const sign = integerPart > 0 ? '+' : '-'

// India has half-hour offsets, and Australia has 45-minute offsets, why?
const decimalPart = value - integerPart
const decimalPartAsMinutes = decimalPart * 60
const formattedMinutes = decimalPartAsMinutes > 0 ? `:${decimalPartAsMinutes}` : ''

// E.g. UTC-3, UTC, UTC+5:30, UTC+11:45
return `UTC${sign}${integerPart}${formattedMinutes}`
}

const PercentageCell: QueryContextColumnComponent = ({ value }) => {
if (typeof value === 'number') {
return <span>{`${(value * 100).toFixed(1)}%`}</span>
Expand Down Expand Up @@ -121,6 +138,11 @@ const BreakdownValueCell: QueryContextColumnComponent = (props) => {
)
}
break
case WebStatsBreakdown.Timezone:
if (typeof value === 'number') {
return <>{toUtcOffsetFormat(value)}</>
}
break
}

if (typeof value === 'string') {
Expand Down
4 changes: 3 additions & 1 deletion posthog/hogql/functions/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,13 +421,15 @@ def compare_types(arg_types: list[ConstantType], sig_arg_types: tuple[ConstantTy
"toString": HogQLFunctionMeta(
"toString",
1,
1,
2,
signatures=[
((IntegerType(),), StringType()),
((StringType(),), StringType()),
((FloatType(),), StringType()),
((DateType(),), StringType()),
((DateType(), StringType()), StringType()),
((DateTimeType(),), StringType()),
((DateTimeType(), StringType()), StringType()),
],
),
"toJSONString": HogQLFunctionMeta("toJSONString", 1, 1),
Expand Down
13 changes: 10 additions & 3 deletions posthog/hogql_queries/web_analytics/stats_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ def to_main_query(self) -> ast.SelectQuery:
)
GROUP BY "context.columns.breakdown_value"
ORDER BY "context.columns.visitors" DESC,
"context.columns.views" DESC,
"context.columns.breakdown_value" ASC
""",
timings=self.timings,
Expand Down Expand Up @@ -118,6 +119,7 @@ def _to_main_query_with_session_properties(self) -> ast.SelectQuery:
)
GROUP BY "context.columns.breakdown_value"
ORDER BY "context.columns.visitors" DESC,
"context.columns.views" DESC,
"context.columns.breakdown_value" ASC
""",
timings=self.timings,
Expand Down Expand Up @@ -162,6 +164,7 @@ def to_entry_bounce_query(self) -> ast.SelectQuery:
)
GROUP BY "context.columns.breakdown_value"
ORDER BY "context.columns.visitors" DESC,
"context.columns.views" DESC,
"context.columns.breakdown_value" ASC
""",
timings=self.timings,
Expand Down Expand Up @@ -269,6 +272,7 @@ def to_path_scroll_bounce_query(self) -> ast.SelectQuery:
) AS scroll
ON counts.breakdown_value = scroll.breakdown_value
ORDER BY "context.columns.visitors" DESC,
"context.columns.views" DESC,
"context.columns.breakdown_value" ASC
""",
timings=self.timings,
Expand Down Expand Up @@ -346,6 +350,7 @@ def to_path_bounce_query(self) -> ast.SelectQuery:
) as bounce
ON counts.breakdown_value = bounce.breakdown_value
ORDER BY "context.columns.visitors" DESC,
"context.columns.views" DESC,
"context.columns.breakdown_value" ASC
""",
timings=self.timings,
Expand Down Expand Up @@ -500,9 +505,11 @@ def _counts_breakdown_value(self):
case WebStatsBreakdown.CITY:
return parse_expr("tuple(properties.$geoip_country_code, properties.$geoip_city_name)")
case WebStatsBreakdown.TIMEZONE:
# Timezone offsets would be slightly more useful, but that's not easily achievable
# with Clickhouse, we might attempt to change this in the future
return ast.Field(chain=["properties", "$timezone"])
# Get the difference between the UNIX timestamp at UTC and the UNIX timestamp at the event's timezone
# Value is in milliseconds, turn it to hours, works even for fractional timezone offsets (I'm looking at you, Australia)
return parse_expr(
"if(or(isNull(properties.$timezone), empty(properties.$timezone)), NULL, (toUnixTimestamp64Milli(parseDateTimeBestEffort(assumeNotNull(toString(timestamp, properties.$timezone)))) - toUnixTimestamp64Milli(timestamp)) / 3600000)"
)
case _:
raise NotImplementedError("Breakdown not implemented")

Expand Down
136 changes: 114 additions & 22 deletions posthog/hogql_queries/web_analytics/test/test_web_stats_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -1006,50 +1006,142 @@ def test_cohort_test_filters(self):
assert results == [["/path1", 1, 1]]

def test_timezone_filter(self):
d1, s1 = "d1", str(uuid7("2024-07-30"))
d2, s2 = "d2", str(uuid7("2024-07-30"))
date = "2024-07-30"

for idx, (distinct_id, session_id) in enumerate(
[
("UTC", str(uuid7(date))),
("Asia/Calcutta", str(uuid7(date))),
("America/New_York", str(uuid7(date))),
("America/Sao_Paulo", str(uuid7(date))),
]
):
_create_person(
team_id=self.team.pk,
distinct_ids=[distinct_id],
properties={"name": session_id, "email": f"{distinct_id}@example.com"},
)

for i in range(idx + 1):
_create_event(
team=self.team,
event="$pageview",
distinct_id=distinct_id,
timestamp=date,
properties={"$session_id": session_id, "$pathname": f"/path{i}", "$timezone": distinct_id},
)

results = self._run_web_stats_table_query(
"all",
None,
breakdown_by=WebStatsBreakdown.TIMEZONE,
).results

# Brasilia UTC-3, New York UTC-4, Calcutta UTC+5:30, UTC
assert results == [[-3.0, 1.0, 4.0], [-4.0, 1.0, 3.0], [5.5, 1.0, 2.0], [0.0, 1.0, 1.0]]

def test_timezone_filter_dst_change(self):
did = "id"
sid = str(uuid7("2019-02-17"))

_create_person(
team_id=self.team.pk,
distinct_ids=[d1],
properties={"name": d1, "email": "[email protected]"},
distinct_ids=[did],
properties={"name": sid, "email": f"[email protected]"},
)

# Cross daylight savings time change in Brazil
for i in range(6):
_create_event(
team=self.team,
event="$pageview",
distinct_id=did,
timestamp=f"2019-02-17 0{i}:00:00",
properties={"$session_id": sid, "$pathname": f"/path1", "$timezone": "America/Sao_Paulo"},
)

results = self._run_web_stats_table_query(
"all",
None,
breakdown_by=WebStatsBreakdown.TIMEZONE,
).results

# Change from UTC-2 to UTC-3 in the middle of the night
assert results == [[-3.0, 1.0, 4.0], [-2.0, 1.0, 2.0]]

def test_timezone_filter_with_invalid_timezone(self):
date = "2024-07-30"

for idx, (distinct_id, session_id) in enumerate(
[
("UTC", str(uuid7(date))),
("Timezone_not_exists", str(uuid7(date))),
]
):
_create_person(
team_id=self.team.pk,
distinct_ids=[distinct_id],
properties={"name": session_id, "email": f"{distinct_id}@example.com"},
)

for i in range(idx + 1):
_create_event(
team=self.team,
event="$pageview",
distinct_id=distinct_id,
timestamp=date,
properties={"$session_id": session_id, "$pathname": f"/path{i}", "$timezone": distinct_id},
)

with self.assertRaisesRegex(Exception, "Cannot load time zone"):
self._run_web_stats_table_query(
"all",
None,
breakdown_by=WebStatsBreakdown.TIMEZONE,
)

def test_timezone_filter_with_empty_timezone(self):
did = "id"
sid = str(uuid7("2019-02-17"))

_create_person(
team_id=self.team.pk,
distinct_ids=[did],
properties={"name": sid, "email": f"[email protected]"},
)

# Key not exists
_create_event(
team=self.team,
event="$pageview",
distinct_id=d1,
timestamp="2024-07-30",
properties={"$session_id": s1, "$pathname": "/path1", "$timezone": "America/New_York"},
distinct_id=did,
timestamp=f"2019-02-17 00:00:00",
properties={"$session_id": sid, "$pathname": f"/path1"},
)

_create_person(
team_id=self.team.pk,
distinct_ids=[d2],
properties={"name": d2, "email": "[email protected]"},
)
# Key exists, it's null
_create_event(
team=self.team,
event="$pageview",
distinct_id=d2,
timestamp="2024-07-30",
properties={"$session_id": s2, "$pathname": "/path2", "$timezone": "America/Brasilia"},
distinct_id=did,
timestamp=f"2019-02-17 00:00:00",
properties={"$session_id": sid, "$pathname": f"/path1", "$timezone": None},
)

# Key exists, it's empty string
_create_event(
team=self.team,
event="$pageview",
distinct_id=d2,
timestamp="2024-07-30",
properties={"$session_id": s2, "$pathname": "/path3", "$timezone": "America/Brasilia"},
distinct_id=did,
timestamp=f"2019-02-17 00:00:00",
properties={"$session_id": sid, "$pathname": f"/path1", "$timezone": ""},
)

flush_persons_and_events()

results = self._run_web_stats_table_query(
"all",
None,
breakdown_by=WebStatsBreakdown.TIMEZONE,
filter_test_accounts=True,
).results

assert results == [["America/Brasilia", 1.0, 2.0], ["America/New_York", 1.0, 1.0]]
# Don't crash, treat all of them null
assert results == []

0 comments on commit c0652cf

Please sign in to comment.