Skip to content

Commit

Permalink
feat(web-analytics): Add Sessions Table V2 (#23023)
Browse files Browse the repository at this point in the history
* Add raw_sessions table

* Fix

* Change time chunking to 5 minutes

* Add modes of operation, and some comments

* WIP wire up sessions table V2

* More working v2 sessions tests

* Optimize imports

* Fix v1 sessions table test

* Fix more tests

* Fix channel type tests

* Fix session replay joining with v2

* Web analytics queries and their tests working

* Fix where clause extractor tests for v1

* Fix backfill script

* Show last select query instead of first

* Run ruff

* Fix ids in tests

* Fix database init

* spelling

* Fix test_query

* Formatting

* Handle session properties with v2 session table

* Add more columns, fix some properties

* Add new properties to taxonomy

* Fix trends tests

* Fix modifiers

* Set v1 sessions table to default

* Capture viewport size

* Fix keyword arg rename

* Update query snapshots

* Update query snapshots

* Fix test_utils

* Fix test_trends

* Make it easier to run test_parser_cpp from PyCharm

* Update query snapshots

* Update query snapshots

* Add last external click url to the sessions MV

* Update query snapshots

* Add test_last_external_click_url

* Add ingest from date

* Run schema build after a rebase

* Tweak test_all

* Update query snapshots

* Run schema after rebase

* Update query snapshots

* Update UI snapshots for `chromium` (2)

* Update UI snapshots for `chromium` (2)

* Change ingestion date and add explaining comment

---------

Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
robbie-c and github-actions[bot] authored Jun 25, 2024
1 parent 4dc0bf2 commit bf8f4da
Show file tree
Hide file tree
Showing 44 changed files with 2,777 additions and 369 deletions.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
15 changes: 15 additions & 0 deletions frontend/src/lib/taxonomy.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -1036,8 +1036,18 @@ export const CORE_FILTER_DEFINITIONS_BY_GROUP = {
$entry_pathname: {
label: 'Entry pathname',
description: <span>The first pathname visited in this session</span>,
examples: ['/interesting-article?parameter=true'],
},
$end_current_url: {
    // "end" refers to the final URL of the session; the entry URL has its own definition.
    label: 'End URL',
    description: <span>The last URL visited in this session</span>,
    examples: ['https://example.com/interesting-article?parameter=true'],
},
$end_pathname: {
    // "end" refers to the final pathname of the session; `$entry_pathname` covers the first one.
    label: 'End pathname',
    description: <span>The last pathname visited in this session</span>,
    examples: ['/interesting-article?parameter=true'],
},
$exit_current_url: {
label: 'Exit URL',
description: <span>The last URL visited in this session</span>,
Expand All @@ -1058,6 +1068,11 @@ export const CORE_FILTER_DEFINITIONS_BY_GROUP = {
description: <span>The number of autocapture events in this session</span>,
examples: ['123'],
},
$screen_count: {
label: 'Screen count',
description: <span>The number of screen events in this session</span>,
examples: ['123'],
},
$channel_type: {
label: 'Channel type',
description: <span>What type of acquisition channel this traffic came from.</span>,
Expand Down
4 changes: 4 additions & 0 deletions frontend/src/queries/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -4377,6 +4377,10 @@
},
"s3TableUseInvalidColumns": {
"type": "boolean"
},
"sessionTableVersion": {
"enum": ["auto", "v1", "v2"],
"type": "string"
}
},
"type": "object"
Expand Down
1 change: 1 addition & 0 deletions frontend/src/queries/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ export interface HogQLQueryModifiers {
s3TableUseInvalidColumns?: boolean
personsJoinMode?: 'inner' | 'left'
bounceRatePageViewMode?: 'count_pageviews' | 'uniq_urls'
sessionTableVersion?: 'auto' | 'v1' | 'v2'
}

export interface DataWarehouseEventsModifier {
Expand Down
23 changes: 20 additions & 3 deletions posthog/api/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,20 @@
from rest_framework.exceptions import ValidationError

from posthog.api.routing import TeamAndOrgViewSetMixin
from posthog.hogql.database.schema.sessions import get_lazy_session_table_properties, get_lazy_session_table_values
from posthog.hogql.database.schema.sessions_v1 import (
get_lazy_session_table_properties_v1,
get_lazy_session_table_values_v1,
)
from posthog.hogql.database.schema.sessions_v2 import (
get_lazy_session_table_values_v2,
get_lazy_session_table_properties_v2,
)
from posthog.hogql.modifiers import create_default_modifiers_for_team
from posthog.rate_limit import (
ClickHouseBurstRateThrottle,
ClickHouseSustainedRateThrottle,
)
from posthog.schema import SessionTableVersion
from posthog.utils import convert_property_value, flatten


Expand All @@ -30,7 +39,11 @@ def values(self, request: request.Request, **kwargs) -> response.Response:
if not key:
raise ValidationError(detail=f"Key not provided")

result = get_lazy_session_table_values(key, search_term=search_term, team=team)
modifiers = create_default_modifiers_for_team(team)
if modifiers.sessionTableVersion == SessionTableVersion.V2:
result = get_lazy_session_table_values_v2(key, search_term=search_term, team=team)
else:
result = get_lazy_session_table_values_v1(key, search_term=search_term, team=team)

flattened = []
for value in result:
Expand All @@ -47,7 +60,11 @@ def property_definitions(self, request: request.Request, **kwargs) -> response.R

# unlike e.g. event properties, there's a very limited number of session properties,
# so we can just return them all
results = get_lazy_session_table_properties(search)
modifiers = create_default_modifiers_for_team(self.team)
if modifiers.sessionTableVersion == SessionTableVersion.V2:
results = get_lazy_session_table_properties_v2(search)
else:
results = get_lazy_session_table_properties_v1(search)
return response.Response(
{
"count": len(results),
Expand Down
8 changes: 4 additions & 4 deletions posthog/api/test/__snapshots__/test_properties_timeline.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,7 @@
ORDER BY timestamp ASC ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING) AS end_event_number
FROM
(SELECT timestamp, person_properties AS properties,
array(replaceRegexpAll(JSONExtractRaw(person_properties, 'foo'), '^"|"$', ''), replaceRegexpAll(JSONExtractRaw(person_properties, 'bar'), '^"|"$', '')) AS relevant_property_values,
array(replaceRegexpAll(JSONExtractRaw(person_properties, 'bar'), '^"|"$', ''), replaceRegexpAll(JSONExtractRaw(person_properties, 'foo'), '^"|"$', '')) AS relevant_property_values,
lagInFrame(relevant_property_values) OVER (
ORDER BY timestamp ASC ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS previous_relevant_property_values,
row_number() OVER (
Expand Down Expand Up @@ -482,7 +482,7 @@
ORDER BY timestamp ASC ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING) AS end_event_number
FROM
(SELECT timestamp, person_properties AS properties,
array("mat_pp_foo", "mat_pp_bar") AS relevant_property_values,
array("mat_pp_bar", "mat_pp_foo") AS relevant_property_values,
lagInFrame(relevant_property_values) OVER (
ORDER BY timestamp ASC ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS previous_relevant_property_values,
row_number() OVER (
Expand Down Expand Up @@ -522,7 +522,7 @@
ORDER BY timestamp ASC ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING) AS end_event_number
FROM
(SELECT timestamp, person_properties AS properties,
array(replaceRegexpAll(JSONExtractRaw(person_properties, 'foo'), '^"|"$', ''), replaceRegexpAll(JSONExtractRaw(person_properties, 'bar'), '^"|"$', '')) AS relevant_property_values,
array(replaceRegexpAll(JSONExtractRaw(person_properties, 'bar'), '^"|"$', ''), replaceRegexpAll(JSONExtractRaw(person_properties, 'foo'), '^"|"$', '')) AS relevant_property_values,
lagInFrame(relevant_property_values) OVER (
ORDER BY timestamp ASC ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS previous_relevant_property_values,
row_number() OVER (
Expand Down Expand Up @@ -558,7 +558,7 @@
ORDER BY timestamp ASC ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING) AS end_event_number
FROM
(SELECT timestamp, person_properties AS properties,
array("mat_pp_foo", "mat_pp_bar") AS relevant_property_values,
array("mat_pp_bar", "mat_pp_foo") AS relevant_property_values,
lagInFrame(relevant_property_values) OVER (
ORDER BY timestamp ASC ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS previous_relevant_property_values,
row_number() OVER (
Expand Down
6 changes: 4 additions & 2 deletions posthog/api/test/test_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,27 @@
from rest_framework import status

from posthog.models.event.util import create_event
from posthog.models.utils import uuid7
from posthog.test.base import APIBaseTest


class TestSessionsAPI(APIBaseTest):
def setUp(self) -> None:
super().setUp()
s1 = str(uuid7())

create_event(
team=self.team,
event="$pageview",
distinct_id="d1",
properties={"$session_id": "s1", "utm_source": "google"},
properties={"$session_id": s1, "utm_source": "google"},
event_uuid=(uuid.uuid4()),
)
create_event(
team=self.team,
event="$pageview",
distinct_id="d1",
properties={"$session_id": "s1", "utm_source": "youtube"},
properties={"$session_id": s1, "utm_source": "youtube"},
event_uuid=(uuid.uuid4()),
)

Expand Down
16 changes: 16 additions & 0 deletions posthog/clickhouse/migrations/0064_sessions_with_uuidv7.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from posthog.clickhouse.client.migration_tools import run_sql_with_exceptions
from posthog.models.raw_sessions.sql import (
DISTRIBUTED_RAW_SESSIONS_TABLE_SQL,
WRITABLE_RAW_SESSIONS_TABLE_SQL,
RAW_SESSIONS_VIEW_SQL,
RAW_SESSIONS_TABLE_SQL,
RAW_SESSIONS_TABLE_MV_SQL,
)

# One migration operation per raw_sessions SQL definition, kept in the same
# order as before: writable table, distributed table, the raw_sessions table
# itself, its materialized view, and finally the view.
# NOTE(review): the SQL names are passed to run_sql_with_exceptions as-is (not
# called) -- confirm that is the form the helper expects.
operations = [
    run_sql_with_exceptions(sql_definition)
    for sql_definition in (
        WRITABLE_RAW_SESSIONS_TABLE_SQL,
        DISTRIBUTED_RAW_SESSIONS_TABLE_SQL,
        RAW_SESSIONS_TABLE_SQL,
        RAW_SESSIONS_TABLE_MV_SQL,
        RAW_SESSIONS_VIEW_SQL,
    )
]
10 changes: 10 additions & 0 deletions posthog/clickhouse/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,12 @@
PERSON_OVERRIDES_CREATE_MATERIALIZED_VIEW_SQL,
KAFKA_PERSON_OVERRIDES_TABLE_SQL,
)
from posthog.models.raw_sessions.sql import (
RAW_SESSIONS_TABLE_SQL,
DISTRIBUTED_RAW_SESSIONS_TABLE_SQL,
WRITABLE_RAW_SESSIONS_TABLE_SQL,
RAW_SESSIONS_TABLE_MV_SQL,
)
from posthog.models.sessions.sql import (
SESSIONS_TABLE_SQL,
SESSIONS_TABLE_MV_SQL,
Expand Down Expand Up @@ -125,6 +131,7 @@
SESSION_REPLAY_EVENTS_TABLE_SQL,
CHANNEL_DEFINITION_TABLE_SQL,
SESSIONS_TABLE_SQL,
RAW_SESSIONS_TABLE_SQL,
HEATMAPS_TABLE_SQL,
)
CREATE_DISTRIBUTED_TABLE_QUERIES = (
Expand All @@ -138,7 +145,9 @@
DISTRIBUTED_PERFORMANCE_EVENTS_TABLE_SQL,
DISTRIBUTED_SESSION_REPLAY_EVENTS_TABLE_SQL,
WRITABLE_SESSIONS_TABLE_SQL,
WRITABLE_RAW_SESSIONS_TABLE_SQL,
DISTRIBUTED_SESSIONS_TABLE_SQL,
DISTRIBUTED_RAW_SESSIONS_TABLE_SQL,
WRITABLE_HEATMAPS_TABLE_SQL,
DISTRIBUTED_HEATMAPS_TABLE_SQL,
)
Expand Down Expand Up @@ -177,6 +186,7 @@
PERFORMANCE_EVENTS_TABLE_MV_SQL,
SESSION_REPLAY_EVENTS_TABLE_MV_SQL,
SESSIONS_TABLE_MV_SQL,
RAW_SESSIONS_TABLE_MV_SQL,
HEATMAPS_TABLE_MV_SQL,
)

Expand Down
Loading

0 comments on commit bf8f4da

Please sign in to comment.