From 7f81bcb520457fed9e23ac0dbfa33a2f27d59691 Mon Sep 17 00:00:00 2001 From: Robbie Date: Sun, 9 Jun 2024 20:00:17 +0100 Subject: [PATCH] feat(web-analytics): Add bugfix for bounce rate workaround (#22827) * Add bugfix for bounce rate workaround * Parametrize the bounce rate test across different pageview modes * Add some more comments --- posthog/hogql/database/schema/sessions.py | 6 ++++- .../database/schema/test/test_sessions.py | 27 +++++++++++-------- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/posthog/hogql/database/schema/sessions.py b/posthog/hogql/database/schema/sessions.py index 8e6f547ea17ba..586da5d102206 100644 --- a/posthog/hogql/database/schema/sessions.py +++ b/posthog/hogql/database/schema/sessions.py @@ -205,6 +205,7 @@ def arg_max_merge_field(field_name: str) -> ast.Call: aggregate_fields["$is_bounce"] = ast.Call( name="if", args=[ + # if pageview_count is 0, return NULL so it doesn't contribute towards the bounce rate either way ast.Call(name="equals", args=[bounce_pageview_count, ast.Constant(value=0)]), ast.Constant(value=None), ast.Call( @@ -213,10 +214,13 @@ def arg_max_merge_field(field_name: str) -> ast.Call: ast.Call( name="or", args=[ - ast.Call(name="greater", args=[aggregate_fields["$pageview_count"], ast.Constant(value=1)]), + # if > 1 pageview, not a bounce + ast.Call(name="greater", args=[bounce_pageview_count, ast.Constant(value=1)]), + # if > 0 autocapture events, not a bounce ast.Call( name="greater", args=[aggregate_fields["$autocapture_count"], ast.Constant(value=0)] ), + # if session duration >= 10 seconds, not a bounce ast.Call( name="greaterOrEquals", args=[aggregate_fields["$session_duration"], ast.Constant(value=10)], diff --git a/posthog/hogql/database/schema/test/test_sessions.py b/posthog/hogql/database/schema/test/test_sessions.py index aebaec24b6bdd..53e13beee53c5 100644 --- a/posthog/hogql/database/schema/test/test_sessions.py +++ b/posthog/hogql/database/schema/test/test_sessions.py @@ -1,8 +1,11 @@ +from parameterized import parameterized + from posthog.hogql import ast from posthog.hogql.database.schema.sessions import get_lazy_session_table_properties from posthog.hogql.parser import parse_select from posthog.hogql.query import execute_hogql_query from posthog.models.property_definition import PropertyType +from posthog.schema import HogQLQueryModifiers, BounceRatePageViewMode from posthog.test.base import ( APIBaseTest, ClickhouseTestMixin, @@ -139,27 +142,28 @@ def test_persons_and_sessions_on_events(self): self.assertEqual(row1, (p1.uuid, "source1")) self.assertEqual(row2, (p2.uuid, "source2")) - def test_bounce_rate(self): + @parameterized.expand([(BounceRatePageViewMode.uniq_urls,), (BounceRatePageViewMode.count_pageviews,)]) + def test_bounce_rate(self, bounceRatePageViewMode): # person with 2 different sessions _create_event( event="$pageview", team=self.team, distinct_id="d1", - properties={"$session_id": "s1a"}, + properties={"$session_id": "s1a", "$current_url": "https://example.com/1"}, timestamp="2023-12-02", ) _create_event( event="$pageview", team=self.team, distinct_id="d1", - properties={"$session_id": "s1a"}, + properties={"$session_id": "s1a", "$current_url": "https://example.com/2"}, timestamp="2023-12-03", ) _create_event( event="$pageview", team=self.team, distinct_id="d1", - properties={"$session_id": "s1b"}, + properties={"$session_id": "s1b", "$current_url": "https://example.com/3"}, timestamp="2023-12-12", ) # session with 1 pageview @@ -167,7 +171,7 @@ def test_bounce_rate(self): event="$pageview", team=self.team, distinct_id="d2", - properties={"$session_id": "s2"}, + properties={"$session_id": "s2", "$current_url": "https://example.com/4"}, timestamp="2023-12-11", ) # session with 1 pageview and 1 autocapture @@ -175,14 +179,14 @@ def test_bounce_rate(self): event="$pageview", team=self.team, distinct_id="d3", - properties={"$session_id": "s3"}, + properties={"$session_id": "s3", "$current_url": "https://example.com/5"}, timestamp="2023-12-11", ) _create_event( event="$autocapture", team=self.team, distinct_id="d3", - properties={"$session_id": "s3"}, + properties={"$session_id": "s3", "$current_url": "https://example.com/5"}, timestamp="2023-12-11", ) # short session with a pageleave @@ -190,14 +194,14 @@ def test_bounce_rate(self): event="$pageview", team=self.team, distinct_id="d4", - properties={"$session_id": "s4"}, + properties={"$session_id": "s4", "$current_url": "https://example.com/6"}, timestamp="2023-12-11T12:00:00", ) _create_event( event="$pageleave", team=self.team, distinct_id="d4", - properties={"$session_id": "s4"}, + properties={"$session_id": "s4", "$current_url": "https://example.com/6"}, timestamp="2023-12-11T12:00:01", ) # long session with a pageleave @@ -205,14 +209,14 @@ def test_bounce_rate(self): event="$pageview", team=self.team, distinct_id="d5", - properties={"$session_id": "s5"}, + properties={"$session_id": "s5", "$current_url": "https://example.com/7"}, timestamp="2023-12-11T12:00:00", ) _create_event( event="$pageleave", team=self.team, distinct_id="d5", - properties={"$session_id": "s5"}, + properties={"$session_id": "s5", "$current_url": "https://example.com/7"}, timestamp="2023-12-11T12:00:11", ) response = execute_hogql_query( @@ -220,6 +224,7 @@ def test_bounce_rate(self): "select $is_bounce, session_id from sessions ORDER BY session_id", ), self.team, + modifiers=HogQLQueryModifiers(bounceRatePageViewMode=bounceRatePageViewMode), ) self.assertEqual( [