diff --git a/frontend/src/scenes/web-analytics/WebAnalyticsScene.tsx b/frontend/src/scenes/web-analytics/WebAnalyticsScene.tsx
index 5b3859cf9e298..4fb41fe7261cc 100644
--- a/frontend/src/scenes/web-analytics/WebAnalyticsScene.tsx
+++ b/frontend/src/scenes/web-analytics/WebAnalyticsScene.tsx
@@ -6,7 +6,7 @@ import { NodeKind } from '~/queries/schema'
export function WebAnalyticsScene(): JSX.Element {
return (
- Top pages
+ Top sources
+ Top clicks
+
+ Top pages
+
)
}
diff --git a/posthog/api/query.py b/posthog/api/query.py
index cefe52b841f89..21ede66e8e91e 100644
--- a/posthog/api/query.py
+++ b/posthog/api/query.py
@@ -28,6 +28,8 @@
from posthog.hogql_queries.lifecycle_query_runner import LifecycleQueryRunner
from posthog.hogql_queries.trends_query_runner import TrendsQueryRunner
+from posthog.hogql_queries.web_analytics.top_clicks import TopClicksQueryRunner
+from posthog.hogql_queries.web_analytics.top_pages import TopPagesQueryRunner
from posthog.hogql_queries.web_analytics.top_sources import TopSourcesQueryRunner
from posthog.models import Team
from posthog.models.event.events_query import run_events_query
@@ -252,8 +254,17 @@ def process_query(
serializer.is_valid(raise_exception=True)
return get_session_events(serializer) or {}
elif query_kind == "WebTopSourcesQuery":
+ refresh_requested = refresh_requested_by_client(request) if request else False
top_sources_query_runner = TopSourcesQueryRunner(query_json, team)
- return _unwrap_pydantic_dict(top_sources_query_runner.run())
+ return _unwrap_pydantic_dict(top_sources_query_runner.run(refresh_requested=refresh_requested))
+ elif query_kind == "WebTopClicksQuery":
+ refresh_requested = refresh_requested_by_client(request) if request else False
+ top_clicks_query_runner = TopClicksQueryRunner(query_json, team)
+ return _unwrap_pydantic_dict(top_clicks_query_runner.run(refresh_requested=refresh_requested))
+ elif query_kind == "WebTopPagesQuery":
+ refresh_requested = refresh_requested_by_client(request) if request else False
+ top_pages_query_runner = TopPagesQueryRunner(query_json, team)
+ return _unwrap_pydantic_dict(top_pages_query_runner.run(refresh_requested=refresh_requested))
else:
if query_json.get("source"):
return process_query(team, query_json["source"])
diff --git a/posthog/hogql_queries/lifecycle_query_runner.py b/posthog/hogql_queries/lifecycle_query_runner.py
index c7bebc76be062..9583a9eb3bd38 100644
--- a/posthog/hogql_queries/lifecycle_query_runner.py
+++ b/posthog/hogql_queries/lifecycle_query_runner.py
@@ -11,14 +11,14 @@
from posthog.hogql.property import property_to_expr, action_to_expr
from posthog.hogql.query import execute_hogql_query
from posthog.hogql.timings import HogQLTimings
-from posthog.hogql_queries.query_runner import QueryRunner
+from posthog.hogql_queries.query_runner import InsightQueryRunner
from posthog.models import Team, Action
from posthog.hogql_queries.utils.query_date_range import QueryDateRange
from posthog.models.filters.mixins.utils import cached_property
from posthog.schema import LifecycleQuery, ActionsNode, EventsNode, LifecycleQueryResponse
-class LifecycleQueryRunner(QueryRunner):
+class LifecycleQueryRunner(InsightQueryRunner):
query: LifecycleQuery
query_type = LifecycleQuery
diff --git a/posthog/hogql_queries/query_runner.py b/posthog/hogql_queries/query_runner.py
index 5dbd4850e599d..629e1b578984a 100644
--- a/posthog/hogql_queries/query_runner.py
+++ b/posthog/hogql_queries/query_runner.py
@@ -2,11 +2,12 @@
from datetime import datetime
from typing import Any, Generic, List, Optional, Type, Dict, TypeVar
-from prometheus_client import Counter
-from django.core.cache import cache
from django.conf import settings
+from django.core.cache import cache
+from prometheus_client import Counter
from pydantic import BaseModel, ConfigDict
+from posthog.caching.insights_api import BASE_MINIMUM_INSIGHT_REFRESH_INTERVAL
from posthog.clickhouse.query_tagging import tag_queries
from posthog.hogql import ast
from posthog.hogql.context import HogQLContext
@@ -15,7 +16,7 @@
from posthog.metrics import LABEL_TEAM_ID
from posthog.models import Team
from posthog.schema import QueryTiming
-from posthog.types import InsightQueryNode
+from posthog.types import InsightOrWebAnalyticsQueryNode
from posthog.utils import generate_cache_key, get_safe_cache
QUERY_CACHE_WRITE_COUNTER = Counter(
@@ -39,6 +40,8 @@ class QueryResponse(BaseModel, Generic[DataT]):
)
result: DataT
timings: Optional[List[QueryTiming]] = None
+ types: Optional[Any] = None
+ columns: Optional[Any] = None
class CachedQueryResponse(QueryResponse):
@@ -50,13 +53,15 @@ class CachedQueryResponse(QueryResponse):
next_allowed_client_refresh: str
-class QueryRunner(ABC):
- query: InsightQueryNode
- query_type: Type[InsightQueryNode]
+class BaseQueryRunner(ABC):
+ query: InsightOrWebAnalyticsQueryNode
+ query_type: Type[InsightOrWebAnalyticsQueryNode]
team: Team
timings: HogQLTimings
- def __init__(self, query: InsightQueryNode | Dict[str, Any], team: Team, timings: Optional[HogQLTimings] = None):
+ def __init__(
+ self, query: InsightOrWebAnalyticsQueryNode | Dict[str, Any], team: Team, timings: Optional[HogQLTimings] = None
+ ):
self.team = team
self.timings = timings or HogQLTimings()
if isinstance(query, self.query_type):
@@ -99,11 +104,6 @@ def run(self, refresh_requested: bool) -> CachedQueryResponse:
def to_query(self) -> ast.SelectQuery:
raise NotImplementedError()
- @abstractmethod
- def to_persons_query(self) -> str:
- # TODO: add support for selecting and filtering by breakdowns
- raise NotImplementedError()
-
def to_hogql(self) -> str:
with self.timings.measure("to_hogql"):
return print_ast(
@@ -116,7 +116,9 @@ def toJSON(self) -> str:
return self.query.model_dump_json(exclude_defaults=True, exclude_none=True)
def _cache_key(self) -> str:
- return generate_cache_key(f"query_{self.toJSON()}_{self.team.pk}_{self.team.timezone}")
+ return generate_cache_key(
+ f"query_{self.toJSON()}_{self.__class__.__name__}_{self.team.pk}_{self.team.timezone}"
+ )
@abstractmethod
def _is_stale(self, cached_result_package):
@@ -125,3 +127,18 @@ def _is_stale(self, cached_result_package):
@abstractmethod
def _refresh_frequency(self):
raise NotImplementedError()
+
+
+class InsightQueryRunner(BaseQueryRunner):
+ @abstractmethod
+ def to_persons_query(self) -> str:
+ # TODO: add support for selecting and filtering by breakdowns
+ raise NotImplementedError()
+
+
+class WebAnalyticsQueryRunner(BaseQueryRunner):
+ def _is_stale(self, cached_result_package):
+ return True
+
+ def _refresh_frequency(self):
+ return BASE_MINIMUM_INSIGHT_REFRESH_INTERVAL
diff --git a/posthog/hogql_queries/test/test_query_runner.py b/posthog/hogql_queries/test/test_query_runner.py
index d9af90a1e4ff9..2d014fee2d6cf 100644
--- a/posthog/hogql_queries/test/test_query_runner.py
+++ b/posthog/hogql_queries/test/test_query_runner.py
@@ -1,10 +1,12 @@
from datetime import datetime, timedelta
-from dateutil.parser import isoparse
-from zoneinfo import ZoneInfo
from typing import Any, List, Literal, Optional, Type
+from zoneinfo import ZoneInfo
+
+from dateutil.parser import isoparse
from freezegun import freeze_time
from pydantic import BaseModel
-from posthog.hogql_queries.query_runner import QueryResponse, QueryRunner
+
+from posthog.hogql_queries.query_runner import QueryResponse, BaseQueryRunner
from posthog.models.team.team import Team
from posthog.test.base import BaseTest
from posthog.types import InsightQueryNode
@@ -20,7 +22,7 @@ class QueryRunnerTest(BaseTest):
def setup_test_query_runner_class(self, query_class: Type[InsightQueryNode] = TestQuery): # type: ignore
"""Setup required methods and attributes of the abstract base class."""
- class TestQueryRunner(QueryRunner):
+ class TestQueryRunner(BaseQueryRunner):
query_type = query_class
def calculate(self) -> QueryResponse:
@@ -86,7 +88,20 @@ def test_cache_key(self):
runner = TestQueryRunner(query={"some_attr": "bla"}, team=team) # type: ignore
cache_key = runner._cache_key()
- self.assertEqual(cache_key, "cache_f0f2ce8b1f3d107b9671a178b25be2aa")
+ self.assertEqual(cache_key, "cache_33c9ea3098895d5a363a75feefafef06")
+
+ def test_cache_key_runner_subclass(self):
+ TestQueryRunner = self.setup_test_query_runner_class()
+
+ class TestSubclassQueryRunner(TestQueryRunner): # type: ignore
+ pass
+
+ team = Team.objects.create(pk=42, organization=self.organization)
+
+ runner = TestSubclassQueryRunner(query={"some_attr": "bla"}, team=team) # type: ignore
+
+ cache_key = runner._cache_key()
+ self.assertEqual(cache_key, "cache_d626615de8ad0df73c1d8610ca586597")
def test_cache_key_different_timezone(self):
TestQueryRunner = self.setup_test_query_runner_class()
@@ -97,7 +112,7 @@ def test_cache_key_different_timezone(self):
runner = TestQueryRunner(query={"some_attr": "bla"}, team=team) # type: ignore
cache_key = runner._cache_key()
- self.assertEqual(cache_key, "cache_0fa2172980705adb41741351f40189b7")
+ self.assertEqual(cache_key, "cache_aeb23ec9e8de56dd8499f99f2e976d5a")
def test_cache_response(self):
TestQueryRunner = self.setup_test_query_runner_class()
diff --git a/posthog/hogql_queries/trends_query_runner.py b/posthog/hogql_queries/trends_query_runner.py
index 373b55b32790b..076fa8b109dfc 100644
--- a/posthog/hogql_queries/trends_query_runner.py
+++ b/posthog/hogql_queries/trends_query_runner.py
@@ -13,7 +13,7 @@
from posthog.hogql.property import property_to_expr
from posthog.hogql.query import execute_hogql_query
from posthog.hogql.timings import HogQLTimings
-from posthog.hogql_queries.query_runner import QueryRunner
+from posthog.hogql_queries.query_runner import InsightQueryRunner
from posthog.hogql_queries.utils.formula_ast import FormulaAST
from posthog.hogql_queries.utils.query_date_range import QueryDateRange
from posthog.hogql_queries.utils.query_previous_period_date_range import QueryPreviousPeriodDateRange
@@ -31,7 +31,7 @@ def __init__(self, series: EventsNode | ActionsNode, is_previous_period_series:
self.is_previous_period_series = is_previous_period_series
-class TrendsQueryRunner(QueryRunner):
+class TrendsQueryRunner(InsightQueryRunner):
query: TrendsQuery
query_type = TrendsQuery
series: List[SeriesWithExtras]
diff --git a/posthog/hogql_queries/web_analytics/top_clicks.py b/posthog/hogql_queries/web_analytics/top_clicks.py
new file mode 100644
index 0000000000000..9c912a6b6f149
--- /dev/null
+++ b/posthog/hogql_queries/web_analytics/top_clicks.py
@@ -0,0 +1,53 @@
+from django.utils.timezone import datetime
+
+from posthog.hogql import ast
+from posthog.hogql.parser import parse_select
+from posthog.hogql.query import execute_hogql_query
+from posthog.hogql_queries.query_runner import WebAnalyticsQueryRunner
+from posthog.hogql_queries.utils.query_date_range import QueryDateRange
+from posthog.models.filters.mixins.utils import cached_property
+from posthog.schema import WebTopClicksQuery, WebTopClicksQueryResponse
+
+
+class TopClicksQueryRunner(WebAnalyticsQueryRunner):
+ query: WebTopClicksQuery
+ query_type = WebTopClicksQuery
+
+ def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery:
+ with self.timings.measure("top_clicks_query"):
+ top_sources_query = parse_select(
+ """
+SELECT
+ properties.$el_text as el_text,
+ count() as total_clicks,
+ COUNT(DISTINCT events.person_id) as unique_visitors
+FROM
+ events
+WHERE
+ event == '$autocapture'
+AND events.timestamp >= now() - INTERVAL 7 DAY
+AND events.properties.$event_type = 'click'
+AND el_text IS NOT NULL
+GROUP BY
+ el_text
+ORDER BY total_clicks DESC
+ """,
+ timings=self.timings,
+ )
+ return top_sources_query
+
+ def calculate(self):
+ response = execute_hogql_query(
+            query_type="top_clicks_query",
+ query=self.to_query(),
+ team=self.team,
+ timings=self.timings,
+ )
+
+ return WebTopClicksQueryResponse(
+ columns=response.columns, result=response.results, timings=response.timings, types=response.types
+ )
+
+ @cached_property
+ def query_date_range(self):
+ return QueryDateRange(date_range=self.query.dateRange, team=self.team, interval=None, now=datetime.now())
diff --git a/posthog/hogql_queries/web_analytics/top_pages.py b/posthog/hogql_queries/web_analytics/top_pages.py
new file mode 100644
index 0000000000000..1de2a2936c8dd
--- /dev/null
+++ b/posthog/hogql_queries/web_analytics/top_pages.py
@@ -0,0 +1,151 @@
+from django.utils.timezone import datetime
+
+from posthog.hogql import ast
+from posthog.hogql.parser import parse_select
+from posthog.hogql.query import execute_hogql_query
+from posthog.hogql_queries.query_runner import WebAnalyticsQueryRunner
+from posthog.hogql_queries.utils.query_date_range import QueryDateRange
+from posthog.models.filters.mixins.utils import cached_property
+from posthog.schema import WebTopPagesQuery, WebTopPagesQueryResponse
+
+
+class TopPagesQueryRunner(WebAnalyticsQueryRunner):
+ query: WebTopPagesQuery
+ query_type = WebTopPagesQuery
+
+ def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery:
+ with self.timings.measure("top_pages_query"):
+ top_sources_query = parse_select(
+ """
+WITH
+
+scroll_depth_cte AS (
+SELECT
+ events.properties.`$prev_pageview_pathname` AS pathname,
+ countIf(events.event == '$pageview') as total_pageviews,
+ COUNT(DISTINCT events.properties.distinct_id) as unique_visitors, -- might want to use person id? have seen a small number of pages where unique > total
+ avg(CASE
+ WHEN events.properties.`$prev_pageview_max_content_percentage` IS NULL THEN NULL
+ WHEN events.properties.`$prev_pageview_max_content_percentage` > 0.8 THEN 100
+ ELSE 0
+ END) AS scroll_gt80_percentage,
+ avg(events.properties.$prev_pageview_max_scroll_percentage) * 100 as average_scroll_percentage
+FROM
+ events
+WHERE
+ (event = '$pageview' OR event = '$pageleave') AND events.properties.`$prev_pageview_pathname` IS NOT NULL
+ AND events.timestamp >= now() - INTERVAL 7 DAY
+GROUP BY pathname
+)
+
+,
+
+session_cte AS (
+SELECT
+ events.properties.`$session_id` AS session_id,
+ min(events.timestamp) AS min_timestamp,
+ max(events.timestamp) AS max_timestamp,
+ dateDiff('second', min_timestamp, max_timestamp) AS duration_s,
+
+ -- create a tuple so that these are grouped in the same order, see https://github.com/ClickHouse/ClickHouse/discussions/42338
+ groupArray((events.timestamp, events.properties.`$referrer`, events.properties.`$pathname`, events.properties.utm_source)) AS tuple_array,
+ arrayFirstIndex(x -> tupleElement(x, 1) == min_timestamp, tuple_array) as index_of_earliest,
+ arrayFirstIndex(x -> tupleElement(x, 1) == max_timestamp, tuple_array) as index_of_latest,
+ tupleElement(arrayElement(
+ tuple_array,
+ index_of_earliest
+ ), 2) AS earliest_referrer,
+ tupleElement(arrayElement(
+ tuple_array,
+ index_of_earliest
+ ), 3) AS earliest_pathname,
+ tupleElement(arrayElement(
+ tuple_array,
+ index_of_earliest
+ ), 4) AS earliest_utm_source,
+
+ if(domain(earliest_referrer) = '', earliest_referrer, domain(earliest_referrer)) AS referrer_domain,
+ multiIf(
+ earliest_utm_source IS NOT NULL, earliest_utm_source,
+ -- This will need to be an approach that scales better
+ referrer_domain == 'app.posthog.com', 'posthog',
+ referrer_domain == 'eu.posthog.com', 'posthog',
+ referrer_domain == 'posthog.com', 'posthog',
+ referrer_domain == 'www.google.com', 'google',
+ referrer_domain == 'www.google.co.uk', 'google',
+ referrer_domain == 'www.google.com.hk', 'google',
+ referrer_domain == 'www.google.de', 'google',
+ referrer_domain == 't.co', 'twitter',
+ referrer_domain == 'github.com', 'github',
+ referrer_domain == 'duckduckgo.com', 'duckduckgo',
+ referrer_domain == 'www.bing.com', 'bing',
+ referrer_domain == 'bing.com', 'bing',
+ referrer_domain == 'yandex.ru', 'yandex',
+ referrer_domain == 'quora.com', 'quora',
+ referrer_domain == 'www.quora.com', 'quora',
+ referrer_domain == 'linkedin.com', 'linkedin',
+ referrer_domain == 'www.linkedin.com', 'linkedin',
+ startsWith(referrer_domain, 'http://localhost:'), 'localhost',
+ referrer_domain
+ ) AS blended_source,
+
+ countIf(events.event == '$pageview') AS num_pageviews,
+ countIf(events.event == '$autocapture') AS num_autocaptures,
+ -- in v1 we'd also want to count whether there were any conversion events
+
+ any(events.person_id) as person_id,
+ -- definition of a GA4 bounce from here https://support.google.com/analytics/answer/12195621?hl=en
+ (num_autocaptures == 0 AND num_pageviews <= 1 AND duration_s < 10) AS is_bounce
+FROM
+ events
+WHERE
+ session_id IS NOT NULL
+AND
+ events.timestamp >= now() - INTERVAL 8 DAY
+GROUP BY
+ events.properties.`$session_id`
+HAVING
+ min_timestamp >= now() - INTERVAL 7 DAY
+)
+
+,
+
+bounce_rate_cte AS (
+SELECT session_cte.earliest_pathname,
+ avg(session_cte.is_bounce) as bounce_rate
+FROM session_cte
+GROUP BY earliest_pathname
+)
+
+
+
+SELECT scroll_depth_cte.pathname as pathname,
+scroll_depth_cte.total_pageviews as total_pageviews,
+scroll_depth_cte.unique_visitors as unique_visitors,
+scroll_depth_cte.scroll_gt80_percentage as scroll_gt80_percentage,
+scroll_depth_cte.average_scroll_percentage as average_scroll_percentage,
+bounce_rate_cte.bounce_rate as bounce_rate
+FROM
+ scroll_depth_cte LEFT OUTER JOIN bounce_rate_cte
+ON scroll_depth_cte.pathname = bounce_rate_cte.earliest_pathname
+ORDER BY total_pageviews DESC
+ """,
+ timings=self.timings,
+ )
+ return top_sources_query
+
+ def calculate(self):
+ response = execute_hogql_query(
+            query_type="top_pages_query",
+ query=self.to_query(),
+ team=self.team,
+ timings=self.timings,
+ )
+
+ return WebTopPagesQueryResponse(
+ columns=response.columns, result=response.results, timings=response.timings, types=response.types
+ )
+
+ @cached_property
+ def query_date_range(self):
+ return QueryDateRange(date_range=self.query.dateRange, team=self.team, interval=None, now=datetime.now())
diff --git a/posthog/hogql_queries/web_analytics/top_sources.py b/posthog/hogql_queries/web_analytics/top_sources.py
index 4372b98002f0a..df08dbc843e39 100644
--- a/posthog/hogql_queries/web_analytics/top_sources.py
+++ b/posthog/hogql_queries/web_analytics/top_sources.py
@@ -1,21 +1,18 @@
-from datetime import timedelta
-from math import ceil
-from typing import Optional
-
from django.utils.timezone import datetime
-from posthog.caching.insights_api import BASE_MINIMUM_INSIGHT_REFRESH_INTERVAL, REDUCED_MINIMUM_INSIGHT_REFRESH_INTERVAL
-from posthog.caching.utils import is_stale
from posthog.hogql import ast
from posthog.hogql.parser import parse_select
from posthog.hogql.query import execute_hogql_query
+from posthog.hogql_queries.query_runner import WebAnalyticsQueryRunner
from posthog.hogql_queries.utils.query_date_range import QueryDateRange
-from posthog.hogql_queries.web_analytics.web_analytics_query_runner import WebAnalyticsQueryRunner
from posthog.models.filters.mixins.utils import cached_property
-from posthog.schema import HogQLQueryResponse
+from posthog.schema import WebTopSourcesQuery, WebTopSourcesQueryResponse
class TopSourcesQueryRunner(WebAnalyticsQueryRunner):
+ query: WebTopSourcesQuery
+ query_type = WebTopSourcesQuery
+
def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery:
with self.timings.measure("top_sources_query"):
top_sources_query = parse_select(
@@ -110,7 +107,7 @@ def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery:
)
return top_sources_query
- def calculate(self) -> HogQLQueryResponse:
+ def calculate(self):
response = execute_hogql_query(
query_type="top_sources_query",
query=self.to_query(),
@@ -118,30 +115,10 @@ def calculate(self) -> HogQLQueryResponse:
timings=self.timings,
)
- return response
+ return WebTopSourcesQueryResponse(
+ columns=response.columns, result=response.results, timings=response.timings, types=response.types
+ )
@cached_property
def query_date_range(self):
return QueryDateRange(date_range=self.query.dateRange, team=self.team, interval=None, now=datetime.now())
-
- def _is_stale(self, cached_result_package):
- date_to = self.query_date_range.date_to()
- interval = self.query_date_range.interval_name
- return is_stale(self.team, date_to, interval, cached_result_package)
-
- def _refresh_frequency(self):
- date_to = self.query_date_range.date_to()
- date_from = self.query_date_range.date_from()
- interval = self.query_date_range.interval_name
-
- delta_days: Optional[int] = None
- if date_from and date_to:
- delta = date_to - date_from
- delta_days = ceil(delta.total_seconds() / timedelta(days=1).total_seconds())
-
- refresh_frequency = BASE_MINIMUM_INSIGHT_REFRESH_INTERVAL
- if interval == "hour" or (delta_days is not None and delta_days <= 7):
- # The interval is shorter for short-term insights
- refresh_frequency = REDUCED_MINIMUM_INSIGHT_REFRESH_INTERVAL
-
- return refresh_frequency
diff --git a/posthog/hogql_queries/web_analytics/web_analytics_query_runner.py b/posthog/hogql_queries/web_analytics/web_analytics_query_runner.py
deleted file mode 100644
index 641f8a5c5e582..0000000000000
--- a/posthog/hogql_queries/web_analytics/web_analytics_query_runner.py
+++ /dev/null
@@ -1,47 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import Any, Optional, Dict
-
-from posthog.hogql.timings import HogQLTimings
-from posthog.models import Team
-from posthog.schema import HogQLQueryResponse
-from posthog.types import WebAnalyticsQueryNode
-from posthog.utils import generate_cache_key
-
-
-class WebAnalyticsQueryRunner(ABC):
- team: Team
- timings: HogQLTimings
- query: WebAnalyticsQueryNode
-
- def __init__(
- self, query: WebAnalyticsQueryNode | Dict[str, Any], team: Team, timings: Optional[HogQLTimings] = None
- ):
- self.team = team
- self.timings = timings or HogQLTimings()
- if isinstance(query, WebAnalyticsQueryNode):
- self.query = query
- else:
- self.query = WebAnalyticsQueryNode.model_validate(query)
-
- @abstractmethod
- def calculate(self) -> HogQLQueryResponse:
- raise NotImplementedError()
-
- def run(self) -> HogQLQueryResponse:
- return self.calculate()
-
- def toJSON(self) -> str:
- return self.query.model_dump_json(exclude_defaults=True, exclude_none=True)
-
- def _cache_key(self) -> str:
- return generate_cache_key(
- f"query_{self.__class__.__name__}_{self.toJSON()}_{self.team.pk}_{self.team.timezone}"
- )
-
- @abstractmethod
- def _is_stale(self, cached_result_package):
- raise NotImplementedError()
-
- @abstractmethod
- def _refresh_frequency(self):
- raise NotImplementedError()
diff --git a/posthog/schema.py b/posthog/schema.py
index bbe34de311ec6..094f194ac0fc1 100644
--- a/posthog/schema.py
+++ b/posthog/schema.py
@@ -468,11 +468,43 @@ class TrendsQueryResponse(BaseModel):
timings: Optional[List[QueryTiming]] = None
-class WebAnalyticsFilters(BaseModel):
- pass
+class WebTopClicksQueryResponse(BaseModel):
+ model_config = ConfigDict(
+ extra="forbid",
+ )
+ columns: Optional[List] = None
+ is_cached: Optional[bool] = None
+ last_refresh: Optional[str] = None
+ next_allowed_client_refresh: Optional[str] = None
+ result: List
+ timings: Optional[List[QueryTiming]] = None
+ types: Optional[List] = None
+
+
+class WebTopPagesQueryResponse(BaseModel):
+ model_config = ConfigDict(
+ extra="forbid",
+ )
+ columns: Optional[List] = None
+ is_cached: Optional[bool] = None
+ last_refresh: Optional[str] = None
+ next_allowed_client_refresh: Optional[str] = None
+ result: List
+ timings: Optional[List[QueryTiming]] = None
+ types: Optional[List] = None
+
+
+class WebTopSourcesQueryResponse(BaseModel):
model_config = ConfigDict(
extra="forbid",
)
+ columns: Optional[List] = None
+ is_cached: Optional[bool] = None
+ last_refresh: Optional[str] = None
+ next_allowed_client_refresh: Optional[str] = None
+ result: List
+ timings: Optional[List[QueryTiming]] = None
+ types: Optional[List] = None
class Breakdown(BaseModel):
@@ -661,14 +693,34 @@ class TimeToSeeDataSessionsQuery(BaseModel):
teamId: Optional[float] = Field(default=None, description="Project to filter on. Defaults to current project")
+class WebTopClicksQuery(BaseModel):
+ model_config = ConfigDict(
+ extra="forbid",
+ )
+ dateRange: Optional[DateRange] = None
+ filters: Any
+ kind: Literal["WebTopClicksQuery"] = "WebTopClicksQuery"
+ response: Optional[WebTopClicksQueryResponse] = None
+
+
+class WebTopPagesQuery(BaseModel):
+ model_config = ConfigDict(
+ extra="forbid",
+ )
+ dateRange: Optional[DateRange] = None
+ filters: Any
+ kind: Literal["WebTopPagesQuery"] = "WebTopPagesQuery"
+ response: Optional[WebTopPagesQueryResponse] = None
+
+
class WebTopSourcesQuery(BaseModel):
model_config = ConfigDict(
extra="forbid",
)
dateRange: Optional[DateRange] = None
- filters: WebAnalyticsFilters
+ filters: Any
kind: Literal["WebTopSourcesQuery"] = "WebTopSourcesQuery"
- response: Optional[HogQLQueryResponse] = None
+ response: Optional[WebTopSourcesQueryResponse] = None
class DatabaseSchemaQuery(BaseModel):
@@ -1000,7 +1052,14 @@ class DataTableNode(BaseModel):
showSearch: Optional[bool] = Field(default=None, description="Include a free text search field (PersonsNode only)")
showTimings: Optional[bool] = Field(default=None, description="Show a detailed query timing breakdown")
source: Union[
- EventsNode, EventsQuery, PersonsNode, HogQLQuery, TimeToSeeDataSessionsQuery, WebTopSourcesQuery
+ EventsNode,
+ EventsQuery,
+ PersonsNode,
+ HogQLQuery,
+ TimeToSeeDataSessionsQuery,
+ WebTopSourcesQuery,
+ WebTopClicksQuery,
+ WebTopPagesQuery,
] = Field(..., description="Source of the events")
@@ -1279,6 +1338,8 @@ class Model(RootModel):
HogQLMetadata,
TimeToSeeDataSessionsQuery,
WebTopSourcesQuery,
+ WebTopClicksQuery,
+ WebTopPagesQuery,
],
]
diff --git a/posthog/types.py b/posthog/types.py
index bb126661dbad3..20a90a09473df 100644
--- a/posthog/types.py
+++ b/posthog/types.py
@@ -12,10 +12,21 @@
StickinessQuery,
TrendsQuery,
WebTopSourcesQuery,
+ WebTopClicksQuery,
+ WebTopPagesQuery,
)
FilterType = Union[Filter, PathFilter, RetentionFilter, StickinessFilter]
InsightQueryNode = Union[TrendsQuery, FunnelsQuery, RetentionQuery, PathsQuery, StickinessQuery, LifecycleQuery]
-
-WebAnalyticsQueryNode = Union[WebTopSourcesQuery]
+InsightOrWebAnalyticsQueryNode = Union[
+ TrendsQuery,
+ FunnelsQuery,
+ RetentionQuery,
+ PathsQuery,
+ StickinessQuery,
+ LifecycleQuery,
+ WebTopSourcesQuery,
+ WebTopClicksQuery,
+ WebTopPagesQuery,
+]