From 106d7b3f7c0660cbccadcc4246713378e1117bc1 Mon Sep 17 00:00:00 2001 From: Robbie Coomber Date: Tue, 26 Sep 2023 16:02:25 +0100 Subject: [PATCH] Add web top sources query to the backend --- frontend/src/queries/schema.json | 15 + frontend/src/queries/schema.ts | 13 +- .../Settings/VerifiedDomains/SSOSelect.tsx | 2 +- .../scenes/saved-insights/SavedInsights.tsx | 6 + .../web-analytics/WebAnalyticsScene.tsx | 272 +----------------- posthog/api/query.py | 5 + .../hogql_queries/web_analytics/__init__.py | 0 .../web_analytics/top_sources.py | 148 ++++++++++ .../web_analytics/validationError.txt | 0 .../web_analytics_query_runner.py | 97 +++++++ posthog/schema.py | 14 +- 11 files changed, 306 insertions(+), 266 deletions(-) create mode 100644 posthog/hogql_queries/web_analytics/__init__.py create mode 100644 posthog/hogql_queries/web_analytics/top_sources.py create mode 100644 posthog/hogql_queries/web_analytics/validationError.txt create mode 100644 posthog/hogql_queries/web_analytics/web_analytics_query_runner.py diff --git a/frontend/src/queries/schema.json b/frontend/src/queries/schema.json index 7edaeb0039cf1e..d00115472b37a6 100644 --- a/frontend/src/queries/schema.json +++ b/frontend/src/queries/schema.json @@ -389,6 +389,9 @@ }, { "$ref": "#/definitions/TimeToSeeDataSessionsQuery" + }, + { + "$ref": "#/definitions/WebTopSourcesQuery" } ], "description": "Source of the events" @@ -2269,6 +2272,18 @@ }, "required": ["result"], "type": "object" + }, + "WebTopSourcesQuery": { + "additionalProperties": false, + "properties": { + "kind": { + "const": "WebTopSourcesQuery", + "type": "string" + }, + "query": {} + }, + "required": ["kind", "query"], + "type": "object" } } } diff --git a/frontend/src/queries/schema.ts b/frontend/src/queries/schema.ts index 54c296a3521a0b..f071d1adac2def 100644 --- a/frontend/src/queries/schema.ts +++ b/frontend/src/queries/schema.ts @@ -57,6 +57,9 @@ export enum NodeKind { StickinessQuery = 'StickinessQuery', LifecycleQuery = 'LifecycleQuery', + // Web analytics queries + WebTopSourcesQuery = 'WebTopSourcesQuery', + // Time to see data TimeToSeeDataSessionsQuery = 'TimeToSeeDataSessionsQuery', TimeToSeeDataQuery = 'TimeToSeeDataQuery', @@ -277,7 +280,7 @@ export type HasPropertiesNode = EventsNode | EventsQuery | PersonsNode export interface DataTableNode extends Node, DataTableNodeViewProps { kind: NodeKind.DataTableNode /** Source of the events */ - source: EventsNode | EventsQuery | PersonsNode | HogQLQuery | TimeToSeeDataSessionsQuery + source: EventsNode | EventsQuery | PersonsNode | HogQLQuery | TimeToSeeDataSessionsQuery | WebTopSourcesQuery /** Columns shown in the table, unless the `source` provides them. 
*/ columns?: HogQLExpression[] @@ -483,6 +486,14 @@ export interface LifecycleQuery extends InsightsQueryBase { response?: LifecycleQueryResponse } +export interface WebAnalyticsQueryBase {} + +export interface WebTopSourcesQuery extends WebAnalyticsQueryBase { + kind: NodeKind.WebTopSourcesQuery + query: any +} +export interface WebTopSourcesQueryResponse extends QueryResponse {} + export type InsightQueryNode = | TrendsQuery | FunnelsQuery diff --git a/frontend/src/scenes/organization/Settings/VerifiedDomains/SSOSelect.tsx b/frontend/src/scenes/organization/Settings/VerifiedDomains/SSOSelect.tsx index a35a6afd0c0b35..f8072bdc57d326 100644 --- a/frontend/src/scenes/organization/Settings/VerifiedDomains/SSOSelect.tsx +++ b/frontend/src/scenes/organization/Settings/VerifiedDomains/SSOSelect.tsx @@ -5,7 +5,7 @@ import { SSO_PROVIDER_NAMES } from 'lib/constants' import { preflightLogic } from 'scenes/PreflightCheck/preflightLogic' import { SSOProvider } from '~/types' -interface SSOSelectInterface { +export interface SSOSelectInterface { value: SSOProvider | '' loading: boolean onChange: (value: SSOProvider | '') => void diff --git a/frontend/src/scenes/saved-insights/SavedInsights.tsx b/frontend/src/scenes/saved-insights/SavedInsights.tsx index a59b440c4c2a2c..685898f2468e95 100644 --- a/frontend/src/scenes/saved-insights/SavedInsights.tsx +++ b/frontend/src/scenes/saved-insights/SavedInsights.tsx @@ -241,6 +241,12 @@ export const QUERY_TYPES_METADATA: Record = { icon: InsightSQLIcon, inMenu: true, }, + [NodeKind.WebTopSourcesQuery]: { + name: 'Top Sources', + description: 'View top sources for a website', + icon: InsightsTrendsIcon, + inMenu: true, + }, } export const INSIGHT_TYPE_OPTIONS: LemonSelectOptions = [ diff --git a/frontend/src/scenes/web-analytics/WebAnalyticsScene.tsx b/frontend/src/scenes/web-analytics/WebAnalyticsScene.tsx index 61a5b16ae49f7a..1f1789badc10bd 100644 --- a/frontend/src/scenes/web-analytics/WebAnalyticsScene.tsx +++ b/frontend/src/scenes/web-analytics/WebAnalyticsScene.tsx @@ -1,47 +1,22 @@ import { SceneExport } from 'scenes/sceneTypes' import { webAnalyticsLogic } from 'scenes/web-analytics/webAnalyticsLogic' import { Query } from '~/queries/Query/Query' -import { NodeKind } from '~/queries/schema' +import { DataTableNode, NodeKind } from '~/queries/schema' export function WebAnalyticsScene(): JSX.Element { + const query: DataTableNode = { + full: true, + kind: NodeKind.DataTableNode, + source: { + kind: NodeKind.WebTopSourcesQuery, + query: {}, + }, + } + return (
         <div>
-            Top pages
-            <Query
-                query={{
-                    full: true,
-                    kind: NodeKind.DataTableNode,
-                    source: {
-                        kind: NodeKind.HogQLQuery,
-                        query: TOP_PAGES_SQL,
-                    },
-                }}
-            />
-            Top sources
-            <Query
-                query={{
-                    full: true,
-                    kind: NodeKind.DataTableNode,
-                    source: {
-                        kind: NodeKind.HogQLQuery,
-                        query: TOP_SOURCES,
-                    },
-                }}
-            />
-            Top clicks
-            <Query
-                query={{
-                    full: true,
-                    kind: NodeKind.DataTableNode,
-                    source: {
-                        kind: NodeKind.HogQLQuery,
-                        query: TOP_CLICKS_SQL,
-                    },
-                }}
-            />
+            <Query query={query} />
         </div>
) } @@ -50,228 +25,3 @@ export const scene: SceneExport = { component: WebAnalyticsScene, logic: webAnalyticsLogic, } - -// Whilst this component is using HogQLQuery nodes, the SQL needs to exist client-side. -// These should be deleted once the new web analytics node types are created - -const TOP_CLICKS_SQL = ` - SELECT - properties.$el_text as el_text, - count() as total_clicks, - COUNT(DISTINCT events.person_id) as unique_visitors -FROM - events -WHERE - event == '$autocapture' -AND events.timestamp >= now() - INTERVAL 7 DAY -AND events.properties.$event_type = 'click' -AND el_text IS NOT NULL -GROUP BY - el_text -ORDER BY total_clicks DESC - ` - -const TOP_PAGES_SQL = ` -WITH - -scroll_depth_cte AS ( -SELECT - events.properties.\`$prev_pageview_pathname\` AS pathname, - countIf(events.event == '$pageview') as total_pageviews, - COUNT(DISTINCT events.properties.distinct_id) as unique_visitors, -- might want to use person id? have seen a small number of pages where unique > total - avg(CASE - WHEN events.properties.\`$prev_pageview_max_content_percentage\` IS NULL THEN NULL - WHEN events.properties.\`$prev_pageview_max_content_percentage\` > 0.8 THEN 100 - ELSE 0 - END) AS scroll_gt80_percentage, - avg(events.properties.$prev_pageview_max_scroll_percentage) * 100 as average_scroll_percentage -FROM - events -WHERE - (event = '$pageview' OR event = '$pageleave') AND events.properties.\`$prev_pageview_pathname\` IS NOT NULL - AND events.timestamp >= now() - INTERVAL 7 DAY -GROUP BY pathname -) - -, - -session_cte AS ( -SELECT - events.properties.\`$session_id\` AS session_id, - min(events.timestamp) AS min_timestamp, - max(events.timestamp) AS max_timestamp, - dateDiff('second', min_timestamp, max_timestamp) AS duration_s, - - -- create a tuple so that these are grouped in the same order, see https://github.com/ClickHouse/ClickHouse/discussions/42338 - groupArray((events.timestamp, events.properties.\`$referrer\`, events.properties.\`$pathname\`, events.properties.utm_source)) AS tuple_array, - arrayFirstIndex(x -> tupleElement(x, 1) == min_timestamp, tuple_array) as index_of_earliest, - arrayFirstIndex(x -> tupleElement(x, 1) == max_timestamp, tuple_array) as index_of_latest, - tupleElement(arrayElement( - tuple_array, - index_of_earliest - ), 2) AS earliest_referrer, - tupleElement(arrayElement( - tuple_array, - index_of_earliest - ), 3) AS earliest_pathname, - tupleElement(arrayElement( - tuple_array, - index_of_earliest - ), 4) AS earliest_utm_source, - - if(domain(earliest_referrer) = '', earliest_referrer, domain(earliest_referrer)) AS referrer_domain, - multiIf( - earliest_utm_source IS NOT NULL, earliest_utm_source, - -- This will need to be an approach that scales better - referrer_domain == 'app.posthog.com', 'posthog', - referrer_domain == 'eu.posthog.com', 'posthog', - referrer_domain == 'posthog.com', 'posthog', - referrer_domain == 'www.google.com', 'google', - referrer_domain == 'www.google.co.uk', 'google', - referrer_domain == 'www.google.com.hk', 'google', - referrer_domain == 'www.google.de', 'google', - referrer_domain == 't.co', 'twitter', - referrer_domain == 'github.com', 'github', - referrer_domain == 'duckduckgo.com', 'duckduckgo', - referrer_domain == 'www.bing.com', 'bing', - referrer_domain == 'bing.com', 'bing', - referrer_domain == 'yandex.ru', 'yandex', - referrer_domain == 'quora.com', 'quora', - referrer_domain == 'www.quora.com', 'quora', - referrer_domain == 'linkedin.com', 'linkedin', - referrer_domain == 'www.linkedin.com', 'linkedin', - 
startsWith(referrer_domain, 'http://localhost:'), 'localhost', - referrer_domain - ) AS blended_source, - - countIf(events.event == '$pageview') AS num_pageviews, - countIf(events.event == '$autocapture') AS num_autocaptures, - -- in v1 we'd also want to count whether there were any conversion events - - any(events.person_id) as person_id, - -- definition of a GA4 bounce from here https://support.google.com/analytics/answer/12195621?hl=en - (num_autocaptures == 0 AND num_pageviews <= 1 AND duration_s < 10) AS is_bounce -FROM - events -WHERE - session_id IS NOT NULL -AND - events.timestamp >= now() - INTERVAL 8 DAY -GROUP BY - events.properties.\`$session_id\` -HAVING - min_timestamp >= now() - INTERVAL 7 DAY -) - -, - -bounce_rate_cte AS ( -SELECT session_cte.earliest_pathname, - avg(session_cte.is_bounce) as bounce_rate -FROM session_cte -GROUP BY earliest_pathname -) - - - -SELECT scroll_depth_cte.pathname as pathname, -scroll_depth_cte.total_pageviews as total_pageviews, -scroll_depth_cte.unique_visitors as unique_visitors, -scroll_depth_cte.scroll_gt80_percentage as scroll_gt80_percentage, -scroll_depth_cte.average_scroll_percentage as average_scroll_percentage, -bounce_rate_cte.bounce_rate as bounce_rate -FROM - scroll_depth_cte LEFT OUTER JOIN bounce_rate_cte -ON scroll_depth_cte.pathname = bounce_rate_cte.earliest_pathname -ORDER BY total_pageviews DESC -` - -const TOP_SOURCES = ` -WITH - -session_cte AS ( -SELECT - events.properties.\`$session_id\` AS session_id, - min(events.timestamp) AS min_timestamp, - max(events.timestamp) AS max_timestamp, - dateDiff('second', min_timestamp, max_timestamp) AS duration_s, - - -- create a tuple so that these are grouped in the same order, see https://github.com/ClickHouse/ClickHouse/discussions/42338 - groupArray((events.timestamp, events.properties.\`$referrer\`, events.properties.\`$pathname\`, events.properties.utm_source)) AS tuple_array, - arrayFirstIndex(x -> tupleElement(x, 1) == min_timestamp, tuple_array) as index_of_earliest, - arrayFirstIndex(x -> tupleElement(x, 1) == max_timestamp, tuple_array) as index_of_latest, - tupleElement(arrayElement( - tuple_array, - index_of_earliest - ), 2) AS earliest_referrer, - tupleElement(arrayElement( - tuple_array, - index_of_earliest - ), 3) AS earliest_pathname, - tupleElement(arrayElement( - tuple_array, - index_of_earliest - ), 4) AS earliest_utm_source, - - if(domain(earliest_referrer) = '', earliest_referrer, domain(earliest_referrer)) AS referrer_domain, - multiIf( - earliest_utm_source IS NOT NULL, earliest_utm_source, - -- This will need to be an approach that scales better - referrer_domain == 'app.posthog.com', 'posthog', - referrer_domain == 'eu.posthog.com', 'posthog', - referrer_domain == 'posthog.com', 'posthog', - referrer_domain == 'www.google.com', 'google', - referrer_domain == 'www.google.co.uk', 'google', - referrer_domain == 'www.google.com.hk', 'google', - referrer_domain == 'www.google.de', 'google', - referrer_domain == 't.co', 'twitter', - referrer_domain == 'github.com', 'github', - referrer_domain == 'duckduckgo.com', 'duckduckgo', - referrer_domain == 'www.bing.com', 'bing', - referrer_domain == 'bing.com', 'bing', - referrer_domain == 'yandex.ru', 'yandex', - referrer_domain == 'quora.com', 'quora', - referrer_domain == 'www.quora.com', 'quora', - referrer_domain == 'linkedin.com', 'linkedin', - referrer_domain == 'www.linkedin.com', 'linkedin', - startsWith(referrer_domain, 'http://localhost:'), 'localhost', - referrer_domain - ) AS blended_source, - - 
countIf(events.event == '$pageview') AS num_pageviews,
-    countIf(events.event == '$autocapture') AS num_autocaptures,
-    -- in v1 we'd also want to count whether there were any conversion events
-
-    any(events.person_id) as person_id,
-    -- definition of a GA4 bounce from here https://support.google.com/analytics/answer/12195621?hl=en
-    (num_autocaptures == 0 AND num_pageviews <= 1 AND duration_s < 10) AS is_bounce
-FROM
-    events
-WHERE
-    session_id IS NOT NULL
-AND
-    events.timestamp >= now() - INTERVAL 8 DAY
-GROUP BY
-    events.properties.\`$session_id\`
-HAVING
-    min_timestamp >= now() - INTERVAL 7 DAY
-)
-
-
-
-SELECT
-    blended_source,
-    count(num_pageviews) as total_pageviews,
-    count(DISTINCT person_id) as unique_visitors,
-    avg(is_bounce) AS bounce_rate
-FROM
-    session_cte
-WHERE
-    blended_source IS NOT NULL
-GROUP BY blended_source
-
-ORDER BY total_pageviews DESC
-LIMIT 100
-
-
-`
diff --git a/posthog/api/query.py b/posthog/api/query.py
index 628a55da744eea..f019cde8bb674b 100644
--- a/posthog/api/query.py
+++ b/posthog/api/query.py
@@ -28,6 +28,7 @@
 from posthog.hogql_queries.lifecycle_query_runner import LifecycleQueryRunner
 from posthog.hogql_queries.trends_query_runner import TrendsQueryRunner
+from posthog.hogql_queries.web_analytics.top_sources import TopSourcesQueryRunner
 from posthog.models import Team
 from posthog.models.event.events_query import run_events_query
 from posthog.models.user import User
@@ -250,6 +251,10 @@ def process_query(
         )
         serializer.is_valid(raise_exception=True)
         return get_session_events(serializer) or {}
+    elif query_kind == "WebTopSourcesQuery":
+        refresh_requested = refresh_requested_by_client(request) if request else False
+        top_sources_query_runner = TopSourcesQueryRunner(query_json, team)
+        return _unwrap_pydantic_dict(top_sources_query_runner.run(refresh_requested=refresh_requested))
     else:
         if query_json.get("source"):
             return process_query(team, query_json["source"])
diff --git a/posthog/hogql_queries/web_analytics/__init__.py b/posthog/hogql_queries/web_analytics/__init__.py
new file mode 100644
index 00000000000000..e69de29bb2d1d6
diff --git a/posthog/hogql_queries/web_analytics/top_sources.py b/posthog/hogql_queries/web_analytics/top_sources.py
new file mode 100644
index 00000000000000..598e93e87ef3aa
--- /dev/null
+++ b/posthog/hogql_queries/web_analytics/top_sources.py
@@ -0,0 +1,148 @@
+from datetime import timedelta
+from math import ceil
+from typing import Optional
+
+from django.utils.timezone import datetime
+
+from posthog.caching.insights_api import BASE_MINIMUM_INSIGHT_REFRESH_INTERVAL, REDUCED_MINIMUM_INSIGHT_REFRESH_INTERVAL
+from posthog.caching.utils import is_stale
+from posthog.hogql import ast
+from posthog.hogql.parser import parse_select
+from posthog.hogql.query import execute_hogql_query
+from posthog.hogql_queries.utils.query_date_range import QueryDateRange
+from posthog.hogql_queries.web_analytics.web_analytics_query_runner import QueryResponse, WebAnalyticsQueryRunner
+from posthog.models.filters.mixins.utils import cached_property
+
+
+class TopSourcesQueryRunner(WebAnalyticsQueryRunner):
+    def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery:
+        with self.timings.measure("top_sources_query"):
+            top_sources_query = parse_select(
+                """
+WITH
+
+session_cte AS (
+SELECT
+    events.properties.`$session_id` AS session_id,
+    min(events.timestamp) AS min_timestamp,
+    max(events.timestamp) AS max_timestamp,
+    dateDiff('second', min_timestamp, max_timestamp) AS duration_s,
+
+    -- create a tuple so that these are grouped in the same order, see https://github.com/ClickHouse/ClickHouse/discussions/42338
+    groupArray((events.timestamp, events.properties.`$referrer`, events.properties.`$pathname`, events.properties.utm_source)) AS tuple_array,
+    arrayFirstIndex(x -> tupleElement(x, 1) == min_timestamp, tuple_array) as index_of_earliest,
+    arrayFirstIndex(x -> tupleElement(x, 1) == max_timestamp, tuple_array) as index_of_latest,
+    tupleElement(arrayElement(
+        tuple_array,
+        index_of_earliest
+    ), 2) AS earliest_referrer,
+    tupleElement(arrayElement(
+        tuple_array,
+        index_of_earliest
+    ), 3) AS earliest_pathname,
+    tupleElement(arrayElement(
+        tuple_array,
+        index_of_earliest
+    ), 4) AS earliest_utm_source,
+
+    if(domain(earliest_referrer) = '', earliest_referrer, domain(earliest_referrer)) AS referrer_domain,
+    multiIf(
+        earliest_utm_source IS NOT NULL, earliest_utm_source,
+        -- This will need to be an approach that scales better
+        referrer_domain == 'app.posthog.com', 'posthog',
+        referrer_domain == 'eu.posthog.com', 'posthog',
+        referrer_domain == 'posthog.com', 'posthog',
+        referrer_domain == 'www.google.com', 'google',
+        referrer_domain == 'www.google.co.uk', 'google',
+        referrer_domain == 'www.google.com.hk', 'google',
+        referrer_domain == 'www.google.de', 'google',
+        referrer_domain == 't.co', 'twitter',
+        referrer_domain == 'github.com', 'github',
+        referrer_domain == 'duckduckgo.com', 'duckduckgo',
+        referrer_domain == 'www.bing.com', 'bing',
+        referrer_domain == 'bing.com', 'bing',
+        referrer_domain == 'yandex.ru', 'yandex',
+        referrer_domain == 'quora.com', 'quora',
+        referrer_domain == 'www.quora.com', 'quora',
+        referrer_domain == 'linkedin.com', 'linkedin',
+        referrer_domain == 'www.linkedin.com', 'linkedin',
+        startsWith(referrer_domain, 'http://localhost:'), 'localhost',
+        referrer_domain
+    ) AS blended_source,
+
+    countIf(events.event == '$pageview') AS num_pageviews,
+    countIf(events.event == '$autocapture') AS num_autocaptures,
+    -- in v1 we'd also want to count whether there were any conversion events
+
+    any(events.person_id) as person_id,
+    -- definition of a GA4 bounce from here https://support.google.com/analytics/answer/12195621?hl=en
+    (num_autocaptures == 0 AND num_pageviews <= 1 AND duration_s < 10) AS is_bounce
+FROM
+    events
+WHERE
+    session_id IS NOT NULL
+AND
+    events.timestamp >= now() - INTERVAL 8 DAY
+GROUP BY
+    events.properties.`$session_id`
+HAVING
+    min_timestamp >= now() - INTERVAL 7 DAY
+)
+
+
+
+SELECT
+    blended_source,
+    -- sum, not count: count(num_pageviews) would count sessions, not pageviews
+    sum(num_pageviews) as total_pageviews,
+    count(DISTINCT person_id) as unique_visitors,
+    avg(is_bounce) AS bounce_rate
+FROM
+    session_cte
+WHERE
+    blended_source IS NOT NULL
+GROUP BY blended_source
+
+ORDER BY total_pageviews DESC
+LIMIT 100
+                """,
+                timings=self.timings,
+            )
+            return top_sources_query
+
+    def calculate(self) -> QueryResponse:
+        response = execute_hogql_query(
+            query_type="top_sources_query",
+            query=self.to_query(),
+            team=self.team,
+            timings=self.timings,
+        )
+
+        # Wrap the raw HogQL response: CachedQueryResponse forbids extra fields,
+        # so only the result rows and timings are passed through.
+        return QueryResponse(result=response.results, timings=response.timings)
+
+    @cached_property
+    def query_date_range(self):
+        # WebTopSourcesQuery does not define dateRange or interval fields yet,
+        # so fall back to QueryDateRange's defaults when they are absent.
+        return QueryDateRange(
+            date_range=getattr(self.query, "dateRange", None),
+            team=self.team,
+            interval=getattr(self.query, "interval", None),
+            now=datetime.now(),
+        )
+
+    def _is_stale(self, cached_result_package):
+        date_to = self.query_date_range.date_to()
+        interval = self.query_date_range.interval_name
+        return is_stale(self.team, date_to, interval, cached_result_package)
+
+    def _refresh_frequency(self):
+        date_to = self.query_date_range.date_to()
+        date_from = self.query_date_range.date_from()
+        interval = self.query_date_range.interval_name
+
+        delta_days: Optional[int] = None
+        if date_from and date_to:
+            delta = date_to - date_from
+            delta_days = ceil(delta.total_seconds() / timedelta(days=1).total_seconds())
+
+        refresh_frequency = BASE_MINIMUM_INSIGHT_REFRESH_INTERVAL
+        if interval == "hour" or (delta_days is not None and delta_days <= 7):
+            # The interval is shorter for short-term insights
+            refresh_frequency = REDUCED_MINIMUM_INSIGHT_REFRESH_INTERVAL
+
+        return refresh_frequency
diff --git a/posthog/hogql_queries/web_analytics/validationError.txt b/posthog/hogql_queries/web_analytics/validationError.txt
new file mode 100644
index 00000000000000..e69de29bb2d1d6
diff --git a/posthog/hogql_queries/web_analytics/web_analytics_query_runner.py b/posthog/hogql_queries/web_analytics/web_analytics_query_runner.py
new file mode 100644
index 00000000000000..978fd480dacb98
--- /dev/null
+++ b/posthog/hogql_queries/web_analytics/web_analytics_query_runner.py
@@ -0,0 +1,97 @@
+from abc import ABC, abstractmethod
+from datetime import datetime
+from typing import Any, Generic, List, Optional, TypeVar
+
+from django.conf import settings
+from django.core.cache import cache
+from prometheus_client import Counter
+from pydantic import BaseModel, ConfigDict
+
+from posthog.clickhouse.query_tagging import tag_queries
+from posthog.hogql.timings import HogQLTimings
+from posthog.metrics import LABEL_TEAM_ID
+from posthog.models import Team
+from posthog.schema import QueryTiming
+from posthog.utils import generate_cache_key, get_safe_cache
+
+QUERY_CACHE_WRITE_COUNTER = Counter(
+    "posthog_web_query_cache_write_total",
+    "When a query result was persisted in the cache.",
+    labelnames=[LABEL_TEAM_ID],
+)
+
+QUERY_CACHE_HIT_COUNTER = Counter(
+    "posthog_web_query_cache_hit_total",
+    "Whether we could fetch the query from the cache or not.",
+    labelnames=[LABEL_TEAM_ID, "cache_hit"],
+)
+
+DataT = TypeVar("DataT")
+
+
+class QueryResponse(BaseModel, Generic[DataT]):
+    model_config = ConfigDict(
+        extra="forbid",
+    )
+    result: DataT
+    timings: Optional[List[QueryTiming]] = None
+
+
+class CachedQueryResponse(QueryResponse):
+    model_config = ConfigDict(
+        extra="forbid",
+    )
+    is_cached: bool
+    last_refresh: str
+    next_allowed_client_refresh: str
+
+
+class WebAnalyticsQueryRunner(ABC):
+    query: Any
+    team: Team
+    timings: HogQLTimings
+
+    def __init__(self, query: Any, team: Team, timings: Optional[HogQLTimings] = None):
+        # Keep the raw query around: subclasses (e.g. TopSourcesQueryRunner's
+        # query_date_range) read dateRange/interval off self.query.
+        self.query = query
+        self.team = team
+        self.timings = timings or HogQLTimings()
+
+    @abstractmethod
+    def calculate(self) -> QueryResponse:
+        raise NotImplementedError()
+
+    def run(self, refresh_requested: bool) -> CachedQueryResponse:
+        cache_key = self._cache_key()
+        tag_queries(cache_key=cache_key)
+
+        if not refresh_requested:
+            cached_response = get_safe_cache(cache_key)
+            if cached_response:
+                if not self._is_stale(cached_response):
+                    QUERY_CACHE_HIT_COUNTER.labels(team_id=self.team.pk, cache_hit="hit").inc()
+                    cached_response.is_cached = True
+                    return cached_response
+                else:
+                    QUERY_CACHE_HIT_COUNTER.labels(team_id=self.team.pk, cache_hit="stale").inc()
+            else:
+                QUERY_CACHE_HIT_COUNTER.labels(team_id=self.team.pk, cache_hit="miss").inc()
+
+        fresh_response_dict = self.calculate().model_dump()
+        fresh_response_dict["is_cached"] = False
+        fresh_response_dict["last_refresh"] = datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")
+        fresh_response_dict["next_allowed_client_refresh"] = (datetime.now() + self._refresh_frequency()).strftime(
+            "%Y-%m-%dT%H:%M:%SZ"
+        )
+        fresh_response = CachedQueryResponse(**fresh_response_dict)
+        cache.set(cache_key, fresh_response, settings.CACHED_RESULTS_TTL)
+        QUERY_CACHE_WRITE_COUNTER.labels(team_id=self.team.pk).inc()
+        return fresh_response
+
+    def _cache_key(self) -> str:
+        return generate_cache_key(f"query_{self.__class__.__name__}_{self.team.pk}_{self.team.timezone}")
+
+    @abstractmethod
+    def _is_stale(self, cached_result_package):
+        raise NotImplementedError()
+
+    @abstractmethod
+    def _refresh_frequency(self):
+        raise NotImplementedError()
diff --git a/posthog/schema.py b/posthog/schema.py
index 207fb07d6e62c1..dc61cee12cd72d 100644
--- a/posthog/schema.py
+++ b/posthog/schema.py
@@ -468,6 +468,14 @@ class TrendsQueryResponse(BaseModel):
     timings: Optional[List[QueryTiming]] = None
 
 
+class WebTopSourcesQuery(BaseModel):
+    model_config = ConfigDict(
+        extra="forbid",
+    )
+    kind: Literal["WebTopSourcesQuery"] = "WebTopSourcesQuery"
+    query: Any
+
+
 class Breakdown(BaseModel):
     model_config = ConfigDict(
         extra="forbid",
@@ -982,9 +990,9 @@ class DataTableNode(BaseModel):
     showSavedQueries: Optional[bool] = Field(default=None, description="Shows a list of saved queries")
     showSearch: Optional[bool] = Field(default=None, description="Include a free text search field (PersonsNode only)")
     showTimings: Optional[bool] = Field(default=None, description="Show a detailed query timing breakdown")
-    source: Union[EventsNode, EventsQuery, PersonsNode, HogQLQuery, TimeToSeeDataSessionsQuery] = Field(
-        ..., description="Source of the events"
-    )
+    source: Union[
+        EventsNode, EventsQuery, PersonsNode, HogQLQuery, TimeToSeeDataSessionsQuery, WebTopSourcesQuery
+    ] = Field(..., description="Source of the events")
 
 
 class PropertyGroupFilter(BaseModel):
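
For local verification, the runner can be driven directly. A minimal sketch, assuming an existing Team instance and the empty query payload the frontend sends (source: { kind: NodeKind.WebTopSourcesQuery, query: {} }); the top_sources_for helper is illustrative only:

from posthog.hogql_queries.web_analytics.top_sources import TopSourcesQueryRunner
from posthog.models import Team

def top_sources_for(team: Team):
    # process_query() constructs the runner the same way, from the raw query JSON.
    runner = TopSourcesQueryRunner({"kind": "WebTopSourcesQuery", "query": {}}, team)
    # run() consults the cache first; a repeat call inside the refresh window
    # returns the cached package with is_cached=True.
    response = runner.run(refresh_requested=False)
    # Rows are (blended_source, total_pageviews, unique_visitors, bounce_rate).
    return response.result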
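
The new process_query branch can also be exercised end to end through the generic query endpoint. A sketch of a hypothetical API test, assuming PostHog's standard APIBaseTest fixtures (self.team, self.client); the test is not included in this patch:

from posthog.test.base import APIBaseTest

class TestWebTopSourcesQuery(APIBaseTest):
    def test_web_top_sources_query_runs(self):
        # POST the new node kind through /api/projects/:id/query/;
        # process_query() dispatches it to TopSourcesQueryRunner.
        response = self.client.post(
            f"/api/projects/{self.team.id}/query/",
            {"query": {"kind": "WebTopSourcesQuery", "query": {}}},
        )
        self.assertEqual(response.status_code, 200)
        self.assertIn("result", response.json())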
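
The multiIf() mapping from referrer domain to blended source is hard-coded, and the inline comment already flags that it "will need to be an approach that scales better". One direction is to keep the mapping as data on the Python side and render the branches into the HogQL template; a hypothetical sketch (names are illustrative, not part of the patch):

# Hypothetical follow-up for the "approach that scales better" comment:
# keep the domain-to-source mapping as data and render multiIf() branches from it.
REFERRER_DOMAIN_TO_SOURCE = {
    "app.posthog.com": "posthog",
    "eu.posthog.com": "posthog",
    "posthog.com": "posthog",
    "www.google.com": "google",
    "t.co": "twitter",
    "github.com": "github",
    # ... remaining domains from the query above
}

def blended_source_branches() -> str:
    """Render the mapping as 'condition, value' pairs for the multiIf() call."""
    return ",\n".join(
        f"referrer_domain == '{domain}', '{source}'"
        for domain, source in REFERRER_DOMAIN_TO_SOURCE.items()
    )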