Skip to content

Commit

Permalink
Get top source query working
Browse files Browse the repository at this point in the history
  • Loading branch information
robbie-c committed Sep 26, 2023
1 parent 2ca3c52 commit 297be07
Show file tree
Hide file tree
Showing 10 changed files with 116 additions and 99 deletions.
20 changes: 17 additions & 3 deletions frontend/src/queries/nodes/DataTable/DataTable.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,14 @@ import { LemonDivider } from 'lib/lemon-ui/LemonDivider'
import clsx from 'clsx'
import { SessionPlayerModal } from 'scenes/session-recordings/player/modal/SessionPlayerModal'
import { OpenEditorButton } from '~/queries/nodes/Node/OpenEditorButton'
import { isEventsQuery, isHogQlAggregation, isHogQLQuery, isPersonsNode, taxonomicFilterToHogQl } from '~/queries/utils'
import {
isEventsQuery,
isHogQlAggregation,
isHogQLQuery,
isPersonsNode,
isWebTopSourcesQuery,
taxonomicFilterToHogQl,
} from '~/queries/utils'
import { PersonPropertyFilters } from '~/queries/nodes/PersonsNode/PersonPropertyFilters'
import { PersonsSearch } from '~/queries/nodes/PersonsNode/PersonsSearch'
import { PersonDeleteModal } from 'scenes/persons/PersonDeleteModal'
Expand Down Expand Up @@ -115,7 +122,10 @@ export function DataTable({ uniqueKey, query, setQuery, context, cachedResults }
const isReadOnly = setQuery === undefined

const actionsColumnShown = showActions && isEventsQuery(query.source) && columnsInResponse?.includes('*')
const columnsInLemonTable = isHogQLQuery(query.source) ? columnsInResponse ?? columnsInQuery : columnsInQuery
const columnsInLemonTable =
isHogQLQuery(query.source) || isWebTopSourcesQuery(query.source)
? columnsInResponse ?? columnsInQuery
: columnsInQuery

const lemonColumns: LemonTableColumn<DataTableRow, any>[] = [
...columnsInLemonTable.map((key, index) => ({
Expand All @@ -132,7 +142,11 @@ export function DataTable({ uniqueKey, query, setQuery, context, cachedResults }
return { props: { colSpan: 0 } }
}
} else if (result) {
if (isEventsQuery(query.source) || isHogQLQuery(query.source)) {
if (
isEventsQuery(query.source) ||
isHogQLQuery(query.source) ||
isWebTopSourcesQuery(query.source)
) {
return renderColumn(key, result[index], result, query, setQuery, context)
}
return renderColumn(key, result[key], result, query, setQuery, context)
Expand Down
19 changes: 17 additions & 2 deletions frontend/src/queries/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,9 @@
},
{
"$ref": "#/definitions/TimeToSeeDataSessionsQuery"
},
{
"$ref": "#/definitions/WebTopSourcesQuery"
}
]
},
Expand Down Expand Up @@ -2273,16 +2276,28 @@
"required": ["result"],
"type": "object"
},
"WebAnalyticsFilters": {
"additionalProperties": false,
"type": "object"
},
"WebTopSourcesQuery": {
"additionalProperties": false,
"properties": {
"dateRange": {
"$ref": "#/definitions/DateRange"
},
"filters": {
"$ref": "#/definitions/WebAnalyticsFilters"
},
"kind": {
"const": "WebTopSourcesQuery",
"type": "string"
},
"query": {}
"response": {
"$ref": "#/definitions/HogQLQueryResponse"
}
},
"required": ["kind", "query"],
"required": ["kind", "filters"],
"type": "object"
}
}
Expand Down
10 changes: 8 additions & 2 deletions frontend/src/queries/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ export type AnyDataNode =
| HogQLQuery
| HogQLMetadata
| TimeToSeeDataSessionsQuery
| WebTopSourcesQuery

export type QuerySchema =
// Data nodes (see utils.ts)
Expand Down Expand Up @@ -486,11 +487,16 @@ export interface LifecycleQuery extends InsightsQueryBase {
response?: LifecycleQueryResponse
}

export interface WebAnalyticsQueryBase {}
export interface WebAnalyticsFilters {}

export interface WebAnalyticsQueryBase {
dateRange?: DateRange
}

export interface WebTopSourcesQuery extends WebAnalyticsQueryBase {
kind: NodeKind.WebTopSourcesQuery
query: any
filters: WebAnalyticsFilters
response?: HogQLQueryResponse
}
export interface WebTopSourcesQueryResponse extends QueryResponse {}

Expand Down
5 changes: 5 additions & 0 deletions frontend/src/queries/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import {
TimeToSeeDataJSONNode,
DatabaseSchemaQuery,
SavedInsightNode,
WebTopSourcesQuery,
} from '~/queries/schema'
import { TaxonomicFilterGroupType, TaxonomicFilterValue } from 'lib/components/TaxonomicFilter/types'
import { dayjs } from 'lib/dayjs'
Expand Down Expand Up @@ -90,6 +91,10 @@ export function isHogQLQuery(node?: Node | null): node is HogQLQuery {
return node?.kind === NodeKind.HogQLQuery
}

export function isWebTopSourcesQuery(node?: Node | null): node is WebTopSourcesQuery {
return node?.kind === NodeKind.WebTopSourcesQuery
}

export function containsHogQLQuery(node?: Node | null): boolean {
if (!node) {
return false
Expand Down
23 changes: 12 additions & 11 deletions frontend/src/scenes/web-analytics/WebAnalyticsScene.tsx
Original file line number Diff line number Diff line change
@@ -1,22 +1,23 @@
import { SceneExport } from 'scenes/sceneTypes'
import { webAnalyticsLogic } from 'scenes/web-analytics/webAnalyticsLogic'
import { Query } from '~/queries/Query/Query'
import { DataTableNode, NodeKind } from '~/queries/schema'
import { NodeKind } from '~/queries/schema'

export function WebAnalyticsScene(): JSX.Element {
const query: DataTableNode = {
full: true,
kind: NodeKind.DataTableNode,
source: {
kind: NodeKind.WebTopSourcesQuery,
query: {},
},
}

return (
<div>
Top pages
<Query query={query} readOnly={true} />
<Query
query={{
full: true,
kind: NodeKind.DataTableNode,
source: {
kind: NodeKind.WebTopSourcesQuery,
filters: {},
},
}}
readOnly={true}
/>
</div>
)
}
Expand Down
3 changes: 1 addition & 2 deletions posthog/api/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,9 +252,8 @@ def process_query(
serializer.is_valid(raise_exception=True)
return get_session_events(serializer) or {}
elif query_kind == "WebTopSourcesQuery":
refresh_requested = refresh_requested_by_client(request) if request else False
top_sources_query_runner = TopSourcesQueryRunner(query_json, team)
return _unwrap_pydantic_dict(top_sources_query_runner.run(refresh_requested=refresh_requested))
return _unwrap_pydantic_dict(top_sources_query_runner.run())
else:
if query_json.get("source"):
return process_query(team, query_json["source"])
Expand Down
7 changes: 3 additions & 4 deletions posthog/hogql_queries/web_analytics/top_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from posthog.hogql_queries.utils.query_date_range import QueryDateRange
from posthog.hogql_queries.web_analytics.web_analytics_query_runner import WebAnalyticsQueryRunner
from posthog.models.filters.mixins.utils import cached_property
from posthog.schema import HogQLQueryResponse


class TopSourcesQueryRunner(WebAnalyticsQueryRunner):
Expand Down Expand Up @@ -109,7 +110,7 @@ def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery:
)
return top_sources_query

def calculate(self):
def calculate(self) -> HogQLQueryResponse:
response = execute_hogql_query(
query_type="top_sources_query",
query=self.to_query(),
Expand All @@ -121,9 +122,7 @@ def calculate(self):

@cached_property
def query_date_range(self):
return QueryDateRange(
date_range=self.query.dateRange, team=self.team, interval=self.query.interval, now=datetime.now()
)
return QueryDateRange(date_range=self.query.dateRange, team=self.team, interval=None, now=datetime.now())

def _is_stale(self, cached_result_package):
date_to = self.query_date_range.date_to()
Expand Down
90 changes: 20 additions & 70 deletions posthog/hogql_queries/web_analytics/web_analytics_query_runner.py
Original file line number Diff line number Diff line change
@@ -1,92 +1,42 @@
from abc import ABC, abstractmethod
from datetime import datetime
from typing import Any, Generic, List, Optional, TypeVar
from typing import Any, Optional, Dict

from django.conf import settings
from django.core.cache import cache
from prometheus_client import Counter
from pydantic import BaseModel, ConfigDict

from posthog.clickhouse.query_tagging import tag_queries
from posthog.hogql.timings import HogQLTimings
from posthog.metrics import LABEL_TEAM_ID
from posthog.models import Team
from posthog.schema import QueryTiming
from posthog.utils import generate_cache_key, get_safe_cache

QUERY_CACHE_WRITE_COUNTER = Counter(
"posthog_web_query_cache_write_total",
"When a query result was persisted in the cache.",
labelnames=[LABEL_TEAM_ID],
)

QUERY_CACHE_HIT_COUNTER = Counter(
"posthog_web_query_cache_hit_total",
"Whether we could fetch the query from the cache or not.",
labelnames=[LABEL_TEAM_ID, "cache_hit"],
)

DataT = TypeVar("DataT")


class QueryResponse(BaseModel, Generic[DataT]):
model_config = ConfigDict(
extra="forbid",
)
result: DataT
timings: Optional[List[QueryTiming]] = None


class CachedQueryResponse(QueryResponse):
model_config = ConfigDict(
extra="forbid",
)
is_cached: bool
last_refresh: str
next_allowed_client_refresh: str
from posthog.schema import HogQLQueryResponse
from posthog.types import WebAnalyticsQueryNode
from posthog.utils import generate_cache_key


class WebAnalyticsQueryRunner(ABC):
team: Team
timings: HogQLTimings
query: WebAnalyticsQueryNode

def __init__(self, query: Any, team: Team, timings: Optional[HogQLTimings] = None):
def __init__(
self, query: WebAnalyticsQueryNode | Dict[str, Any], team: Team, timings: Optional[HogQLTimings] = None
):
self.team = team
self.timings = timings or HogQLTimings()
if isinstance(query, WebAnalyticsQueryNode):
self.query = query
else:
self.query = WebAnalyticsQueryNode.model_validate(query)

@abstractmethod
def calculate(self) -> QueryResponse:
def calculate(self) -> HogQLQueryResponse:
raise NotImplementedError()

def run(self, refresh_requested: bool) -> CachedQueryResponse:
cache_key = self._cache_key()
tag_queries(cache_key=cache_key)
def run(self) -> HogQLQueryResponse:
return self.calculate()

if not refresh_requested:
cached_response = get_safe_cache(cache_key)
if cached_response:
if not self._is_stale(cached_response):
QUERY_CACHE_HIT_COUNTER.labels(team_id=self.team.pk, cache_hit="hit").inc()
cached_response.is_cached = True
return cached_response
else:
QUERY_CACHE_HIT_COUNTER.labels(team_id=self.team.pk, cache_hit="stale").inc()
else:
QUERY_CACHE_HIT_COUNTER.labels(team_id=self.team.pk, cache_hit="miss").inc()

fresh_response_dict = self.calculate().model_dump()
fresh_response_dict["is_cached"] = False
fresh_response_dict["last_refresh"] = datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")
fresh_response_dict["next_allowed_client_refresh"] = (datetime.now() + self._refresh_frequency()).strftime(
"%Y-%m-%dT%H:%M:%SZ"
)
fresh_response = CachedQueryResponse(**fresh_response_dict)
cache.set(cache_key, fresh_response, settings.CACHED_RESULTS_TTL)
QUERY_CACHE_WRITE_COUNTER.labels(team_id=self.team.pk).inc()
return fresh_response
def toJSON(self) -> str:
return self.query.model_dump_json(exclude_defaults=True, exclude_none=True)

def _cache_key(self) -> str:
return generate_cache_key(f"query_{self.__class__.__name__}_{self.team.pk}_{self.team.timezone}")
return generate_cache_key(
f"query_{self.__class__.__name__}_{self.toJSON()}_{self.team.pk}_{self.team.timezone}"
)

@abstractmethod
def _is_stale(self, cached_result_package):
Expand Down
26 changes: 22 additions & 4 deletions posthog/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -468,12 +468,11 @@ class TrendsQueryResponse(BaseModel):
timings: Optional[List[QueryTiming]] = None


class WebTopSourcesQuery(BaseModel):
class WebAnalyticsFilters(BaseModel):
pass
model_config = ConfigDict(
extra="forbid",
)
kind: Literal["WebTopSourcesQuery"] = "WebTopSourcesQuery"
query: Any


class Breakdown(BaseModel):
Expand Down Expand Up @@ -662,6 +661,16 @@ class TimeToSeeDataSessionsQuery(BaseModel):
teamId: Optional[float] = Field(default=None, description="Project to filter on. Defaults to current project")


class WebTopSourcesQuery(BaseModel):
model_config = ConfigDict(
extra="forbid",
)
dateRange: Optional[DateRange] = None
filters: WebAnalyticsFilters
kind: Literal["WebTopSourcesQuery"] = "WebTopSourcesQuery"
response: Optional[HogQLQueryResponse] = None


class DatabaseSchemaQuery(BaseModel):
model_config = ConfigDict(
extra="forbid",
Expand Down Expand Up @@ -1261,7 +1270,16 @@ class Model(RootModel):
LifecycleQuery,
TimeToSeeDataSessionsQuery,
DatabaseSchemaQuery,
Union[EventsNode, EventsQuery, ActionsNode, PersonsNode, HogQLQuery, HogQLMetadata, TimeToSeeDataSessionsQuery],
Union[
EventsNode,
EventsQuery,
ActionsNode,
PersonsNode,
HogQLQuery,
HogQLMetadata,
TimeToSeeDataSessionsQuery,
WebTopSourcesQuery,
],
]


Expand Down
12 changes: 11 additions & 1 deletion posthog/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,18 @@
from posthog.models.filters.path_filter import PathFilter
from posthog.models.filters.retention_filter import RetentionFilter
from posthog.models.filters.stickiness_filter import StickinessFilter
from posthog.schema import FunnelsQuery, LifecycleQuery, PathsQuery, RetentionQuery, StickinessQuery, TrendsQuery
from posthog.schema import (
FunnelsQuery,
LifecycleQuery,
PathsQuery,
RetentionQuery,
StickinessQuery,
TrendsQuery,
WebTopSourcesQuery,
)

FilterType = Union[Filter, PathFilter, RetentionFilter, StickinessFilter]

InsightQueryNode = Union[TrendsQuery, FunnelsQuery, RetentionQuery, PathsQuery, StickinessQuery, LifecycleQuery]

WebAnalyticsQueryNode = Union[WebTopSourcesQuery]

0 comments on commit 297be07

Please sign in to comment.