Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(insights): use cached data to help refresh trends query #22993

Closed
wants to merge 23 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 57 additions & 57 deletions livestream/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,62 +1,62 @@
version: "3.8"
version: '3.8'

services:
postgres:
image: postgres:16-alpine
restart: always
ports:
- "5432:5432"
environment:
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres
POSTGRES_DB: liveevents
healthcheck:
test: ["CMD-SHELL", "pg_isready -U postgres"]
interval: 5s
timeout: 5s
postgres:
image: postgres:16-alpine
restart: always
ports:
- '5432:5432'
environment:
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres
POSTGRES_DB: liveevents
healthcheck:
test: ['CMD-SHELL', 'pg_isready -U postgres']
interval: 5s
timeout: 5s

redis:
image: redis:alpine
restart: always
ports:
- "6379:6379"
redis:
image: redis:alpine
restart: always
ports:
- '6379:6379'

redpanda:
image: vectorized/redpanda:v23.2.17
command:
- redpanda start
- --smp 1
- --overprovisioned
- --node-id 0
- --kafka-addr PLAINTEXT://0.0.0.0:29092,OUTSIDE://0.0.0.0:9092
- --advertise-kafka-addr PLAINTEXT://redpanda:29092,OUTSIDE://localhost:9092
- --pandaproxy-addr 0.0.0.0:8082
- --advertise-pandaproxy-addr localhost:8082
ports:
- 8081:8081
- 8082:8082
- 9092:9092
- 29092:29092
redpanda:
image: vectorized/redpanda:v23.2.17
command:
- redpanda start
- --smp 1
- --overprovisioned
- --node-id 0
- --kafka-addr PLAINTEXT://0.0.0.0:29092,OUTSIDE://0.0.0.0:9092
- --advertise-kafka-addr PLAINTEXT://redpanda:29092,OUTSIDE://localhost:9092
- --pandaproxy-addr 0.0.0.0:8082
- --advertise-pandaproxy-addr localhost:8082
ports:
- 8081:8081
- 8082:8082
- 9092:9092
- 29092:29092

console:
image: docker.redpanda.com/redpandadata/console:v2.3.8
restart: on-failure
entrypoint: /bin/sh
command: -c "echo \"$$CONSOLE_CONFIG_FILE\" > /tmp/config.yml; /app/console"
environment:
CONFIG_FILEPATH: /tmp/config.yml
CONSOLE_CONFIG_FILE: |
kafka:
brokers: ["redpanda:29092"]
schemaRegistry:
enabled: true
urls: ["http://redpanda:8081"]
connect:
enabled: true
clusters:
- name: datagen
url: http://connect:8083
ports:
- "8088:8088"
depends_on:
- redpanda
console:
image: docker.redpanda.com/redpandadata/console:v2.3.8
restart: on-failure
entrypoint: /bin/sh
command: -c "echo \"$$CONSOLE_CONFIG_FILE\" > /tmp/config.yml; /app/console"
environment:
CONFIG_FILEPATH: /tmp/config.yml
CONSOLE_CONFIG_FILE: |
kafka:
brokers: ["redpanda:29092"]
schemaRegistry:
enabled: true
urls: ["http://redpanda:8081"]
connect:
enabled: true
clusters:
- name: datagen
url: http://connect:8083
ports:
- '8088:8088'
depends_on:
- redpanda
1 change: 0 additions & 1 deletion mypy-baseline.txt
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,6 @@ posthog/hogql_queries/insights/trends/trends_query_runner.py:0: note: Superclass
posthog/hogql_queries/insights/trends/trends_query_runner.py:0: note: def to_actors_query(self) -> SelectQuery | SelectUnionQuery
posthog/hogql_queries/insights/trends/trends_query_runner.py:0: note: Subclass:
posthog/hogql_queries/insights/trends/trends_query_runner.py:0: note: def to_actors_query(self, time_frame: str | None, series_index: int, breakdown_value: str | int | None = ..., compare_value: Compare | None = ..., include_recordings: bool | None = ...) -> SelectQuery | SelectUnionQuery
posthog/hogql_queries/insights/trends/trends_query_runner.py:0: error: Statement is unreachable [unreachable]
posthog/hogql_queries/insights/trends/trends_query_runner.py:0: error: Argument 1 to "_event_property" of "TrendsQueryRunner" has incompatible type "str | float | list[str | float] | None"; expected "str" [arg-type]
posthog/hogql_queries/insights/stickiness_query_runner.py:0: error: Module "django.utils.timezone" does not explicitly export attribute "datetime" [attr-defined]
posthog/hogql_queries/insights/retention_query_runner.py:0: error: Item "None" of "JoinExpr | None" has no attribute "sample" [union-attr]
Expand Down
18 changes: 0 additions & 18 deletions posthog/api/test/__snapshots__/test_insight.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -1628,24 +1628,6 @@
LIMIT 21
'''
# ---
# name: TestInsight.test_listing_insights_does_not_nplus1.30
'''
SELECT "posthog_taggeditem"."id",
"posthog_taggeditem"."tag_id",
"posthog_taggeditem"."dashboard_id",
"posthog_taggeditem"."insight_id",
"posthog_taggeditem"."event_definition_id",
"posthog_taggeditem"."property_definition_id",
"posthog_taggeditem"."action_id",
"posthog_taggeditem"."feature_flag_id"
FROM "posthog_taggeditem"
WHERE "posthog_taggeditem"."insight_id" IN (1,
2,
3,
4,
5 /* ... */)
'''
# ---
# name: TestInsight.test_listing_insights_does_not_nplus1.4
'''
SELECT "posthog_team"."id",
Expand Down
10 changes: 5 additions & 5 deletions posthog/api/test/test_insight.py
Original file line number Diff line number Diff line change
Expand Up @@ -1216,7 +1216,7 @@ def test_insight_refreshing_query(self, properties_filter, spy_execute_hogql_que
_create_event(team=self.team, event="$pageview", distinct_id="1")
response = self.client.get(f"/api/projects/{self.team.id}/insights/{insight_id}/?refresh=true").json()
self.assertNotIn("code", response)
self.assertEqual(spy_execute_hogql_query.call_count, 2)
self.assertEqual(spy_execute_hogql_query.call_count, 3)
self.assertEqual(response["result"][0]["data"], [0, 0, 0, 0, 0, 0, 2, 1])
self.assertEqual(response["last_refresh"], "2012-01-15T05:01:34Z")
self.assertEqual(response["last_modified_at"], "2012-01-15T04:01:34Z") # did not change
Expand All @@ -1225,7 +1225,7 @@ def test_insight_refreshing_query(self, properties_filter, spy_execute_hogql_que
with freeze_time("2012-01-15T05:17:34.000Z"):
response = self.client.get(f"/api/projects/{self.team.id}/insights/{insight_id}/").json()
self.assertNotIn("code", response)
self.assertEqual(spy_execute_hogql_query.call_count, 2)
self.assertEqual(spy_execute_hogql_query.call_count, 3)
self.assertEqual(response["result"][0]["data"], [0, 0, 0, 0, 0, 0, 2, 1])
self.assertEqual(response["last_refresh"], "2012-01-15T05:01:34Z") # Using cached result
self.assertEqual(response["last_modified_at"], "2012-01-15T04:01:34Z") # did not change
Expand All @@ -1235,7 +1235,7 @@ def test_insight_refreshing_query(self, properties_filter, spy_execute_hogql_que
# Make sure the /query/ endpoint reuses the same cached result
response = self.client.post(f"/api/projects/{self.team.id}/query/", {"query": query_dict}).json()
self.assertNotIn("code", response)
self.assertEqual(spy_execute_hogql_query.call_count, 2)
self.assertEqual(spy_execute_hogql_query.call_count, 3)
self.assertEqual(response["results"][0]["data"], [0, 0, 0, 0, 0, 0, 2, 1])
self.assertEqual(response["last_refresh"], "2012-01-15T05:01:34Z") # Using cached result
self.assertTrue(response["is_cached"])
Expand All @@ -1246,7 +1246,7 @@ def test_insight_refreshing_query(self, properties_filter, spy_execute_hogql_que
f"/api/projects/{self.team.id}/insights/{insight_id}/?refresh=true&from_dashboard={dashboard_id}"
).json()
self.assertNotIn("code", response)
self.assertEqual(spy_execute_hogql_query.call_count, 3)
self.assertEqual(spy_execute_hogql_query.call_count, 4)
self.assertEqual(
response["result"][0]["data"],
[
Expand Down Expand Up @@ -1285,7 +1285,7 @@ def test_insight_refreshing_query(self, properties_filter, spy_execute_hogql_que
f"/api/projects/{self.team.id}/insights/{insight_id}/?refresh=true&from_dashboard={dashboard_id}"
).json()
self.assertNotIn("code", response)
self.assertEqual(spy_execute_hogql_query.call_count, 4)
self.assertEqual(spy_execute_hogql_query.call_count, 5)
self.assertEqual(
response["result"][0]["data"],
[
Expand Down
2 changes: 1 addition & 1 deletion posthog/caching/calculate_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def calculate_cache_key(target: Union[DashboardTile, Insight]) -> Optional[str]:
return None # Uncacheable query-based insight
if dashboard is not None and dashboard.filters:
query_runner.apply_dashboard_filters(DashboardFilter(**dashboard.filters))
return query_runner.get_cache_key()
return query_runner.cache_key

if insight.filters:
return generate_insight_filters_hash(insight, dashboard)
Expand Down
13 changes: 10 additions & 3 deletions posthog/hogql_queries/actors_query_runner.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import itertools
from typing import Optional
from typing import TypeAlias, Optional
from collections.abc import Sequence, Iterator
from posthog.hogql import ast
from posthog.hogql.parser import parse_expr, parse_order_expr
Expand All @@ -8,13 +8,20 @@
from posthog.hogql_queries.insights.insight_actors_query_runner import InsightActorsQueryRunner
from posthog.hogql_queries.insights.paginators import HogQLHasMorePaginator
from posthog.hogql_queries.query_runner import QueryRunner, get_query_runner
from posthog.schema import ActorsQuery, ActorsQueryResponse, CachedActorsQueryResponse, DashboardFilter
from posthog.schema import (
ActorsQuery,
ActorsQueryResponse,
CachedActorsQueryResponse,
DashboardFilter,
CacheMissResponse,
)


class ActorsQueryRunner(QueryRunner):
query: ActorsQuery
response: ActorsQueryResponse
cached_response: CachedActorsQueryResponse
CachedResponseType: TypeAlias = CachedActorsQueryResponse
cached_response: CachedResponseType | CacheMissResponse

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
Expand Down
13 changes: 10 additions & 3 deletions posthog/hogql_queries/events_query_runner.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
from datetime import timedelta
from typing import Optional
from typing import TypeAlias, Optional

from dateutil.parser import isoparse
from django.db.models import Prefetch
Expand All @@ -18,7 +18,13 @@
from posthog.models.element import chain_to_elements
from posthog.models.person.person import get_distinct_ids_for_subquery
from posthog.models.person.util import get_persons_by_distinct_ids
from posthog.schema import DashboardFilter, EventsQuery, EventsQueryResponse, CachedEventsQueryResponse
from posthog.schema import (
DashboardFilter,
EventsQuery,
EventsQueryResponse,
CachedEventsQueryResponse,
CacheMissResponse,
)
from posthog.utils import relative_date_parse

# Allow-listed fields returned when you select "*" from events. Person and group fields will be nested later.
Expand All @@ -37,7 +43,8 @@
class EventsQueryRunner(QueryRunner):
query: EventsQuery
response: EventsQueryResponse
cached_response: CachedEventsQueryResponse
CachedResponseType: TypeAlias = CachedEventsQueryResponse
cached_response: CachedResponseType | CacheMissResponse

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
Expand Down
6 changes: 4 additions & 2 deletions posthog/hogql_queries/hogql_query_runner.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Optional, cast
from typing import TypeAlias, Optional, cast
from collections.abc import Callable

from posthog.hogql import ast
Expand All @@ -16,13 +16,15 @@
DashboardFilter,
HogQLFilters,
DateRange,
CacheMissResponse,
)


class HogQLQueryRunner(QueryRunner):
query: HogQLQuery
response: HogQLQueryResponse
cached_response: CachedHogQLQueryResponse
CachedResponseType: TypeAlias = CachedHogQLQueryResponse
cached_response: CachedResponseType | CacheMissResponse

def to_query(self) -> ast.SelectQuery:
if self.timings is None:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import dataclasses
from typing import Literal, Optional, Any, TypedDict, cast
from typing import TypeAlias, Literal, Optional, Any, TypedDict, cast

from posthog.constants import AUTOCAPTURE_EVENT
from posthog.hogql.parser import parse_select
Expand Down Expand Up @@ -40,6 +40,7 @@
HogQLQueryModifiers,
HogQLQueryResponse,
EventOddsRatioSerialized,
CacheMissResponse,
)


Expand Down Expand Up @@ -87,7 +88,8 @@ class FunnelCorrelationQueryRunner(QueryRunner):

query: FunnelCorrelationQuery
response: FunnelCorrelationResponse
cached_response: CachedFunnelCorrelationResponse
CachedResponseType: TypeAlias = CachedFunnelCorrelationResponse
cached_response: CachedResponseType | CacheMissResponse

funnels_query: FunnelsQuery
actors_query: FunnelsActorsQuery
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from datetime import timedelta
from posthog.hogql.constants import HogQLGlobalSettings, MAX_BYTES_BEFORE_EXTERNAL_GROUP_BY
from math import ceil
from typing import Optional, Any
from typing import TypeAlias, Optional, Any

from django.utils.timezone import datetime
from posthog.caching.insights_api import (
Expand Down Expand Up @@ -29,13 +29,15 @@
FunnelsQuery,
FunnelsQueryResponse,
HogQLQueryModifiers,
CacheMissResponse,
)


class FunnelsQueryRunner(QueryRunner):
query: FunnelsQuery
response: FunnelsQueryResponse
cached_response: CachedFunnelsQueryResponse
CachedResponseType: TypeAlias = CachedFunnelsQueryResponse
cached_response: CachedResponseType | CacheMissResponse
context: FunnelQueryContext

def __init__(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import cast
from typing import TypeAlias, cast

from posthog.hogql import ast
from posthog.hogql_queries.insights.lifecycle_query_runner import LifecycleQueryRunner
Expand All @@ -9,13 +9,15 @@
InsightActorsQueryOptions,
InsightActorsQueryOptionsResponse,
CachedInsightActorsQueryOptionsResponse,
CacheMissResponse,
)


class InsightActorsQueryOptionsRunner(QueryRunner):
query: InsightActorsQueryOptions
response: InsightActorsQueryOptionsResponse
cached_response: CachedInsightActorsQueryOptionsResponse
CachedResponseType: TypeAlias = CachedInsightActorsQueryOptionsResponse
cached_response: CachedResponseType | CacheMissResponse

@cached_property
def source_runner(self) -> QueryRunner:
Expand Down
6 changes: 4 additions & 2 deletions posthog/hogql_queries/insights/lifecycle_query_runner.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from datetime import timedelta
from math import ceil
from typing import Optional
from typing import Optional, TypeAlias

from django.utils.timezone import datetime
from posthog.caching.insights_api import (
Expand All @@ -25,14 +25,16 @@
EventsNode,
LifecycleQueryResponse,
InsightActorsQueryOptionsResponse,
CacheMissResponse,
)
from posthog.utils import format_label_date


class LifecycleQueryRunner(QueryRunner):
query: LifecycleQuery
response: LifecycleQueryResponse
cached_response: CachedLifecycleQueryResponse
CachedResponseType: TypeAlias = CachedLifecycleQueryResponse
cached_response: CachedResponseType | CacheMissResponse

def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery:
if self.query.samplingFactor == 0:
Expand Down
Loading
Loading