Skip to content

Commit

Permalink
feat(data-modeling): Data modelling django models and API (#24232)
Browse files Browse the repository at this point in the history
Co-authored-by: Eric Duong <[email protected]>
  • Loading branch information
2 people authored and pauldambra committed Aug 29, 2024
1 parent cf88e8d commit f5792a6
Show file tree
Hide file tree
Showing 13 changed files with 1,235 additions and 46 deletions.
2 changes: 1 addition & 1 deletion latest_migrations.manifest
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ contenttypes: 0002_remove_content_type_name
ee: 0016_rolemembership_organization_member
otp_static: 0002_throttling
otp_totp: 0002_auto_20190420_0723
posthog: 0462_change_replay_team_setting_defaults
posthog: 0463_datawarehousemodelpath_and_more
sessions: 0001_initial
social_django: 0010_uid_db_index
two_factor: 0007_auto_20201201_1019
27 changes: 21 additions & 6 deletions posthog/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,26 @@
from posthog.batch_exports import http as batch_exports
from posthog.settings import EE_AVAILABLE
from posthog.warehouse.api import (
external_data_schema,
external_data_source,
modeling,
saved_query,
table,
view_link,
external_data_schema,
)
from ..heatmaps.heatmaps_api import LegacyHeatmapViewSet, HeatmapViewSet
from .session import SessionViewSet

from ..heatmaps.heatmaps_api import HeatmapViewSet, LegacyHeatmapViewSet
from ..session_recordings.session_recording_api import SessionRecordingViewSet
from . import (
alert,
activity_log,
alert,
annotation,
app_metrics,
async_migration,
authentication,
comments,
dead_letter_queue,
debug_ch_queries,
early_access_feature,
error_tracking,
event_definition,
Expand All @@ -46,18 +48,18 @@
property_definition,
proxy_record,
query,
search,
scheduled_change,
search,
sharing,
survey,
tagged_item,
team,
uploaded_media,
user,
debug_ch_queries,
)
from .dashboards import dashboard, dashboard_templates
from .data_management import DataManagementViewSet
from .session import SessionViewSet


@decorators.api_view(["GET", "HEAD", "POST", "PUT", "PATCH", "DELETE"])
Expand Down Expand Up @@ -227,6 +229,19 @@ def api_not_found(request):
"environment_external_data_sources",
["team_id"],
)
# Expose the data-modeling DAG view set under the "warehouse_dag" prefix of the
# projects router. The positional arguments are: URL prefix, view set class,
# a route name string, and a list containing "team_id"
# (NOTE(review): presumably the basename and the parent lookup keys — confirm
# against the router's `register` signature, which is not visible here).
projects_router.register(
r"warehouse_dag",
modeling.DataWarehouseModelDagViewSet,
"project_warehouse_dag",
["team_id"],
)
# Expose the model-path view set under the "warehouse_model_paths" prefix,
# using the same argument layout as the registration above.
projects_router.register(
r"warehouse_model_paths",
modeling.DataWarehouseModelPathViewSet,
"project_warehouse_model_paths",
["team_id"],
)


projects_router.register(
r"external_data_schemas",
Expand Down
6 changes: 2 additions & 4 deletions posthog/hogql/context.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Literal, Optional, Any
from typing import TYPE_CHECKING, Any, Literal, Optional

from posthog.hogql.timings import HogQLTimings
from posthog.schema import HogQLNotice, HogQLQueryModifiers

if TYPE_CHECKING:
from posthog.hogql.transforms.property_types import PropertySwapper
from posthog.hogql.database.database import Database
from posthog.hogql.transforms.property_types import PropertySwapper
from posthog.models import Team


Expand Down Expand Up @@ -36,8 +36,6 @@ class HogQLContext:
enable_select_queries: bool = False
# Do we apply a limit of MAX_SELECT_RETURNED_ROWS=10000 to the topmost select query?
limit_top_select: bool = True
# How many nested views do we support on this query?
max_view_depth: int = 1
# Globals that will be resolved in the context of the query
globals: Optional[dict] = None

Expand Down
23 changes: 10 additions & 13 deletions posthog/hogql/resolver.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,30 @@
from datetime import date, datetime
from typing import Optional, Any, cast, Literal
from typing import Any, Literal, Optional, cast
from uuid import UUID

from posthog.hogql import ast
from posthog.hogql.ast import FieldTraverserType, ConstantType
from posthog.hogql.database.schema.persons import PersonsTable
from posthog.hogql.functions import find_hogql_posthog_function
from posthog.hogql.ast import ConstantType, FieldTraverserType
from posthog.hogql.context import HogQLContext
from posthog.hogql.database.models import (
StringJSONDatabaseField,
FunctionCallTable,
LazyTable,
SavedQuery,
StringJSONDatabaseField,
)
from posthog.hogql.database.s3_table import S3Table
from posthog.hogql.database.schema.events import EventsTable
from posthog.hogql.database.schema.persons import PersonsTable
from posthog.hogql.errors import ImpossibleASTError, QueryError, ResolutionError
from posthog.hogql.functions import find_hogql_posthog_function
from posthog.hogql.functions.action import matches_action
from posthog.hogql.functions.cohort import cohort_query_node
from posthog.hogql.functions.mapping import validate_function_args, HOGQL_CLICKHOUSE_FUNCTIONS, compare_types
from posthog.hogql.functions.mapping import HOGQL_CLICKHOUSE_FUNCTIONS, compare_types, validate_function_args
from posthog.hogql.functions.sparkline import sparkline
from posthog.hogql.hogqlx import convert_to_hx, HOGQLX_COMPONENTS
from posthog.hogql.hogqlx import HOGQLX_COMPONENTS, convert_to_hx
from posthog.hogql.parser import parse_select
from posthog.hogql.resolver_utils import expand_hogqlx_query, lookup_cte_by_name, lookup_field_by_name
from posthog.hogql.visitor import CloningVisitor, clone_expr, TraversingVisitor
from posthog.hogql.visitor import CloningVisitor, TraversingVisitor, clone_expr
from posthog.models.utils import UUIDT
from posthog.hogql.database.schema.events import EventsTable
from posthog.hogql.database.s3_table import S3Table

# https://github.com/ClickHouse/ClickHouse/issues/23194 - "Describe how identifiers in SELECT queries are resolved"

Expand Down Expand Up @@ -306,9 +306,6 @@ def visit_join_expr(self, node: ast.JoinExpr):
if isinstance(database_table, SavedQuery):
self.current_view_depth += 1

if self.current_view_depth > self.context.max_view_depth:
raise QueryError("Nested views are not supported")

node.table = parse_select(str(database_table.query))

if isinstance(node.table, ast.SelectQuery):
Expand Down
5 changes: 3 additions & 2 deletions posthog/management/commands/setup_test_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
from infi.clickhouse_orm import Database

from posthog.clickhouse.schema import (
CREATE_DATA_QUERIES,
CREATE_DICTIONARY_QUERIES,
CREATE_DISTRIBUTED_TABLE_QUERIES,
CREATE_KAFKA_TABLE_QUERIES,
CREATE_MERGETREE_TABLE_QUERIES,
CREATE_MV_TABLE_QUERIES,
build_query,
CREATE_DATA_QUERIES,
CREATE_VIEW_QUERIES,
build_query,
)
from posthog.settings import (
CLICKHOUSE_CLUSTER,
Expand Down Expand Up @@ -103,6 +103,7 @@ def handle(self, *args, **kwargs):
# :TRICKY: Create extension and function depended on by models.
with connection.cursor() as cursor:
cursor.execute("CREATE EXTENSION pg_trgm")
cursor.execute("CREATE EXTENSION ltree")

return super().handle(*args, **kwargs)

Expand Down
74 changes: 74 additions & 0 deletions posthog/migrations/0463_datawarehousemodelpath_and_more.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Generated by Django 4.2.14 on 2024-08-12 12:04

import django.contrib.postgres.indexes
import django.db.models.constraints
import django.db.models.deletion
from django.conf import settings
from django.db import migrations, models

import posthog.models.utils
import posthog.warehouse.models.modeling


class Migration(migrations.Migration):
    """Enable the PostgreSQL ``ltree`` extension and create ``DataWarehouseModelPath``.

    The model gets three indexes (two b-tree, one GiST on ``path``) and a
    unique constraint on ``(team_id, path)``.
    """

    dependencies = [
        ("posthog", "0462_change_replay_team_setting_defaults"),
    ]

    operations = [
        # Must run before the model is created.
        # NOTE(review): presumably ``LabelTreeField`` maps to the ``ltree`` column type — confirm.
        # ``IF NOT EXISTS`` / ``IF EXISTS`` keep both directions idempotent, so the
        # migration does not fail on databases where the extension was already
        # installed (or already removed) out of band.
        migrations.RunSQL(
            "CREATE EXTENSION IF NOT EXISTS ltree;",
            reverse_sql="DROP EXTENSION IF EXISTS ltree;",
        ),
        migrations.CreateModel(
            name="DataWarehouseModelPath",
            fields=[
                ("created_at", models.DateTimeField(auto_now_add=True)),
                ("updated_at", models.DateTimeField(auto_now=True, null=True)),
                (
                    "id",
                    models.UUIDField(
                        default=posthog.models.utils.UUIDT, editable=False, primary_key=True, serialize=False
                    ),
                ),
                ("path", posthog.warehouse.models.modeling.LabelTreeField()),
                (
                    "created_by",
                    models.ForeignKey(
                        blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to=settings.AUTH_USER_MODEL
                    ),
                ),
                (
                    "saved_query",
                    models.ForeignKey(
                        default=None,
                        null=True,
                        on_delete=django.db.models.deletion.SET_NULL,
                        to="posthog.datawarehousesavedquery",
                    ),
                ),
                (
                    "table",
                    models.ForeignKey(
                        default=None,
                        null=True,
                        on_delete=django.db.models.deletion.SET_NULL,
                        to="posthog.datawarehousetable",
                    ),
                ),
                ("team", models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to="posthog.team")),
            ],
            options={
                "indexes": [
                    models.Index(fields=["team_id", "path"], name="team_id_path"),
                    models.Index(fields=["team_id", "saved_query_id"], name="team_id_saved_query_id"),
                    django.contrib.postgres.indexes.GistIndex(models.F("path"), name="model_path_path"),
                ],
            },
        ),
        # A path may appear only once per team; the constraint is declared
        # deferrable but checked immediately by default.
        migrations.AddConstraint(
            model_name="datawarehousemodelpath",
            constraint=models.UniqueConstraint(
                deferrable=django.db.models.constraints.Deferrable["IMMEDIATE"],
                fields=("team_id", "path"),
                name="unique_team_id_path",
            ),
        ),
    ]
29 changes: 29 additions & 0 deletions posthog/warehouse/api/modeling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from rest_framework import request, response, serializers, viewsets

from posthog.api.routing import TeamAndOrgViewSetMixin
from posthog.api.shared import UserBasicSerializer
from posthog.warehouse.models import DataWarehouseModelPath


class DataWarehouseModelPathSerializer(serializers.ModelSerializer):
    """Serialize ``DataWarehouseModelPath`` rows for the read-only API.

    ``created_by`` is rendered as a nested, read-only ``UserBasicSerializer``
    instead of a bare primary key.
    """

    created_by = UserBasicSerializer(read_only=True)

    class Meta:
        model = DataWarehouseModelPath
        # Django REST framework requires ``fields`` or ``exclude`` on every
        # ModelSerializer; without one it raises an assertion as soon as the
        # serializer's fields are built.
        fields = "__all__"


class DataWarehouseModelPathViewSet(TeamAndOrgViewSetMixin, viewsets.ReadOnlyModelViewSet):
    """Read-only (list/retrieve) endpoints for ``DataWarehouseModelPath`` objects."""

    serializer_class = DataWarehouseModelPathSerializer
    queryset = DataWarehouseModelPath.objects.all()

    scope_object = "INTERNAL"


class DataWarehouseModelDagViewSet(TeamAndOrgViewSetMixin, viewsets.ViewSet):
    """Endpoint exposing a team's data-modeling DAG."""

    scope_object = "INTERNAL"

    def list(self, request: request.Request, *args, **kwargs) -> response.Response:
        """Respond with the edges and nodes of the DAG built for ``self.team``."""
        team_dag = DataWarehouseModelPath.objects.get_dag(self.team)
        payload = {"edges": team_dag.edges, "nodes": team_dag.nodes}

        return response.Response(payload)
Loading

0 comments on commit f5792a6

Please sign in to comment.