Skip to content

Commit

Permalink
fix(data-warehouse): Dont hard delete joins (#25621)
Browse files Browse the repository at this point in the history
  • Loading branch information
Gilbert09 authored Oct 16, 2024
1 parent bafa329 commit 405dd59
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 21 deletions.
21 changes: 21 additions & 0 deletions posthog/hogql/database/test/test_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,27 @@ def test_serialize_database_warehouse_table_s3(self):
assert field.type == "string"
assert field.schema_valid is True

def test_serialize_database_warehouse_with_deleted_joins(self):
DataWarehouseJoin.objects.create(
team=self.team,
source_table_name="events",
source_table_key="event",
joining_table_name="groups",
joining_table_key="key",
field_name="some_field",
deleted=True,
)

db = create_hogql_database(team_id=self.team.pk)

serialized_database = serialize_database(HogQLContext(team_id=self.team.pk, database=db))

events_table = serialized_database.get("events")
assert events_table is not None

joined_field = events_table.fields.get("some_field")
assert joined_field is None

def test_serialize_database_warehouse_table_s3_with_hyphens(self):
credentials = DataWarehouseCredential.objects.create(access_key="blah", access_secret="blah", team=self.team)
DataWarehouseTable.objects.create(
Expand Down
8 changes: 6 additions & 2 deletions posthog/warehouse/api/saved_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import structlog
from asgiref.sync import async_to_sync
from django.db import transaction
from django.db.models import Q
from rest_framework import exceptions, filters, request, response, serializers, status, viewsets
from rest_framework.decorators import action

Expand Down Expand Up @@ -155,8 +156,11 @@ def safely_get_queryset(self, queryset):

def destroy(self, request: request.Request, *args: Any, **kwargs: Any) -> response.Response:
instance: DataWarehouseSavedQuery = self.get_object()
DataWarehouseJoin.objects.filter(source_table_name=instance.name).delete()
DataWarehouseJoin.objects.filter(joining_table_name=instance.name).delete()

for join in DataWarehouseJoin.objects.filter(
Q(team_id=instance.team_id) & (Q(source_table_name=instance.name) | Q(joining_table_name=instance.name))
).exclude(deleted=True):
join.soft_delete()

if instance.table is not None:
instance.table.soft_delete()
Expand Down
40 changes: 24 additions & 16 deletions posthog/warehouse/data_load/source_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,31 @@

@database_sync_to_async
def database_operations(team_id: int, table_prefix: str) -> None:
customer_join_exists = DataWarehouseJoin.objects.filter(
team_id=team_id,
source_table_name="persons",
source_table_key="properties.email",
joining_table_name=f"{table_prefix}stripe_customer",
joining_table_key="email",
field_name=f"{table_prefix}stripe_customer",
).exists()
customer_join_exists = (
DataWarehouseJoin.objects.filter(
team_id=team_id,
source_table_name="persons",
source_table_key="properties.email",
joining_table_name=f"{table_prefix}stripe_customer",
joining_table_key="email",
field_name=f"{table_prefix}stripe_customer",
)
.exclude(deleted=True)
.exists()
)

invoice_join_exists = DataWarehouseJoin.objects.filter(
team_id=team_id,
source_table_name="persons",
source_table_key="properties.email",
joining_table_name=f"{table_prefix}stripe_invoice",
joining_table_key="customer_email",
field_name=f"{table_prefix}stripe_invoice",
).exists()
invoice_join_exists = (
DataWarehouseJoin.objects.filter(
team_id=team_id,
source_table_name="persons",
source_table_key="properties.email",
joining_table_name=f"{table_prefix}stripe_invoice",
joining_table_key="customer_email",
field_name=f"{table_prefix}stripe_invoice",
)
.exclude(deleted=True)
.exists()
)

if not customer_join_exists:
DataWarehouseJoin.objects.create(
Expand Down
7 changes: 6 additions & 1 deletion posthog/warehouse/models/join.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Optional
from warnings import warn

from datetime import datetime
from django.db import models

from posthog.hogql.ast import SelectQuery
Expand Down Expand Up @@ -41,6 +41,11 @@ class DataWarehouseJoin(CreatedMetaFields, UUIDModel, DeletedMetaFields):
joining_table_key = models.CharField(max_length=400)
field_name = models.CharField(max_length=400)

def soft_delete(self):
self.deleted = True
self.deleted_at = datetime.now()
self.save()

def join_function(
self, override_source_table_key: Optional[str] = None, override_joining_table_key: Optional[str] = None
):
Expand Down
6 changes: 4 additions & 2 deletions posthog/warehouse/models/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,10 @@ class TableFormat(models.TextChoices):
def soft_delete(self):
from posthog.warehouse.models.join import DataWarehouseJoin

DataWarehouseJoin.objects.filter(source_table_name=self.name).delete()
DataWarehouseJoin.objects.filter(joining_table_name=self.name).delete()
for join in DataWarehouseJoin.objects.filter(
Q(team_id=self.team.pk) & (Q(source_table_name=self.name) | Q(joining_table_name=self.name))
).exclude(deleted=True):
join.soft_delete()

self.deleted = True
self.deleted_at = datetime.now()
Expand Down

0 comments on commit 405dd59

Please sign in to comment.