Skip to content

Commit

Permalink
feat(data-warehouse): migrations for data sync (#18966)
Browse files Browse the repository at this point in the history
* just code for migrations

* use all timezones because update would cause common_timezones to be less inclusive

* install new dependencies

* add comment

* revert

* restore

* fix type
  • Loading branch information
EDsCODE authored Nov 29, 2023
1 parent 78e7e9f commit 1fcd437
Show file tree
Hide file tree
Showing 16 changed files with 246 additions and 57 deletions.
2 changes: 1 addition & 1 deletion latest_migrations.manifest
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ contenttypes: 0002_remove_content_type_name
ee: 0015_add_verified_properties
otp_static: 0002_throttling
otp_totp: 0002_auto_20190420_0723
posthog: 0366_alter_action_created_by
posthog: 0367_job_inputs
sessions: 0001_initial
social_django: 0010_uid_db_index
two_factor: 0007_auto_20201201_1019
65 changes: 65 additions & 0 deletions posthog/migrations/0367_job_inputs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Generated by Django 3.2.19 on 2023-11-22 19:39

from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion
import encrypted_fields.fields

# Imported explicitly because the AlterField on "team.timezone" below reads
# posthog.models.team.TIMEZONES; without this line the attribute lookup only
# works if some other import has already loaded the submodule as a side effect.
import posthog.models.team
import posthog.models.utils


class Migration(migrations.Migration):
    """Schema changes for data-warehouse sync jobs.

    Operations, in order:

    1. Add the nullable encrypted JSON field ``job_inputs`` to ``externaldatasource``.
    2. Add the nullable foreign key ``external_data_source`` to ``datawarehousetable``
       (rows are deleted together with the referenced source).
    3. Create the ``ExternalDataJob`` model.
    4. Re-declare ``team.timezone`` with ``posthog.models.team.TIMEZONES`` as its choices.
    """

    dependencies = [
        ("posthog", "0366_alter_action_created_by"),
    ]

    operations = [
        migrations.AddField(
            model_name="externaldatasource",
            name="job_inputs",
            field=encrypted_fields.fields.EncryptedJSONField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name="datawarehousetable",
            name="external_data_source",
            field=models.ForeignKey(
                blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to="posthog.externaldatasource"
            ),
        ),
        migrations.CreateModel(
            name="ExternalDataJob",
            fields=[
                ("created_at", models.DateTimeField(auto_now_add=True)),
                (
                    "id",
                    models.UUIDField(
                        default=posthog.models.utils.UUIDT, editable=False, primary_key=True, serialize=False
                    ),
                ),
                ("status", models.CharField(max_length=400)),
                ("rows_synced", models.BigIntegerField(blank=True, null=True)),
                (
                    "latest_error",
                    models.TextField(help_text="The latest error that occurred during this run.", null=True),
                ),
                (
                    "created_by",
                    models.ForeignKey(
                        blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to=settings.AUTH_USER_MODEL
                    ),
                ),
                (
                    "pipeline",
                    models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to="posthog.externaldatasource"),
                ),
                ("team", models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to="posthog.team")),
            ],
            options={
                "abstract": False,
            },
        ),
        migrations.AlterField(
            model_name="team",
            name="timezone",
            # The choices are read from the module constant when this file is loaded,
            # not frozen into the migration as a literal list.
            field=models.CharField(choices=posthog.models.team.TIMEZONES, default="UTC", max_length=240),
        ),
    ]
2 changes: 1 addition & 1 deletion posthog/models/team/team.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

from .team_caching import get_team_in_cache, set_team_in_cache

TIMEZONES = [(tz, tz) for tz in pytz.common_timezones]
TIMEZONES = [(tz, tz) for tz in pytz.all_timezones]

# TODO: DEPRECATED; delete when these attributes can be fully removed from `Team` model
DEPRECATED_ATTRS = (
Expand Down
2 changes: 0 additions & 2 deletions posthog/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1004,8 +1004,6 @@ def get_available_timezones_with_offsets() -> Dict[str, float]:
offset = pytz.timezone(tz).utcoffset(now)
except Exception:
offset = pytz.timezone(tz).utcoffset(now + dt.timedelta(hours=2))
if offset is None:
continue
offset_hours = int(offset.total_seconds()) / 3600
result[tz] = offset_hours
return result
Expand Down
5 changes: 3 additions & 2 deletions posthog/warehouse/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from .table import *
from .credential import *
from .datawarehouse_saved_query import *
from .view_link import *
from .external_data_job import *
from .external_data_source import *
from .table import *
from .view_link import *
5 changes: 3 additions & 2 deletions posthog/warehouse/models/credential.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from posthog.models.utils import UUIDModel, CreatedMetaFields, sane_repr
from django.db import models
from posthog.models.team import Team
from encrypted_fields.fields import EncryptedTextField

from posthog.models.team import Team
from posthog.models.utils import CreatedMetaFields, UUIDModel, sane_repr


class DataWarehouseCredential(CreatedMetaFields, UUIDModel):
access_key: EncryptedTextField = EncryptedTextField(max_length=500)
Expand Down
15 changes: 8 additions & 7 deletions posthog/warehouse/models/datawarehouse_saved_query.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from posthog.models.utils import UUIDModel, CreatedMetaFields, DeletedMetaFields
import re
from typing import Dict

from django.core.exceptions import ValidationError
from django.db import models
from posthog.models.team import Team

from posthog.hogql.database.models import SavedQuery
from posthog.hogql.database.database import Database
from typing import Dict
import re
from django.core.exceptions import ValidationError
from posthog.hogql.database.models import SavedQuery
from posthog.models.team import Team
from posthog.models.utils import CreatedMetaFields, DeletedMetaFields, UUIDModel
from posthog.warehouse.models.util import remove_named_tuples


Expand Down Expand Up @@ -56,9 +57,9 @@ def get_columns(self) -> Dict[str, str]:

@property
def s3_tables(self):
from posthog.hogql.parser import parse_select
from posthog.hogql.context import HogQLContext
from posthog.hogql.database.database import create_hogql_database
from posthog.hogql.parser import parse_select
from posthog.hogql.query import create_default_modifiers_for_team
from posthog.hogql.resolver import resolve_types
from posthog.models.property.util import S3TableVisitor
Expand Down
22 changes: 22 additions & 0 deletions posthog/warehouse/models/external_data_job.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from django.db import models

from posthog.models.team import Team
from posthog.models.utils import CreatedMetaFields, UUIDModel, sane_repr


class ExternalDataJob(CreatedMetaFields, UUIDModel):
    """Record of one external data job belonging to a team.

    Every row references the ``ExternalDataSource`` it was started for via
    ``pipeline`` and stores a status string, an optional count of synced rows
    and the text of the latest error.
    """

    class Status(models.TextChoices):
        # Each member is (stored value, human-readable label).
        RUNNING = "Running", "Running"
        FAILED = "Failed", "Failed"
        COMPLETED = "Completed", "Completed"
        CANCELLED = "Cancelled", "Cancelled"

    # Both relations cascade: deleting the team or the source deletes its jobs.
    team: models.ForeignKey = models.ForeignKey(Team, on_delete=models.CASCADE)
    pipeline: models.ForeignKey = models.ForeignKey("posthog.ExternalDataSource", on_delete=models.CASCADE)

    # NOTE(review): ``Status`` is not passed as ``choices`` here, so at the model
    # level any string of up to 400 characters is accepted.
    status: models.CharField = models.CharField(max_length=400)
    rows_synced: models.BigIntegerField = models.BigIntegerField(blank=True, null=True)
    latest_error: models.TextField = models.TextField(
        help_text="The latest error that occurred during this run.", null=True
    )

    __repr__ = sane_repr("id")
15 changes: 14 additions & 1 deletion posthog/warehouse/models/external_data_source.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from posthog.models.utils import UUIDModel, CreatedMetaFields, sane_repr
import encrypted_fields
from django.db import models

from posthog.models.team import Team
from posthog.models.utils import CreatedMetaFields, UUIDModel, sane_repr


class ExternalDataSource(CreatedMetaFields, UUIDModel):
Expand All @@ -13,6 +15,17 @@ class Type(models.TextChoices):
team: models.ForeignKey = models.ForeignKey(Team, on_delete=models.CASCADE)
status: models.CharField = models.CharField(max_length=400)
source_type: models.CharField = models.CharField(max_length=128, choices=Type.choices)
job_inputs: encrypted_fields.fields.EncryptedJSONField = encrypted_fields.fields.EncryptedJSONField(
null=True, blank=True
)
are_tables_created: models.BooleanField = models.BooleanField(default=False)

__repr__ = sane_repr("source_id")

@property
def folder_path(self) -> str:
    """Return the folder name built from the team id, source type and primary key.

    The parts are joined as ``team_<team_id>_<source_type>_<pk>``; the result
    is lower-cased and every hyphen is replaced with an underscore.
    """
    pk_text = str(self.pk)
    raw_name = f"team_{self.team_id}_{self.source_type}_{pk_text}"
    return raw_name.lower().replace("-", "_")

@property
def draft_folder_path(self) -> str:
    """Return the draft folder name built from the team id, source type and primary key.

    The parts are joined as ``team_<team_id>_<source_type>_<pk>_draft``; the
    result is lower-cased and every hyphen is replaced with an underscore.
    """
    pk_text = str(self.pk)
    raw_name = f"team_{self.team_id}_{self.source_type}_{pk_text}_draft"
    return raw_name.lower().replace("-", "_")
34 changes: 20 additions & 14 deletions posthog/warehouse/models/table.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,28 @@
from posthog.models.utils import (
UUIDModel,
CreatedMetaFields,
sane_repr,
DeletedMetaFields,
)
from posthog.errors import wrap_query_error
from django.db import models
from posthog.models.team import Team

from posthog.client import sync_execute
from .credential import DataWarehouseCredential
from posthog.errors import wrap_query_error
from posthog.hogql.database.models import (
StringDatabaseField,
IntegerDatabaseField,
DateTimeDatabaseField,
DateDatabaseField,
StringJSONDatabaseField,
BooleanDatabaseField,
DateDatabaseField,
DateTimeDatabaseField,
IntegerDatabaseField,
StringArrayDatabaseField,
StringDatabaseField,
StringJSONDatabaseField,
)
from posthog.hogql.database.s3_table import S3Table
from posthog.models.team import Team
from posthog.models.utils import (
CreatedMetaFields,
DeletedMetaFields,
UUIDModel,
sane_repr,
)
from posthog.warehouse.models.util import remove_named_tuples

from .credential import DataWarehouseCredential

CLICKHOUSE_HOGQL_MAPPING = {
"UUID": StringDatabaseField,
"String": StringDatabaseField,
Expand Down Expand Up @@ -66,6 +68,10 @@ class TableFormat(models.TextChoices):
DataWarehouseCredential, on_delete=models.CASCADE, null=True, blank=True
)

external_data_source: models.ForeignKey = models.ForeignKey(
"ExternalDataSource", on_delete=models.CASCADE, null=True, blank=True
)

columns: models.JSONField = models.JSONField(
default=dict,
null=True,
Expand Down
2 changes: 1 addition & 1 deletion posthog/warehouse/models/test/test_table.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from posthog.test.base import BaseTest
from posthog.warehouse.models import DataWarehouseTable, DataWarehouseCredential
from posthog.warehouse.models import DataWarehouseCredential, DataWarehouseTable


class TestTable(BaseTest):
Expand Down
11 changes: 7 additions & 4 deletions posthog/warehouse/models/view_link.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
from typing import Any, Dict

from django.db import models

from posthog.hogql.ast import SelectQuery
from posthog.hogql.context import HogQLContext
from posthog.models.utils import UUIDModel, CreatedMetaFields, DeletedMetaFields
from django.db import models
from posthog.hogql.errors import HogQLException
from posthog.models.team import Team
from posthog.models.utils import CreatedMetaFields, DeletedMetaFields, UUIDModel

from .datawarehouse_saved_query import DataWarehouseSavedQuery
from typing import Dict, Any
from posthog.hogql.errors import HogQLException


class DataWarehouseViewLink(CreatedMetaFields, UUIDModel, DeletedMetaFields):
Expand Down
2 changes: 1 addition & 1 deletion requirements-dev.in
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ types-markdown==3.3.9
types-PyYAML==6.0.1
types-freezegun==1.1.10
types-python-dateutil>=2.8.3
types-pytz==2021.3.2
types-pytz==2023.3
types-redis==4.3.20
types-retry==0.9.9.4
types-requests==2.26.1
Expand Down
10 changes: 6 additions & 4 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ python-dateutil==2.8.2
# -r requirements-dev.in
# faker
# freezegun
pytz==2021.1
pytz==2023.3
# via
# -c requirements.txt
# django
Expand Down Expand Up @@ -306,7 +306,7 @@ types-markdown==3.3.9
# via -r requirements-dev.in
types-python-dateutil==2.8.3
# via -r requirements-dev.in
types-pytz==2021.3.2
types-pytz==2023.3
# via -r requirements-dev.in
types-pyyaml==6.0.1
# via
Expand Down Expand Up @@ -343,8 +343,10 @@ urllib3-secure-extra==0.1.0
# urllib3
watchdog==2.1.8
# via pytest-watch
wheel==0.38.1
# via pip-tools
wheel==0.42.0
# via
# -c requirements.txt
# pip-tools

# The following packages are considered to be unsafe in a requirements file:
# pip
Expand Down
11 changes: 7 additions & 4 deletions requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
# - `pip-compile --rebuild requirements-dev.in`
#
aiohttp>=3.9.0
aioboto3==11.1
aioboto3==12.0.0
aiokafka>=0.8
antlr4-python3-runtime==4.13.1
amqp==5.1.1
boto3==1.26.76
boto3==1.28.16
boto3-stubs[s3]
brotli==1.1.0
celery==5.3.4
Expand All @@ -35,7 +35,8 @@ django-revproxy==0.12.0
djangorestframework==3.14.0
djangorestframework-csv==2.1.1
djangorestframework-dataclasses==1.2.0
django-fernet-encrypted-fields==0.1.2
django-fernet-encrypted-fields==0.1.3
dlt==0.3.24
dnspython==2.2.1
drf-exceptions-hog==0.4.0
drf-extensions==0.7.0
Expand Down Expand Up @@ -65,11 +66,13 @@ pydantic==2.3.0
pyjwt==2.4.0
python-dateutil>=2.8.2
python3-saml==1.12.0
pytz==2021.1
pytz==2023.3
redis==4.5.4
retry==0.9.2
requests==2.28.1
requests-oauthlib==1.3.0
s3fs==2023.10.0
stripe==7.4.0
selenium==4.1.5
sentry-sdk==1.14.0
semantic_version==2.8.5
Expand Down
Loading

0 comments on commit 1fcd437

Please sign in to comment.